//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
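//
// For example, RBX below is declared (SOC, SOE): save-on-call while running
// compiled Java code, but save-on-entry under the native C calling
// convention.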
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
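// For example, encoding 0 (RAX) becomes the 3-bit pattern 000 in an
// instruction's ModRM reg/rm field; encodings 8-15 (R8-R15) additionally set
// the REX.R/REX.B extension bit, and encodings 16-31 (the APX extended GPRs)
// require the REX2 or extended-EVEX prefixes.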

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// Previously RBX, RSI, and RDI were set as save-on-entry for Java code.
// SOE was then turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are turned back on as SOE
// registers.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
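// (Concretely: the SysV ABI passes the first eight FP arguments in
// XMM0-XMM7, while the Windows x64 ABI passes the first four arguments in
// XMM0-XMM3, FP and integer arguments sharing those four slots.)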

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());

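// Note: k0 is deliberately not defined above. In EVEX encodings an opmask
// specifier of 0 means "no masking", so k0 cannot serve as a general
// predicate register and is excluded from allocation.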

//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}
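
// Note: register classes written with a %{ ... %} body, like any_reg above,
// are computed from a C++ expression at runtime rather than fixed at build
// time. This allows a mask such as _ANY_REG_mask to include or exclude the
// APX registers (R16-R31) depending on CPU features and VM flags (the masks
// are assumed to be initialized elsewhere in this file's source block).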

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre evex float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
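// A reg_class_dynamic selects its first (EVEX) class when the trailing
// predicate holds at runtime and falls back to the second (legacy) class
// otherwise.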
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
 1116 // Class for pre evex double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
 1391 // Class for all 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
 1425 // Class for restricted 512bit vector registers
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
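// Returns true when each finite bound of the CastLL's long type fits in a
// signed 32-bit immediate; an unbounded end (min/max jlong) is accepted too.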
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
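// R12 doubles as the compressed-oops heap base register, so it must be kept
// out of the allocatable masks whenever compressed oops are in use.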
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
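  // r16-r31 are the Intel APX extended GPRs; they are stripped from the
  // masks below unless UseAPX is enabled.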
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
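  // RSP is the stack pointer and R15 holds the current JavaThread, so
  // neither may be handed out as an ordinary pointer register.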
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
static bool generate_vzeroupper(Compile* C) {
  // Generate vzeroupper when wide vectors are in use or the upper AVX state
  // must be cleared.
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
}
 1629 
 1630 static int clear_avx_size() {
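  // vzeroupper encodes as C5 F8 77, i.e. 3 bytes.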
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
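  // (call rel32: 1-byte 0xE8 opcode followed by a 4-byte displacement)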
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
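  // (10-byte movabs that seeds the inline cache, then a 5-byte call rel32)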
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
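  // (10-byte movabs into r10, then a 3-byte indirect call through r10)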
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // A floating point compare sets CF=1 both for 'below' and for the unordered
  // case (at least one input is NaN), so the jcc(below) also catches NaN.
 1704   Label done;
 1705   __ movl(dst, -1);
 1706   __ jcc(Assembler::below, done);
 1707   __ setcc(Assembler::notEqual, dst);
 1708   __ bind(done);
 1709 }
 1710 
 1711 // Math.min()    # Math.max()
 1712 // --------------------------
 1713 // ucomis[s/d]   #
 1714 // ja   -> b     # a
 1715 // jp   -> NaN   # NaN
 1716 // jb   -> a     # b
 1717 // je            #
 1718 // |-jz -> a | b # a & b
 1719 // |    -> a     #
 1720 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1721                             XMMRegister a, XMMRegister b,
 1722                             XMMRegister xmmt, Register rt,
 1723                             bool min, bool single) {
 1724 
 1725   Label nan, zero, below, above, done;
 1726 
 1727   if (single)
 1728     __ ucomiss(a, b);
 1729   else
 1730     __ ucomisd(a, b);
 1731 
 1732   if (dst->encoding() != (min ? b : a)->encoding())
 1733     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1734   else
 1735     __ jccb(Assembler::above, done);
 1736 
 1737   __ jccb(Assembler::parity, nan);  // PF=1
 1738   __ jccb(Assembler::below, below); // CF=1
 1739 
 1740   // equal
 1741   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1742   if (single) {
 1743     __ ucomiss(a, xmmt);
 1744     __ jccb(Assembler::equal, zero);
 1745 
 1746     __ movflt(dst, a);
 1747     __ jmp(done);
 1748   }
 1749   else {
 1750     __ ucomisd(a, xmmt);
 1751     __ jccb(Assembler::equal, zero);
 1752 
 1753     __ movdbl(dst, a);
 1754     __ jmp(done);
 1755   }
 1756 
 1757   __ bind(zero);
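  // Both inputs compared equal to zero, so they are +/-0.0. OR propagates a
  // set sign bit (min(+0.0, -0.0) is -0.0) while AND clears it
  // (max(+0.0, -0.0) is +0.0).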
 1758   if (min)
 1759     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1760   else
 1761     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1762 
 1763   __ jmp(done);
 1764 
 1765   __ bind(above);
 1766   if (single)
 1767     __ movflt(dst, min ? b : a);
 1768   else
 1769     __ movdbl(dst, min ? b : a);
 1770 
 1771   __ jmp(done);
 1772 
 1773   __ bind(nan);
 1774   if (single) {
 1775     __ movl(rt, 0x7fc00000); // Float.NaN
 1776     __ movdl(dst, rt);
 1777   }
 1778   else {
 1779     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1780     __ movdq(dst, rt);
 1781   }
 1782   __ jmp(done);
 1783 
 1784   __ bind(below);
 1785   if (single)
 1786     __ movflt(dst, min ? a : b);
 1787   else
 1788     __ movdbl(dst, min ? a : b);
 1789 
 1790   __ bind(done);
 1791 }
 1792 
 1793 //=============================================================================
 1794 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1795 
 1796 int ConstantTable::calculate_table_base_offset() const {
 1797   return 0;  // absolute addressing, no offset
 1798 }
 1799 
 1800 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1801 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1802   ShouldNotReachHere();
 1803 }
 1804 
 1805 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1806   // Empty encoding
 1807 }
 1808 
 1809 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1810   return 0;
 1811 }
 1812 
 1813 #ifndef PRODUCT
 1814 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1815   st->print("# MachConstantBaseNode (empty encoding)");
 1816 }
 1817 #endif
 1818 
 1819 
 1820 //=============================================================================
 1821 #ifndef PRODUCT
 1822 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1823   Compile* C = ra_->C;
 1824 
 1825   int framesize = C->output()->frame_size_in_bytes();
 1826   int bangsize = C->output()->bang_size_in_bytes();
 1827   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1828   // Remove wordSize for return addr which is already pushed.
 1829   framesize -= wordSize;
 1830 
 1831   if (C->output()->need_stack_bang(bangsize)) {
 1832     framesize -= wordSize;
 1833     st->print("# stack bang (%d bytes)", bangsize);
 1834     st->print("\n\t");
 1835     st->print("pushq   rbp\t# Save rbp");
 1836     if (PreserveFramePointer) {
 1837         st->print("\n\t");
 1838         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1839     }
 1840     if (framesize) {
 1841       st->print("\n\t");
 1842       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1843     }
 1844   } else {
 1845     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1846     st->print("\n\t");
 1847     framesize -= wordSize;
 1848     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1849     if (PreserveFramePointer) {
 1850       st->print("\n\t");
 1851       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1852       if (framesize > 0) {
 1853         st->print("\n\t");
 1854         st->print("addq    rbp, #%d", framesize);
 1855       }
 1856     }
 1857   }
 1858 
 1859   if (VerifyStackAtCalls) {
 1860     st->print("\n\t");
 1861     framesize -= wordSize;
 1862     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1863 #ifdef ASSERT
 1864     st->print("\n\t");
 1865     st->print("# stack alignment check");
 1866 #endif
 1867   }
 1868   if (C->stub_function() != nullptr) {
 1869     st->print("\n\t");
 1870     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1871     st->print("\n\t");
 1872     st->print("je      fast_entry\t");
 1873     st->print("\n\t");
 1874     st->print("call    #nmethod_entry_barrier_stub\t");
 1875     st->print("\n\tfast_entry:");
 1876   }
 1877   st->cr();
 1878 }
 1879 #endif
 1880 
 1881 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1882   Compile* C = ra_->C;
 1883 
 1884   int framesize = C->output()->frame_size_in_bytes();
 1885   int bangsize = C->output()->bang_size_in_bytes();
 1886 
 1887   if (C->clinit_barrier_on_entry()) {
 1888     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1889     assert(!C->method()->holder()->is_not_initialized() || C->do_clinit_barriers(), "initialization should have been started");
 1890 
 1891     Label L_skip_barrier;
 1892     Register klass = rscratch1;
 1893 
 1894     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1895     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1896 
 1897     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1898 
 1899     __ bind(L_skip_barrier);
 1900   }
 1901 
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
 1903 
 1904   C->output()->set_frame_complete(__ offset());
 1905 
 1906   if (C->has_mach_constant_base_node()) {
 1907     // NOTE: We set the table base offset here because users might be
 1908     // emitted before MachConstantBaseNode.
 1909     ConstantTable& constant_table = C->output()->constant_table();
 1910     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1911   }
 1912 }
 1913 
 1914 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1915 {
 1916   return MachNode::size(ra_); // too many variables; just compute it
 1917                               // the hard way
 1918 }
 1919 
 1920 int MachPrologNode::reloc() const
 1921 {
 1922   return 0; // a large enough number
 1923 }
 1924 
 1925 //=============================================================================
 1926 #ifndef PRODUCT
 1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1928 {
 1929   Compile* C = ra_->C;
 1930   if (generate_vzeroupper(C)) {
 1931     st->print("vzeroupper");
 1932     st->cr(); st->print("\t");
 1933   }
 1934 
 1935   int framesize = C->output()->frame_size_in_bytes();
 1936   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the words for the return address (already pushed) and for RBP.
 1939   framesize -= 2*wordSize;
 1940 
 1941   if (framesize) {
 1942     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1943     st->print("\t");
 1944   }
 1945 
 1946   st->print_cr("popq    rbp");
 1947   if (do_polling() && C->is_method_compilation()) {
 1948     st->print("\t");
 1949     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1950                  "ja      #safepoint_stub\t"
 1951                  "# Safepoint: poll for GC");
 1952   }
 1953 }
 1954 #endif
 1955 
 1956 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1957 {
 1958   Compile* C = ra_->C;
 1959 
 1960   if (generate_vzeroupper(C)) {
 1961     // Clear upper bits of YMM registers when current compiled code uses
 1962     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1963     __ vzeroupper();
 1964   }
 1965 
 1966   int framesize = C->output()->frame_size_in_bytes();
 1967   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the words for the return address (already pushed) and for RBP.
 1970   framesize -= 2*wordSize;
 1971 
 1972   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1973 
 1974   if (framesize) {
 1975     __ addq(rsp, framesize);
 1976   }
 1977 
 1978   __ popq(rbp);
 1979 
 1980   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1981     __ reserved_stack_check();
 1982   }
 1983 
 1984   if (do_polling() && C->is_method_compilation()) {
 1985     Label dummy_label;
 1986     Label* code_stub = &dummy_label;
 1987     if (!C->output()->in_scratch_emit_size()) {
 1988       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1989       C->output()->add_stub(stub);
 1990       code_stub = &stub->entry();
 1991     }
 1992     __ relocate(relocInfo::poll_return_type);
 1993     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1994   }
 1995 }
 1996 
 1997 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1998 {
 1999   return MachNode::size(ra_); // too many variables; just compute it
 2000                               // the hard way
 2001 }
 2002 
 2003 int MachEpilogNode::reloc() const
 2004 {
 2005   return 2; // a large enough number
 2006 }
 2007 
 2008 const Pipeline* MachEpilogNode::pipeline() const
 2009 {
 2010   return MachNode::pipeline_class();
 2011 }
 2012 
 2013 //=============================================================================
 2014 
 2015 enum RC {
 2016   rc_bad,
 2017   rc_int,
 2018   rc_kreg,
 2019   rc_float,
 2020   rc_stack
 2021 };
 2022 
 2023 static enum RC rc_class(OptoReg::Name reg)
 2024 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2026 
 2027   if (OptoReg::is_stack(reg)) return rc_stack;
 2028 
 2029   VMReg r = OptoReg::as_VMReg(reg);
 2030 
 2031   if (r->is_Register()) return rc_int;
 2032 
 2033   if (r->is_KRegister()) return rc_kreg;
 2034 
 2035   assert(r->is_XMMRegister(), "must be");
 2036   return rc_float;
 2037 }
 2038 
 2039 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2040 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2041                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2042 
 2043 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2044                      int stack_offset, int reg, uint ireg, outputStream* st);
 2045 
 2046 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2047                                       int dst_offset, uint ireg, outputStream* st) {
 2048   if (masm) {
 2049     switch (ireg) {
 2050     case Op_VecS:
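      // 64-bit mode has no 32-bit push/pop of memory, so borrow rax as
      // scratch and stash its old value just below rsp for the copy.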
 2051       __ movq(Address(rsp, -8), rax);
 2052       __ movl(rax, Address(rsp, src_offset));
 2053       __ movl(Address(rsp, dst_offset), rax);
 2054       __ movq(rax, Address(rsp, -8));
 2055       break;
 2056     case Op_VecD:
 2057       __ pushq(Address(rsp, src_offset));
 2058       __ popq (Address(rsp, dst_offset));
 2059       break;
 2060     case Op_VecX:
 2061       __ pushq(Address(rsp, src_offset));
 2062       __ popq (Address(rsp, dst_offset));
 2063       __ pushq(Address(rsp, src_offset+8));
 2064       __ popq (Address(rsp, dst_offset+8));
 2065       break;
 2066     case Op_VecY:
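      // Borrow xmm0 as scratch for the 256-bit copy, preserving its old
      // value just below rsp.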
 2067       __ vmovdqu(Address(rsp, -32), xmm0);
 2068       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2069       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2070       __ vmovdqu(xmm0, Address(rsp, -32));
 2071       break;
 2072     case Op_VecZ:
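      // The trailing 2 in these calls is Assembler::AVX_512bit (vector length).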
 2073       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2074       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2075       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2076       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2077       break;
 2078     default:
 2079       ShouldNotReachHere();
 2080     }
 2081 #ifndef PRODUCT
 2082   } else {
 2083     switch (ireg) {
 2084     case Op_VecS:
 2085       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2086                 "movl    rax, [rsp + #%d]\n\t"
 2087                 "movl    [rsp + #%d], rax\n\t"
 2088                 "movq    rax, [rsp - #8]",
 2089                 src_offset, dst_offset);
 2090       break;
 2091     case Op_VecD:
 2092       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2093                 "popq    [rsp + #%d]",
 2094                 src_offset, dst_offset);
 2095       break;
 2096      case Op_VecX:
 2097       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2098                 "popq    [rsp + #%d]\n\t"
 2099                 "pushq   [rsp + #%d]\n\t"
 2100                 "popq    [rsp + #%d]",
 2101                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2102       break;
 2103     case Op_VecY:
 2104       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2105                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2106                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2107                 "vmovdqu xmm0, [rsp - #32]",
 2108                 src_offset, dst_offset);
 2109       break;
 2110     case Op_VecZ:
 2111       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2112                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2113                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2114                 "vmovdqu xmm0, [rsp - #64]",
 2115                 src_offset, dst_offset);
 2116       break;
 2117     default:
 2118       ShouldNotReachHere();
 2119     }
 2120 #endif
 2121   }
 2122 }
 2123 
 2124 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2125                                        PhaseRegAlloc* ra_,
 2126                                        bool do_size,
 2127                                        outputStream* st) const {
 2128   assert(masm != nullptr || st  != nullptr, "sanity");
 2129   // Get registers to move
 2130   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2131   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2132   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2133   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2134 
 2135   enum RC src_second_rc = rc_class(src_second);
 2136   enum RC src_first_rc = rc_class(src_first);
 2137   enum RC dst_second_rc = rc_class(dst_second);
 2138   enum RC dst_first_rc = rc_class(dst_first);
 2139 
 2140   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2141          "must move at least 1 register" );
 2142 
 2143   if (src_first == dst_first && src_second == dst_second) {
 2144     // Self copy, no move
 2145     return 0;
 2146   }
 2147   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2148     uint ireg = ideal_reg();
 2149     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2152       // mem -> mem
 2153       int src_offset = ra_->reg2offset(src_first);
 2154       int dst_offset = ra_->reg2offset(dst_first);
 2155       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
 2164     } else {
 2165       ShouldNotReachHere();
 2166     }
 2167     return 0;
 2168   }
 2169   if (src_first_rc == rc_stack) {
 2170     // mem ->
 2171     if (dst_first_rc == rc_stack) {
 2172       // mem -> mem
 2173       assert(src_second != dst_first, "overlap");
 2174       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2175           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2176         // 64-bit
 2177         int src_offset = ra_->reg2offset(src_first);
 2178         int dst_offset = ra_->reg2offset(dst_first);
 2179         if (masm) {
 2180           __ pushq(Address(rsp, src_offset));
 2181           __ popq (Address(rsp, dst_offset));
 2182 #ifndef PRODUCT
 2183         } else {
 2184           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2185                     "popq    [rsp + #%d]",
 2186                      src_offset, dst_offset);
 2187 #endif
 2188         }
 2189       } else {
 2190         // 32-bit
 2191         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2192         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2193         // No pushl/popl, so:
 2194         int src_offset = ra_->reg2offset(src_first);
 2195         int dst_offset = ra_->reg2offset(dst_first);
 2196         if (masm) {
 2197           __ movq(Address(rsp, -8), rax);
 2198           __ movl(rax, Address(rsp, src_offset));
 2199           __ movl(Address(rsp, dst_offset), rax);
 2200           __ movq(rax, Address(rsp, -8));
 2201 #ifndef PRODUCT
 2202         } else {
 2203           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2204                     "movl    rax, [rsp + #%d]\n\t"
 2205                     "movl    [rsp + #%d], rax\n\t"
 2206                     "movq    rax, [rsp - #8]",
 2207                      src_offset, dst_offset);
 2208 #endif
 2209         }
 2210       }
 2211       return 0;
 2212     } else if (dst_first_rc == rc_int) {
 2213       // mem -> gpr
 2214       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2215           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2216         // 64-bit
 2217         int offset = ra_->reg2offset(src_first);
 2218         if (masm) {
 2219           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2220 #ifndef PRODUCT
 2221         } else {
 2222           st->print("movq    %s, [rsp + #%d]\t# spill",
 2223                      Matcher::regName[dst_first],
 2224                      offset);
 2225 #endif
 2226         }
 2227       } else {
 2228         // 32-bit
 2229         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2230         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2231         int offset = ra_->reg2offset(src_first);
 2232         if (masm) {
 2233           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2234 #ifndef PRODUCT
 2235         } else {
 2236           st->print("movl    %s, [rsp + #%d]\t# spill",
 2237                      Matcher::regName[dst_first],
 2238                      offset);
 2239 #endif
 2240         }
 2241       }
 2242       return 0;
 2243     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2245       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2246           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2247         // 64-bit
 2248         int offset = ra_->reg2offset(src_first);
 2249         if (masm) {
 2250           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2251 #ifndef PRODUCT
 2252         } else {
 2253           st->print("%s  %s, [rsp + #%d]\t# spill",
 2254                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2255                      Matcher::regName[dst_first],
 2256                      offset);
 2257 #endif
 2258         }
 2259       } else {
 2260         // 32-bit
 2261         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2262         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2263         int offset = ra_->reg2offset(src_first);
 2264         if (masm) {
 2265           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2266 #ifndef PRODUCT
 2267         } else {
 2268           st->print("movss   %s, [rsp + #%d]\t# spill",
 2269                      Matcher::regName[dst_first],
 2270                      offset);
 2271 #endif
 2272         }
 2273       }
 2274       return 0;
 2275     } else if (dst_first_rc == rc_kreg) {
 2276       // mem -> kreg
 2277       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2278           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2279         // 64-bit
 2280         int offset = ra_->reg2offset(src_first);
 2281         if (masm) {
 2282           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2283 #ifndef PRODUCT
 2284         } else {
 2285           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2286                      Matcher::regName[dst_first],
 2287                      offset);
 2288 #endif
 2289         }
 2290       }
 2291       return 0;
 2292     }
 2293   } else if (src_first_rc == rc_int) {
 2294     // gpr ->
 2295     if (dst_first_rc == rc_stack) {
 2296       // gpr -> mem
 2297       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2298           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2299         // 64-bit
 2300         int offset = ra_->reg2offset(dst_first);
 2301         if (masm) {
 2302           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2303 #ifndef PRODUCT
 2304         } else {
 2305           st->print("movq    [rsp + #%d], %s\t# spill",
 2306                      offset,
 2307                      Matcher::regName[src_first]);
 2308 #endif
 2309         }
 2310       } else {
 2311         // 32-bit
 2312         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2313         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2314         int offset = ra_->reg2offset(dst_first);
 2315         if (masm) {
 2316           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2317 #ifndef PRODUCT
 2318         } else {
 2319           st->print("movl    [rsp + #%d], %s\t# spill",
 2320                      offset,
 2321                      Matcher::regName[src_first]);
 2322 #endif
 2323         }
 2324       }
 2325       return 0;
 2326     } else if (dst_first_rc == rc_int) {
 2327       // gpr -> gpr
 2328       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2329           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2330         // 64-bit
 2331         if (masm) {
 2332           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2333                   as_Register(Matcher::_regEncode[src_first]));
 2334 #ifndef PRODUCT
 2335         } else {
 2336           st->print("movq    %s, %s\t# spill",
 2337                      Matcher::regName[dst_first],
 2338                      Matcher::regName[src_first]);
 2339 #endif
 2340         }
 2341         return 0;
 2342       } else {
 2343         // 32-bit
 2344         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2345         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2346         if (masm) {
 2347           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2348                   as_Register(Matcher::_regEncode[src_first]));
 2349 #ifndef PRODUCT
 2350         } else {
 2351           st->print("movl    %s, %s\t# spill",
 2352                      Matcher::regName[dst_first],
 2353                      Matcher::regName[src_first]);
 2354 #endif
 2355         }
 2356         return 0;
 2357       }
 2358     } else if (dst_first_rc == rc_float) {
 2359       // gpr -> xmm
 2360       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2361           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2362         // 64-bit
 2363         if (masm) {
 2364           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2365 #ifndef PRODUCT
 2366         } else {
 2367           st->print("movdq   %s, %s\t# spill",
 2368                      Matcher::regName[dst_first],
 2369                      Matcher::regName[src_first]);
 2370 #endif
 2371         }
 2372       } else {
 2373         // 32-bit
 2374         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2375         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2376         if (masm) {
 2377           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2378 #ifndef PRODUCT
 2379         } else {
 2380           st->print("movdl   %s, %s\t# spill",
 2381                      Matcher::regName[dst_first],
 2382                      Matcher::regName[src_first]);
 2383 #endif
 2384         }
 2385       }
 2386       return 0;
 2387     } else if (dst_first_rc == rc_kreg) {
 2388       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2389           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2390         // 64-bit
 2391         if (masm) {
 2392           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2399         }
 2400       }
 2401       Unimplemented();
 2402       return 0;
 2403     }
 2404   } else if (src_first_rc == rc_float) {
 2405     // xmm ->
 2406     if (dst_first_rc == rc_stack) {
 2407       // xmm -> mem
 2408       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2409           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2410         // 64-bit
 2411         int offset = ra_->reg2offset(dst_first);
 2412         if (masm) {
 2413           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2414 #ifndef PRODUCT
 2415         } else {
 2416           st->print("movsd   [rsp + #%d], %s\t# spill",
 2417                      offset,
 2418                      Matcher::regName[src_first]);
 2419 #endif
 2420         }
 2421       } else {
 2422         // 32-bit
 2423         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2424         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2425         int offset = ra_->reg2offset(dst_first);
 2426         if (masm) {
 2427           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2428 #ifndef PRODUCT
 2429         } else {
 2430           st->print("movss   [rsp + #%d], %s\t# spill",
 2431                      offset,
 2432                      Matcher::regName[src_first]);
 2433 #endif
 2434         }
 2435       }
 2436       return 0;
 2437     } else if (dst_first_rc == rc_int) {
 2438       // xmm -> gpr
 2439       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2440           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2441         // 64-bit
 2442         if (masm) {
 2443           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2444 #ifndef PRODUCT
 2445         } else {
 2446           st->print("movdq   %s, %s\t# spill",
 2447                      Matcher::regName[dst_first],
 2448                      Matcher::regName[src_first]);
 2449 #endif
 2450         }
 2451       } else {
 2452         // 32-bit
 2453         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2454         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2455         if (masm) {
 2456           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2457 #ifndef PRODUCT
 2458         } else {
 2459           st->print("movdl   %s, %s\t# spill",
 2460                      Matcher::regName[dst_first],
 2461                      Matcher::regName[src_first]);
 2462 #endif
 2463         }
 2464       }
 2465       return 0;
 2466     } else if (dst_first_rc == rc_float) {
 2467       // xmm -> xmm
 2468       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2469           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2470         // 64-bit
 2471         if (masm) {
 2472           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2473 #ifndef PRODUCT
 2474         } else {
 2475           st->print("%s  %s, %s\t# spill",
 2476                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2477                      Matcher::regName[dst_first],
 2478                      Matcher::regName[src_first]);
 2479 #endif
 2480         }
 2481       } else {
 2482         // 32-bit
 2483         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2484         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2485         if (masm) {
 2486           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2487 #ifndef PRODUCT
 2488         } else {
 2489           st->print("%s  %s, %s\t# spill",
 2490                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2491                      Matcher::regName[dst_first],
 2492                      Matcher::regName[src_first]);
 2493 #endif
 2494         }
 2495       }
 2496       return 0;
 2497     } else if (dst_first_rc == rc_kreg) {
 2498       assert(false, "Illegal spilling");
 2499       return 0;
 2500     }
 2501   } else if (src_first_rc == rc_kreg) {
 2502     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2504       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2505           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2506         // 64-bit
 2507         int offset = ra_->reg2offset(dst_first);
 2508         if (masm) {
 2509           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2510 #ifndef PRODUCT
 2511         } else {
 2512           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2513                      offset,
 2514                      Matcher::regName[src_first]);
 2515 #endif
 2516         }
 2517       }
 2518       return 0;
 2519     } else if (dst_first_rc == rc_int) {
 2520       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2521           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2522         // 64-bit
 2523         if (masm) {
 2524           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2525 #ifndef PRODUCT
 2526         } else {
 2527          st->print("kmovq   %s, %s\t# spill",
 2528                      Matcher::regName[dst_first],
 2529                      Matcher::regName[src_first]);
 2530 #endif
 2531         }
 2532       }
 2533       Unimplemented();
 2534       return 0;
 2535     } else if (dst_first_rc == rc_kreg) {
 2536       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2537           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2538         // 64-bit
 2539         if (masm) {
 2540           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2541 #ifndef PRODUCT
 2542         } else {
 2543          st->print("kmovq   %s, %s\t# spill",
 2544                      Matcher::regName[dst_first],
 2545                      Matcher::regName[src_first]);
 2546 #endif
 2547         }
 2548       }
 2549       return 0;
 2550     } else if (dst_first_rc == rc_float) {
 2551       assert(false, "Illegal spill");
 2552       return 0;
 2553     }
 2554   }
 2555 
  assert(false, "unhandled spill combination");
 2557   Unimplemented();
 2558   return 0;
 2559 }
 2560 
 2561 #ifndef PRODUCT
 2562 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2563   implementation(nullptr, ra_, false, st);
 2564 }
 2565 #endif
 2566 
 2567 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2568   implementation(masm, ra_, false, nullptr);
 2569 }
 2570 
 2571 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2572   return MachNode::size(ra_);
 2573 }
 2574 
 2575 //=============================================================================
 2576 #ifndef PRODUCT
 2577 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2578 {
 2579   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2580   int reg = ra_->get_reg_first(this);
 2581   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2582             Matcher::regName[reg], offset);
 2583 }
 2584 #endif
 2585 
 2586 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2587 {
 2588   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2589   int reg = ra_->get_encode(this);
 2590 
 2591   __ lea(as_Register(reg), Address(rsp, offset));
 2592 }
 2593 
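// Encoding-size sketch for the leaq emitted above (standard x86-64 encoding
// rules): opcode (1 byte) + ModRM (1) + SIB (1, required for an rsp base) +
// disp8 (1) or disp32 (4), plus a 1-byte REX prefix for legacy registers or
// a 2-byte REX2 prefix when the destination is an APX extended register
// r16-r31 (encoding > 15). This yields the 5/8 and 6/9 byte totals below.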
 2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2595 {
 2596   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2597   if (ra_->get_encode(this) > 15) {
 2598     return (offset < 0x80) ? 6 : 9; // REX2
 2599   } else {
 2600     return (offset < 0x80) ? 5 : 8; // REX
 2601   }
 2602 }
 2603 
 2604 //=============================================================================
 2605 #ifndef PRODUCT
 2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2607 {
  if (UseCompressedClassPointers) {
    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t# Inline cache check");
  } else {
    st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
    st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t# Inline cache check");
  }
 2615   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2616 }
 2617 #endif
 2618 
 2619 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2620 {
 2621   __ ic_check(InteriorEntryAlignment);
 2622 }
 2623 
 2624 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2625 {
 2626   return MachNode::size(ra_); // too many variables; just compute it
 2627                               // the hard way
 2628 }
 2629 
 2630 
 2631 //=============================================================================
 2632 
 2633 bool Matcher::supports_vector_calling_convention(void) {
 2634   return EnableVectorSupport;
 2635 }
 2636 
 2637 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2638   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2639 }
 2640 
 2641 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2642   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2643 }
 2644 
 2645 #ifdef ASSERT
 2646 static bool is_ndd_demotable(const MachNode* mdef) {
 2647   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2648 }
 2649 #endif
 2650 
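// Illustrative demotion (hypothetical register assignment): an APX NDD
// instruction such as
//   addl r16d, r17d, r18d    // EVEX-encoded, dst = src1 + src2
// can be demoted by the assembler to the shorter legacy form
//   addl r17d, r18d          // REX2-encoded, dst == src1
// if the allocator biases the definition into r17d and the live ranges of
// the definition and the first operand do not overlap.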
 2651 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2652                                             int oper_index) {
 2653   if (mdef == nullptr) {
 2654     return false;
 2655   }
 2656 
 2657   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2658       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2659     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2660     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2661     return false;
 2662   }
 2663 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any address component will
  // not result in NDD demotion by the assembler.
 2667   if (mdef->operand_num_edges(oper_index) != 1) {
 2668     return false;
 2669   }
 2670 
  // The demotion candidate must be register-mask compatible with the definition.
 2672   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2673   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2674     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2675     return false;
 2676   }
 2677 
 2678   switch (oper_index) {
  // The first operand of a MachNode corresponding to an Intel APX NDD
  // selection pattern can share its assigned register with the definition
  // operand if their live ranges do not overlap. In such a scenario the
  // instruction can be demoted to a legacy map0/map1 encoding by replacing
  // its 4-byte extended EVEX prefix with a shorter REX/REX2 encoding.
  // Demotion candidates are decorated with a special flag by the
  // instruction selector.
 2685   case 1:
 2686     return is_ndd_demotable_opr1(mdef);
 2687 
  // The definition operand of a commutative operation can also be biased
  // towards the second operand.
 2690   case 2:
 2691     return is_ndd_demotable_opr2(mdef);
 2692 
  // The current scheme only selects up to two biasing candidates.
 2694   default:
 2695     assert(false, "unhandled operand index: %s", mdef->Name());
 2696     break;
 2697   }
 2698 
 2699   return false;
 2700 }
 2701 
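// Vector values are returned in XMM0. The pair below brackets the range of
// 32-bit slots the value occupies: e.g. a 256-bit Op_VecY yields
// (XMM0h, XMM0), covering all eight slots of ymm0.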
 2702 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2703   assert(EnableVectorSupport, "sanity");
 2704   int lo = XMM0_num;
 2705   int hi = XMM0b_num;
 2706   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2707   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2708   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2709   return OptoRegPair(hi, lo);
 2710 }
 2711 
 2712 // Is this branch offset short enough that a short branch can be used?
 2713 //
 2714 // NOTE: If the platform does not provide any short branch variants, then
 2715 //       this method should return false for offset 0.
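// Worked example: a 2-byte short branch whose target lies 80 bytes before it
// is passed offset = -80; subtracting br_size gives a displacement of -82
// relative to the next instruction, which fits the signed 8-bit range, so
// the short form is usable.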
 2716 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 2720   offset -= br_size;
 2721 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 2724   if (rule == jmpConUCF2_rule)
 2725     return (-126 <= offset && offset <= 125);
 2726   return (-128 <= offset && offset <= 127);
 2727 }
 2728 
 2729 #ifdef ASSERT
 2730 // Return whether or not this register is ever used as an argument.
 2731 bool Matcher::can_be_java_arg(int reg)
 2732 {
 2733   return
 2734     reg ==  RDI_num || reg == RDI_H_num ||
 2735     reg ==  RSI_num || reg == RSI_H_num ||
 2736     reg ==  RDX_num || reg == RDX_H_num ||
 2737     reg ==  RCX_num || reg == RCX_H_num ||
 2738     reg ==   R8_num || reg ==  R8_H_num ||
 2739     reg ==   R9_num || reg ==  R9_H_num ||
 2740     reg ==  R12_num || reg == R12_H_num ||
 2741     reg == XMM0_num || reg == XMM0b_num ||
 2742     reg == XMM1_num || reg == XMM1b_num ||
 2743     reg == XMM2_num || reg == XMM2b_num ||
 2744     reg == XMM3_num || reg == XMM3b_num ||
 2745     reg == XMM4_num || reg == XMM4b_num ||
 2746     reg == XMM5_num || reg == XMM5b_num ||
 2747     reg == XMM6_num || reg == XMM6b_num ||
 2748     reg == XMM7_num || reg == XMM7b_num;
 2749 }
 2750 #endif
 2751 
 2752 uint Matcher::int_pressure_limit()
 2753 {
 2754   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2755 }
 2756 
 2757 uint Matcher::float_pressure_limit()
 2758 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
 2761   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2762   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2763   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2764 }
 2765 
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  // In 64-bit mode, code that uses a multiply-high (MulHiL) when the
  // divisor is constant is faster than the hardware DIV instruction,
  // so never ask for the assembly stub.
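  // For example, a signed x / 3 strength-reduces to a 64-bit multiply-high
  // by the magic constant 0x5555555555555556 plus a sign fix-up, which is
  // much cheaper than the long-latency IDIV it replaces.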
 2770   return false;
 2771 }
 2772 
 2773 // Register for DIVI projection of divmodI
 2774 const RegMask& Matcher::divI_proj_mask() {
 2775   return INT_RAX_REG_mask();
 2776 }
 2777 
 2778 // Register for MODI projection of divmodI
 2779 const RegMask& Matcher::modI_proj_mask() {
 2780   return INT_RDX_REG_mask();
 2781 }
 2782 
 2783 // Register for DIVL projection of divmodL
 2784 const RegMask& Matcher::divL_proj_mask() {
 2785   return LONG_RAX_REG_mask();
 2786 }
 2787 
 2788 // Register for MODL projection of divmodL
 2789 const RegMask& Matcher::modL_proj_mask() {
 2790   return LONG_RDX_REG_mask();
 2791 }
 2792 
 2793 %}
 2794 
 2795 source_hpp %{
 2796 // Header information of the source block.
 2797 // Method declarations/definitions which are used outside
 2798 // the ad-scope can conveniently be defined here.
 2799 //
 2800 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
 2802 
 2803 #include "runtime/vm_version.hpp"
 2804 
 2805 class NativeJump;
 2806 
 2807 class CallStubImpl {
 2808 
 2809   //--------------------------------------------------------------
 2810   //---<  Used for optimization in Compile::shorten_branches  >---
 2811   //--------------------------------------------------------------
 2812 
 2813  public:
 2814   // Size of call trampoline stub.
 2815   static uint size_call_trampoline() {
 2816     return 0; // no call trampolines on this platform
 2817   }
 2818 
 2819   // number of relocations needed by a call trampoline stub
 2820   static uint reloc_call_trampoline() {
 2821     return 0; // no call trampolines on this platform
 2822   }
 2823 };
 2824 
 2825 class HandlerImpl {
 2826 
 2827  public:
 2828 
 2829   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2830 
 2831   static uint size_deopt_handler() {
 2832     // one call and one jmp.
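    // call rel32 (0xE8 + imm32) is 5 bytes; jmp rel8 (0xEB + imm8) is 2.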
 2833     return 7;
 2834   }
 2835 };
 2836 
 2837 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2839     case  4: // fall-through
 2840     case  8: // fall-through
 2841     case 16: return Assembler::AVX_128bit;
 2842     case 32: return Assembler::AVX_256bit;
 2843     case 64: return Assembler::AVX_512bit;
 2844 
 2845     default: {
 2846       ShouldNotReachHere();
 2847       return Assembler::AVX_NoVec;
 2848     }
 2849   }
 2850 }
 2851 
 2852 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2853   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2854 }
 2855 
 2856 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2857   uint def_idx = use->operand_index(opnd);
 2858   Node* def = use->in(def_idx);
 2859   return vector_length_encoding(def);
 2860 }
 2861 
 2862 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2863   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2864          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2865 }
 2866 
 2867 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2868   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2869            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2870 }
 2871 
 2872 class Node::PD {
 2873 public:
 2874   enum NodeFlags : uint64_t {
 2875     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2876     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2877     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2878     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2879     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2880     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2881     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2882     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2883     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2884     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2885     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2886     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2887     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2888     _last_flag                = Flag_ndd_demotable_opr2
 2889   };
 2890 };
 2891 
 2892 %} // end source_hpp
 2893 
 2894 source %{
 2895 
 2896 #include "opto/addnode.hpp"
 2897 #include "c2_intelJccErratum_x86.hpp"
 2898 
 2899 void PhaseOutput::pd_perform_mach_node_analysis() {
 2900   if (VM_Version::has_intel_jcc_erratum()) {
 2901     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2902     _buf_sizes._code += extra_padding;
 2903   }
 2904 }
 2905 
 2906 int MachNode::pd_alignment_required() const {
 2907   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2908     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2909     return IntelJccErratum::largest_jcc_size() + 1;
 2910   } else {
 2911     return 1;
 2912   }
 2913 }
 2914 
 2915 int MachNode::compute_padding(int current_offset) const {
 2916   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2917     Compile* C = Compile::current();
 2918     PhaseOutput* output = C->output();
 2919     Block* block = output->block();
 2920     int index = output->index();
 2921     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2922   } else {
 2923     return 0;
 2924   }
 2925 }
 2926 
 2927 // Emit deopt handler code.
 2928 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2929 
 2930   // Note that the code buffer's insts_mark is always relative to insts.
 2931   // That's why we must use the macroassembler to generate a handler.
 2932   address base = __ start_a_stub(size_deopt_handler());
 2933   if (base == nullptr) {
 2934     ciEnv::current()->record_failure("CodeCache is full");
 2935     return 0;  // CodeBuffer::expand failed
 2936   }
 2937   int offset = __ offset();
 2938 
 2939   Label start;
 2940   __ bind(start);
 2941 
 2942   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2943 
 2944   int entry_offset = __ offset();
 2945 
 2946   __ jmp(start);
 2947 
 2948   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2949   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2950          "out of bounds read in post-call NOP check");
 2951   __ end_a_stub();
 2952   return entry_offset;
 2953 }
 2954 
 2955 static Assembler::Width widthForType(BasicType bt) {
 2956   if (bt == T_BYTE) {
 2957     return Assembler::B;
 2958   } else if (bt == T_SHORT) {
 2959     return Assembler::W;
 2960   } else if (bt == T_INT) {
 2961     return Assembler::D;
 2962   } else {
 2963     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2964     return Assembler::Q;
 2965   }
 2966 }
 2967 
 2968 //=============================================================================
 2969 
// Float masks come from different places depending on platform.
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
 2990 
 2991 //=============================================================================
 2992 bool Matcher::match_rule_supported(int opcode) {
 2993   if (!has_match_rule(opcode)) {
 2994     return false; // no match rule present
 2995   }
 2996   switch (opcode) {
 2997     case Op_AbsVL:
 2998     case Op_StoreVectorScatter:
 2999       if (UseAVX < 3) {
 3000         return false;
 3001       }
 3002       break;
 3003     case Op_PopCountI:
 3004     case Op_PopCountL:
 3005       if (!UsePopCountInstruction) {
 3006         return false;
 3007       }
 3008       break;
    case Op_PopCountVI:
    case Op_PopCountVL:
    case Op_CompressV:
    case Op_ExpandV:
      if (UseAVX < 2) {
        return false;
      }
 3020       break;
 3021     case Op_MulVI:
 3022       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3023         return false;
 3024       }
 3025       break;
 3026     case Op_MulVL:
 3027       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3028         return false;
 3029       }
 3030       break;
 3031     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
 3033         return false;
 3034       }
 3035       break;
 3036     case Op_AbsVB:
 3037     case Op_AbsVS:
 3038     case Op_AbsVI:
 3039     case Op_AddReductionVI:
 3040     case Op_AndReductionV:
 3041     case Op_OrReductionV:
 3042     case Op_XorReductionV:
 3043       if (UseSSE < 3) { // requires at least SSSE3
 3044         return false;
 3045       }
 3046       break;
 3047     case Op_MaxHF:
 3048     case Op_MinHF:
 3049       if (!VM_Version::supports_avx512vlbw()) {
 3050         return false;
 3051       }  // fallthrough
 3052     case Op_AddHF:
 3053     case Op_DivHF:
 3054     case Op_FmaHF:
 3055     case Op_MulHF:
 3056     case Op_ReinterpretS2HF:
 3057     case Op_ReinterpretHF2S:
 3058     case Op_SubHF:
 3059     case Op_SqrtHF:
 3060       if (!VM_Version::supports_avx512_fp16()) {
 3061         return false;
 3062       }
 3063       break;
 3064     case Op_VectorLoadShuffle:
 3065     case Op_VectorRearrange:
 3066     case Op_MulReductionVI:
 3067       if (UseSSE < 4) { // requires at least SSE4
 3068         return false;
 3069       }
 3070       break;
 3071     case Op_IsInfiniteF:
 3072     case Op_IsInfiniteD:
 3073       if (!VM_Version::supports_avx512dq()) {
 3074         return false;
 3075       }
 3076       break;
 3077     case Op_SqrtVD:
 3078     case Op_SqrtVF:
 3079     case Op_VectorMaskCmp:
 3080     case Op_VectorCastB2X:
 3081     case Op_VectorCastS2X:
 3082     case Op_VectorCastI2X:
 3083     case Op_VectorCastL2X:
 3084     case Op_VectorCastF2X:
 3085     case Op_VectorCastD2X:
 3086     case Op_VectorUCastB2X:
 3087     case Op_VectorUCastS2X:
 3088     case Op_VectorUCastI2X:
 3089     case Op_VectorMaskCast:
 3090       if (UseAVX < 1) { // enabled for AVX only
 3091         return false;
 3092       }
 3093       break;
 3094     case Op_PopulateIndex:
 3095       if (UseAVX < 2) {
 3096         return false;
 3097       }
 3098       break;
 3099     case Op_RoundVF:
 3100       if (UseAVX < 2) { // enabled for AVX2 only
 3101         return false;
 3102       }
 3103       break;
 3104     case Op_RoundVD:
 3105       if (UseAVX < 3) {
 3106         return false;  // enabled for AVX3 only
 3107       }
 3108       break;
 3109     case Op_CompareAndSwapL:
 3110     case Op_CompareAndSwapP:
 3111       break;
    case Op_StrIndexOf:
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
 3122     case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
 3124         return false;
 3125       }
 3126       break;
 3127     case Op_MulVB:
 3128     case Op_LShiftVB:
 3129     case Op_RShiftVB:
 3130     case Op_URShiftVB:
 3131     case Op_VectorInsert:
 3132     case Op_VectorLoadMask:
 3133     case Op_VectorStoreMask:
 3134     case Op_VectorBlend:
 3135       if (UseSSE < 4) {
 3136         return false;
 3137       }
 3138       break;
 3139     case Op_MaxD:
 3140     case Op_MaxF:
 3141     case Op_MinD:
 3142     case Op_MinF:
 3143       if (UseAVX < 1) { // enabled for AVX only
 3144         return false;
 3145       }
 3146       break;
 3147     case Op_CacheWB:
 3148     case Op_CacheWBPreSync:
 3149     case Op_CacheWBPostSync:
 3150       if (!VM_Version::supports_data_cache_line_flush()) {
 3151         return false;
 3152       }
 3153       break;
 3154     case Op_ExtractB:
 3155     case Op_ExtractL:
 3156     case Op_ExtractI:
 3157     case Op_RoundDoubleMode:
 3158       if (UseSSE < 4) {
 3159         return false;
 3160       }
 3161       break;
 3162     case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
 3164         return false; // 128bit vroundpd is not available
 3165       }
 3166       break;
 3167     case Op_LoadVectorGather:
 3168     case Op_LoadVectorGatherMasked:
 3169       if (UseAVX < 2) {
 3170         return false;
 3171       }
 3172       break;
 3173     case Op_FmaF:
 3174     case Op_FmaD:
 3175     case Op_FmaVD:
 3176     case Op_FmaVF:
 3177       if (!UseFMA) {
 3178         return false;
 3179       }
 3180       break;
 3181     case Op_MacroLogicV:
 3182       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3183         return false;
 3184       }
 3185       break;
 3186 
 3187     case Op_VectorCmpMasked:
 3188     case Op_VectorMaskGen:
 3189       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3190         return false;
 3191       }
 3192       break;
 3193     case Op_VectorMaskFirstTrue:
 3194     case Op_VectorMaskLastTrue:
 3195     case Op_VectorMaskTrueCount:
 3196     case Op_VectorMaskToLong:
 3197       if (UseAVX < 1) {
 3198          return false;
 3199       }
 3200       break;
 3201     case Op_RoundF:
 3202     case Op_RoundD:
 3203       break;
 3204     case Op_CopySignD:
 3205     case Op_CopySignF:
      if (UseAVX < 3) {
 3207         return false;
 3208       }
 3209       if (!VM_Version::supports_avx512vl()) {
 3210         return false;
 3211       }
 3212       break;
 3213     case Op_CompressBits:
 3214     case Op_ExpandBits:
 3215       if (!VM_Version::supports_bmi2()) {
 3216         return false;
 3217       }
 3218       break;
 3219     case Op_CompressM:
 3220       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3221         return false;
 3222       }
 3223       break;
 3224     case Op_ConvF2HF:
 3225     case Op_ConvHF2F:
 3226       if (!VM_Version::supports_float16()) {
 3227         return false;
 3228       }
 3229       break;
 3230     case Op_VectorCastF2HF:
 3231     case Op_VectorCastHF2F:
 3232       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3233         return false;
 3234       }
 3235       break;
 3236   }
 3237   return true;  // Match rules are supported by default.
 3238 }
 3239 
 3240 //------------------------------------------------------------------------
 3241 
 3242 static inline bool is_pop_count_instr_target(BasicType bt) {
 3243   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3244          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3245 }
 3246 
 3247 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3248   return match_rule_supported_vector(opcode, vlen, bt);
 3249 }
 3250 
 3251 // Identify extra cases that we might want to provide match rules for vector nodes and
 3252 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3253 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3254   if (!match_rule_supported(opcode)) {
 3255     return false;
 3256   }
 3257   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3258   //   * SSE2 supports 128bit vectors for all types;
 3259   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3260   //   * AVX2 supports 256bit vectors for all types;
 3261   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3262   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3263   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3264   // And MaxVectorSize is taken into account as well.
 3265   if (!vector_size_supported(bt, vlen)) {
 3266     return false;
 3267   }
 3268   // Special cases which require vector length follow:
 3269   //   * implementation limitations
 3270   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3271   //   * 128bit vroundpd instruction is present only in AVX1
 3272   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3273   switch (opcode) {
 3274     case Op_MaxVHF:
 3275     case Op_MinVHF:
      if (!VM_Version::supports_avx512bw()) {
        return false;
      }
      // fallthrough
 3279     case Op_AddVHF:
 3280     case Op_DivVHF:
 3281     case Op_FmaVHF:
 3282     case Op_MulVHF:
 3283     case Op_SubVHF:
 3284     case Op_SqrtVHF:
 3285       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3286         return false;
 3287       }
 3288       if (!VM_Version::supports_avx512_fp16()) {
 3289         return false;
 3290       }
 3291       break;
 3292     case Op_AbsVF:
 3293     case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
 3295         return false; // 512bit vandps and vxorps are not available
 3296       }
 3297       break;
 3298     case Op_AbsVD:
 3299     case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
 3301         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3302       }
 3303       break;
 3304     case Op_RotateRightV:
 3305     case Op_RotateLeftV:
 3306       if (bt != T_INT && bt != T_LONG) {
 3307         return false;
 3308       } // fallthrough
 3309     case Op_MacroLogicV:
 3310       if (!VM_Version::supports_evex() ||
 3311           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3312         return false;
 3313       }
 3314       break;
 3315     case Op_ClearArray:
 3316     case Op_VectorMaskGen:
 3317     case Op_VectorCmpMasked:
 3318       if (!VM_Version::supports_avx512bw()) {
 3319         return false;
 3320       }
 3321       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3322         return false;
 3323       }
 3324       break;
 3325     case Op_LoadVectorMasked:
 3326     case Op_StoreVectorMasked:
 3327       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3328         return false;
 3329       }
 3330       break;
 3331     case Op_UMinV:
 3332     case Op_UMaxV:
 3333       if (UseAVX == 0) {
 3334         return false;
 3335       }
 3336       break;
 3337     case Op_UMinReductionV:
 3338     case Op_UMaxReductionV:
 3339       if (UseAVX == 0) {
 3340         return false;
 3341       }
 3342       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3343         return false;
 3344       }
 3345       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3346         return false;
 3347       }
 3348       break;
 3349     case Op_MaxV:
 3350     case Op_MinV:
 3351       if (UseSSE < 4 && is_integral_type(bt)) {
 3352         return false;
 3353       }
 3354       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3355           // Float/Double intrinsics are enabled for AVX family currently.
 3356           if (UseAVX == 0) {
 3357             return false;
 3358           }
 3359           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3360             return false;
 3361           }
 3362       }
 3363       break;
 3364     case Op_CallLeafVector:
 3365       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3366         return false;
 3367       }
 3368       break;
 3369     case Op_AddReductionVI:
 3370       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3371         return false;
 3372       }
 3373       // fallthrough
 3374     case Op_AndReductionV:
 3375     case Op_OrReductionV:
 3376     case Op_XorReductionV:
 3377       if (is_subword_type(bt) && (UseSSE < 4)) {
 3378         return false;
 3379       }
 3380       break;
 3381     case Op_MinReductionV:
 3382     case Op_MaxReductionV:
 3383       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3384         return false;
 3385       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3386         return false;
 3387       }
 3388       // Float/Double intrinsics enabled for AVX family.
 3389       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3390         return false;
 3391       }
 3392       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3393         return false;
 3394       }
 3395       break;
 3396     case Op_VectorBlend:
 3397       if (UseAVX == 0 && size_in_bits < 128) {
 3398         return false;
 3399       }
 3400       break;
 3401     case Op_VectorTest:
 3402       if (UseSSE < 4) {
 3403         return false; // Implementation limitation
 3404       } else if (size_in_bits < 32) {
 3405         return false; // Implementation limitation
 3406       }
 3407       break;
 3408     case Op_VectorLoadShuffle:
 3409     case Op_VectorRearrange:
      if (vlen == 2) {
 3411         return false; // Implementation limitation due to how shuffle is loaded
 3412       } else if (size_in_bits == 256 && UseAVX < 2) {
 3413         return false; // Implementation limitation
 3414       }
 3415       break;
 3416     case Op_VectorLoadMask:
 3417     case Op_VectorMaskCast:
 3418       if (size_in_bits == 256 && UseAVX < 2) {
 3419         return false; // Implementation limitation
 3420       }
 3421       // fallthrough
 3422     case Op_VectorStoreMask:
 3423       if (vlen == 2) {
 3424         return false; // Implementation limitation
 3425       }
 3426       break;
 3427     case Op_PopulateIndex:
 3428       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3429         return false;
 3430       }
 3431       break;
 3432     case Op_VectorCastB2X:
 3433     case Op_VectorCastS2X:
 3434     case Op_VectorCastI2X:
 3435       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3436         return false;
 3437       }
 3438       break;
 3439     case Op_VectorCastL2X:
 3440       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3441         return false;
 3442       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3443         return false;
 3444       }
 3445       break;
 3446     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversions to sub-word types
        // happen after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3450         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3451         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3452           return false;
 3453         }
 3454       }
 3455       // fallthrough
 3456     case Op_VectorCastD2X:
 3457       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3458         return false;
 3459       }
 3460       break;
 3461     case Op_VectorCastF2HF:
 3462     case Op_VectorCastHF2F:
 3463       if (!VM_Version::supports_f16c() &&
 3464          ((!VM_Version::supports_evex() ||
 3465          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3466         return false;
 3467       }
 3468       break;
 3469     case Op_RoundVD:
 3470       if (!VM_Version::supports_avx512dq()) {
 3471         return false;
 3472       }
 3473       break;
 3474     case Op_MulReductionVI:
 3475       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3476         return false;
 3477       }
 3478       break;
 3479     case Op_LoadVectorGatherMasked:
 3480       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3481         return false;
 3482       }
 3483       if (is_subword_type(bt) &&
 3484          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3485           (size_in_bits < 64)                                      ||
 3486           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3487         return false;
 3488       }
 3489       break;
 3490     case Op_StoreVectorScatterMasked:
 3491     case Op_StoreVectorScatter:
 3492       if (is_subword_type(bt)) {
 3493         return false;
 3494       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3495         return false;
 3496       }
 3497       // fallthrough
 3498     case Op_LoadVectorGather:
 3499       if (!is_subword_type(bt) && size_in_bits == 64) {
 3500         return false;
 3501       }
 3502       if (is_subword_type(bt) && size_in_bits < 64) {
 3503         return false;
 3504       }
 3505       break;
 3506     case Op_SaturatingAddV:
 3507     case Op_SaturatingSubV:
 3508       if (UseAVX < 1) {
 3509         return false; // Implementation limitation
 3510       }
 3511       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3512         return false;
 3513       }
 3514       break;
 3515     case Op_SelectFromTwoVector:
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
 3532     case Op_MaskAll:
 3533       if (!VM_Version::supports_evex()) {
 3534         return false;
 3535       }
 3536       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3537         return false;
 3538       }
 3539       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3540         return false;
 3541       }
 3542       break;
 3543     case Op_VectorMaskCmp:
 3544       if (vlen < 2 || size_in_bits < 32) {
 3545         return false;
 3546       }
 3547       break;
 3548     case Op_CompressM:
 3549       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3550         return false;
 3551       }
 3552       break;
 3553     case Op_CompressV:
 3554     case Op_ExpandV:
 3555       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3556         return false;
 3557       }
      if (size_in_bits < 128) {
        return false;
      }
      // fallthrough
 3561     case Op_VectorLongToMask:
 3562       if (UseAVX < 1) {
 3563         return false;
 3564       }
 3565       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3566         return false;
 3567       }
 3568       break;
 3569     case Op_SignumVD:
 3570     case Op_SignumVF:
 3571       if (UseAVX < 1) {
 3572         return false;
 3573       }
 3574       break;
 3575     case Op_PopCountVI:
 3576     case Op_PopCountVL: {
 3577         if (!is_pop_count_instr_target(bt) &&
 3578             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3579           return false;
 3580         }
 3581       }
 3582       break;
 3583     case Op_ReverseV:
 3584     case Op_ReverseBytesV:
 3585       if (UseAVX < 2) {
 3586         return false;
 3587       }
 3588       break;
 3589     case Op_CountTrailingZerosV:
 3590     case Op_CountLeadingZerosV:
 3591       if (UseAVX < 2) {
 3592         return false;
 3593       }
 3594       break;
 3595   }
  return true;  // Match rules are supported by default.
 3597 }
 3598 
 3599 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked operations
  // share the IR node of their non-masked counterpart, with the mask edge being
  // the differentiator. This routine therefore does a strict check for the
  // existence of masked operation patterns, returning false for all opcodes
  // other than the ones whose masked instruction patterns are defined in this file.
 3606   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3607     return false;
 3608   }
 3609 
 3610   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3611   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3612     return false;
 3613   }
  switch (opcode) {
 3615     // Unary masked operations
 3616     case Op_AbsVB:
 3617     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      // fallthrough
 3621     case Op_AbsVI:
 3622     case Op_AbsVL:
 3623       return true;
 3624 
 3625     // Ternary masked operations
 3626     case Op_FmaVF:
 3627     case Op_FmaVD:
 3628       return true;
 3629 
 3630     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3632         return false;
 3633       }
 3634       return true;
 3635 
 3636     // Binary masked operations
 3637     case Op_AddVB:
 3638     case Op_AddVS:
 3639     case Op_SubVB:
 3640     case Op_SubVS:
 3641     case Op_MulVS:
 3642     case Op_LShiftVS:
 3643     case Op_RShiftVS:
 3644     case Op_URShiftVS:
 3645       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3646       if (!VM_Version::supports_avx512bw()) {
 3647         return false;  // Implementation limitation
 3648       }
 3649       return true;
 3650 
 3651     case Op_MulVL:
 3652       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3653       if (!VM_Version::supports_avx512dq()) {
 3654         return false;  // Implementation limitation
 3655       }
 3656       return true;
 3657 
 3658     case Op_AndV:
 3659     case Op_OrV:
 3660     case Op_XorV:
 3661     case Op_RotateRightV:
 3662     case Op_RotateLeftV:
 3663       if (bt != T_INT && bt != T_LONG) {
 3664         return false; // Implementation limitation
 3665       }
 3666       return true;
 3667 
 3668     case Op_VectorLoadMask:
 3669       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3670       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3671         return false;
 3672       }
 3673       return true;
 3674 
 3675     case Op_AddVI:
 3676     case Op_AddVL:
 3677     case Op_AddVF:
 3678     case Op_AddVD:
 3679     case Op_SubVI:
 3680     case Op_SubVL:
 3681     case Op_SubVF:
 3682     case Op_SubVD:
 3683     case Op_MulVI:
 3684     case Op_MulVF:
 3685     case Op_MulVD:
 3686     case Op_DivVF:
 3687     case Op_DivVD:
 3688     case Op_SqrtVF:
 3689     case Op_SqrtVD:
 3690     case Op_LShiftVI:
 3691     case Op_LShiftVL:
 3692     case Op_RShiftVI:
 3693     case Op_RShiftVL:
 3694     case Op_URShiftVI:
 3695     case Op_URShiftVL:
 3696     case Op_LoadVectorMasked:
 3697     case Op_StoreVectorMasked:
 3698     case Op_LoadVectorGatherMasked:
 3699     case Op_StoreVectorScatterMasked:
 3700       return true;
 3701 
 3702     case Op_UMinV:
 3703     case Op_UMaxV:
 3704       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3705         return false;
 3706       } // fallthrough
 3707     case Op_MaxV:
 3708     case Op_MinV:
 3709       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3710         return false; // Implementation limitation
 3711       }
 3712       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3713         return false; // Implementation limitation
 3714       }
 3715       return true;
 3716     case Op_SaturatingAddV:
 3717     case Op_SaturatingSubV:
 3718       if (!is_subword_type(bt)) {
 3719         return false;
 3720       }
 3721       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3722         return false; // Implementation limitation
 3723       }
 3724       return true;
 3725 
 3726     case Op_VectorMaskCmp:
 3727       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3728         return false; // Implementation limitation
 3729       }
 3730       return true;
 3731 
 3732     case Op_VectorRearrange:
 3733       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3734         return false; // Implementation limitation
 3735       }
 3736       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3737         return false; // Implementation limitation
 3738       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3739         return false; // Implementation limitation
 3740       }
 3741       return true;
 3742 
 3743     // Binary Logical operations
 3744     case Op_AndVMask:
 3745     case Op_OrVMask:
 3746     case Op_XorVMask:
 3747       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3748         return false; // Implementation limitation
 3749       }
 3750       return true;
 3751 
 3752     case Op_PopCountVI:
 3753     case Op_PopCountVL:
 3754       if (!is_pop_count_instr_target(bt)) {
 3755         return false;
 3756       }
 3757       return true;
 3758 
 3759     case Op_MaskAll:
 3760       return true;
 3761 
 3762     case Op_CountLeadingZerosV:
      if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
        return true;
      } // fallthrough to default
 3766     default:
 3767       return false;
 3768   }
 3769 }
 3770 
 3771 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3772   return false;
 3773 }
 3774 
 3775 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3776 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3777   switch (elem_bt) {
 3778     case T_BYTE:  return false;
 3779     case T_SHORT: return !VM_Version::supports_avx512bw();
 3780     case T_INT:   return !VM_Version::supports_avx();
 3781     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3782     default:
 3783       ShouldNotReachHere();
 3784       return false;
 3785   }
 3786 }
 3787 
 3788 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3789   // Prefer predicate if the mask type is "TypeVectMask".
 3790   return vt->isa_vectmask() != nullptr;
 3791 }
 3792 
 3793 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3794   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3795   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3796   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3797       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3798     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3799     return new legVecZOper();
 3800   }
 3801   if (legacy) {
 3802     switch (ideal_reg) {
 3803       case Op_VecS: return new legVecSOper();
 3804       case Op_VecD: return new legVecDOper();
 3805       case Op_VecX: return new legVecXOper();
 3806       case Op_VecY: return new legVecYOper();
 3807       case Op_VecZ: return new legVecZOper();
 3808     }
 3809   } else {
 3810     switch (ideal_reg) {
 3811       case Op_VecS: return new vecSOper();
 3812       case Op_VecD: return new vecDOper();
 3813       case Op_VecX: return new vecXOper();
 3814       case Op_VecY: return new vecYOper();
 3815       case Op_VecZ: return new vecZOper();
 3816     }
 3817   }
 3818   ShouldNotReachHere();
 3819   return nullptr;
 3820 }
 3821 
 3822 bool Matcher::is_reg2reg_move(MachNode* m) {
 3823   switch (m->rule()) {
 3824     case MoveVec2Leg_rule:
 3825     case MoveLeg2Vec_rule:
 3826     case MoveF2VL_rule:
 3827     case MoveF2LEG_rule:
 3828     case MoveVL2F_rule:
 3829     case MoveLEG2F_rule:
 3830     case MoveD2VL_rule:
 3831     case MoveD2LEG_rule:
 3832     case MoveVL2D_rule:
 3833     case MoveLEG2D_rule:
 3834       return true;
 3835     default:
 3836       return false;
 3837   }
 3838 }
 3839 
 3840 bool Matcher::is_generic_vector(MachOper* opnd) {
 3841   switch (opnd->opcode()) {
 3842     case VEC:
 3843     case LEGVEC:
 3844       return true;
 3845     default:
 3846       return false;
 3847   }
 3848 }
 3849 
 3850 //------------------------------------------------------------------------
 3851 
 3852 const RegMask* Matcher::predicate_reg_mask(void) {
 3853   return &_VECTMASK_REG_mask;
 3854 }
 3855 
 3856 // Max vector size in bytes. 0 if not supported.
 3857 int Matcher::vector_width_in_bytes(BasicType bt) {
 3858   assert(is_java_primitive(bt), "only primitive type vectors");
 3859   // SSE2 supports 128bit vectors for all types.
 3860   // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
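  // (1 << UseAVX) * 8 bytes: UseAVX == 2 gives 32 bytes (256 bits),
  // UseAVX == 3 gives 64 bytes (512 bits); the SSE2 baseline is 16 bytes.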
 3862   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3863   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3864   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3865     size = (UseAVX > 2) ? 64 : 32;
 3866   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3867     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3868   // Use flag to limit vector size.
 3869   size = MIN2(size,(int)MaxVectorSize);
 3870   // Minimum 2 values in vector (or 4 for bytes).
 3871   switch (bt) {
 3872   case T_DOUBLE:
 3873   case T_LONG:
 3874     if (size < 16) return 0;
 3875     break;
 3876   case T_FLOAT:
 3877   case T_INT:
 3878     if (size < 8) return 0;
 3879     break;
 3880   case T_BOOLEAN:
 3881     if (size < 4) return 0;
 3882     break;
 3883   case T_CHAR:
 3884     if (size < 4) return 0;
 3885     break;
 3886   case T_BYTE:
 3887     if (size < 4) return 0;
 3888     break;
 3889   case T_SHORT:
 3890     if (size < 4) return 0;
 3891     break;
 3892   default:
 3893     ShouldNotReachHere();
 3894   }
 3895   return size;
 3896 }
 3897 
 3898 // Limits on vector size (number of elements) loaded into vector.
 3899 int Matcher::max_vector_size(const BasicType bt) {
 3900   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3901 }
 3902 int Matcher::min_vector_size(const BasicType bt) {
 3903   int max_size = max_vector_size(bt);
 3904   // Min size which can be loaded into vector is 4 bytes.
 3905   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3906   // Support for calling svml double64 vectors
 3907   if (bt == T_DOUBLE) {
 3908     size = 1;
 3909   }
 3910   return MIN2(size,max_size);
 3911 }
 3912 
 3913 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3914   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3915   // by default on Cascade Lake
 3916   if (VM_Version::is_default_intel_cascade_lake()) {
 3917     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3918   }
 3919   return Matcher::max_vector_size(bt);
 3920 }
 3921 
 3922 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3923   return -1;
 3924 }
 3925 
 3926 // Vector ideal reg corresponding to specified size in bytes
 3927 uint Matcher::vector_ideal_reg(int size) {
 3928   assert(MaxVectorSize >= size, "");
 3929   switch(size) {
 3930     case  4: return Op_VecS;
 3931     case  8: return Op_VecD;
 3932     case 16: return Op_VecX;
 3933     case 32: return Op_VecY;
 3934     case 64: return Op_VecZ;
 3935   }
 3936   ShouldNotReachHere();
 3937   return 0;
 3938 }
 3939 
 3940 // Check for shift by small constant as well
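// A shift count in [0, 3] can be folded into the scale (1/2/4/8) of an x86
// addressing mode, e.g. (LShiftX idx 3) becomes the idx*8 part of
// [base + idx*8 + disp].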
 3941 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3942   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3943       shift->in(2)->get_int() <= 3 &&
 3944       // Are there other uses besides address expressions?
 3945       !matcher->is_visited(shift)) {
 3946     address_visited.set(shift->_idx); // Flag as address_visited
 3947     mstack.push(shift->in(2), Matcher::Visit);
 3948     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3952     if (conv->Opcode() == Op_ConvI2L &&
 3953         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3954         // Are there other uses besides address expressions?
 3955         !matcher->is_visited(conv)) {
 3956       address_visited.set(conv->_idx); // Flag as address_visited
 3957       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3958     } else {
 3959       mstack.push(conv, Matcher::Pre_Visit);
 3960     }
 3961     return true;
 3962   }
 3963   return false;
 3964 }
 3965 
// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that the pair can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
 3972 //
 3973 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3974 // This is a temporary solution until we make DAGs expressible in ADL.
 3975 template<typename ConType>
 3976 class FusedPatternMatcher {
 3977   Node* _op1_node;
 3978   Node* _mop_node;
 3979   int _con_op;
 3980 
 3981   static int match_next(Node* n, int next_op, int next_op_idx) {
 3982     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3983       return -1;
 3984     }
 3985 
 3986     if (next_op_idx == -1) { // n is commutative, try rotations
 3987       if (n->in(1)->Opcode() == next_op) {
 3988         return 1;
 3989       } else if (n->in(2)->Opcode() == next_op) {
 3990         return 2;
 3991       }
 3992     } else {
 3993       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3994       if (n->in(next_op_idx)->Opcode() == next_op) {
 3995         return next_op_idx;
 3996       }
 3997     }
 3998     return -1;
 3999   }
 4000 
 4001  public:
 4002   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4003     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4004 
 4005   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4006              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4007              typename ConType::NativeType con_value) {
 4008     if (_op1_node->Opcode() != op1) {
 4009       return false;
 4010     }
 4011     if (_mop_node->outcnt() > 2) {
 4012       return false;
 4013     }
 4014     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4015     if (op1_op2_idx == -1) {
 4016       return false;
 4017     }
 4018     // Memory operation must be the other edge
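    // ((idx & 1) + 1) maps input index 1 -> 2 and 2 -> 1.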
 4019     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4020 
 4021     // Check that the mop node is really what we want
 4022     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4023       Node* op2_node = _op1_node->in(op1_op2_idx);
 4024       if (op2_node->outcnt() > 1) {
 4025         return false;
 4026       }
 4027       assert(op2_node->Opcode() == op2, "Should be");
 4028       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4029       if (op2_con_idx == -1) {
 4030         return false;
 4031       }
 4032       // Memory operation must be the other edge
 4033       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4034       // Check that the memory operation is the same node
 4035       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4036         // Now check the constant
 4037         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4038         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4039           return true;
 4040         }
 4041       }
 4042     }
 4043     return false;
 4044   }
 4045 };
 4046 
 4047 static bool is_bmi_pattern(Node* n, Node* m) {
 4048   assert(UseBMI1Instructions, "sanity");
 4049   if (n != nullptr && m != nullptr) {
 4050     if (m->Opcode() == Op_LoadI) {
 4051       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
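      // (AndI (SubI 0 load) load)   is x & -x      -> blsi  (isolate lowest set bit)
      // (AndI (AddI load -1) load)  is x & (x - 1) -> blsr  (reset lowest set bit)
      // (XorI (AddI load -1) load)  is x ^ (x - 1) -> blsmsk (mask up to lowest set bit)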
 4052       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4053              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4054              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4055     } else if (m->Opcode() == Op_LoadL) {
 4056       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4057       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4058              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4059              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4060     }
 4061   }
 4062   return false;
 4063 }
 4064 
 4065 // Should the matcher clone input 'm' of node 'n'?
 4066 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4067   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4068   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4069     mstack.push(m, Visit);
 4070     return true;
 4071   }
 4072   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4073     mstack.push(m, Visit);           // m = ShiftCntV
 4074     return true;
 4075   }
 4076   if (is_encode_and_store_pattern(n, m)) {
 4077     mstack.push(m, Visit);
 4078     return true;
 4079   }
 4080   return false;
 4081 }
 4082 
 4083 // Should the Matcher clone shifts on addressing modes, expecting them
 4084 // to be subsumed into complex addressing expressions or compute them
 4085 // into registers?
 4086 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4087   Node *off = m->in(AddPNode::Offset);
 4088   if (off->is_Con()) {
 4089     address_visited.test_set(m->_idx); // Flag as address_visited
 4090     Node *adr = m->in(AddPNode::Address);
 4091 
 4092     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4093     // AtomicAdd is not an addressing expression.
 4094     // Cheap to find it by looking for screwy base.
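    // e.g. (AddP (AddP base (LShiftL idx 3)) con) folds into a single
    // addressing expression [base + idx*8 + disp32].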
 4095     if (adr->is_AddP() &&
 4096         !adr->in(AddPNode::Base)->is_top() &&
 4097         !adr->in(AddPNode::Offset)->is_Con() &&
 4098         off->get_long() == (int) (off->get_long()) && // immL32
 4099         // Are there other uses besides address expressions?
 4100         !is_visited(adr)) {
 4101       address_visited.set(adr->_idx); // Flag as address_visited
 4102       Node *shift = adr->in(AddPNode::Offset);
 4103       if (!clone_shift(shift, this, mstack, address_visited)) {
 4104         mstack.push(shift, Pre_Visit);
 4105       }
 4106       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4107       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4108     } else {
 4109       mstack.push(adr, Pre_Visit);
 4110     }
 4111 
 4112     // Clone X+offset as it also folds into most addressing expressions
 4113     mstack.push(off, Visit);
 4114     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4115     return true;
 4116   } else if (clone_shift(off, this, mstack, address_visited)) {
 4117     address_visited.test_set(m->_idx); // Flag as address_visited
 4118     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4119     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4120     return true;
 4121   }
 4122   return false;
 4123 }
 4124 
 4125 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4126   switch (bt) {
 4127     case BoolTest::eq:
 4128       return Assembler::eq;
 4129     case BoolTest::ne:
 4130       return Assembler::neq;
 4131     case BoolTest::le:
 4132     case BoolTest::ule:
 4133       return Assembler::le;
 4134     case BoolTest::ge:
 4135     case BoolTest::uge:
 4136       return Assembler::nlt;
 4137     case BoolTest::lt:
 4138     case BoolTest::ult:
 4139       return Assembler::lt;
 4140     case BoolTest::gt:
 4141     case BoolTest::ugt:
 4142       return Assembler::nle;
 4143     default : ShouldNotReachHere(); return Assembler::_false;
 4144   }
 4145 }
 4146 
 4147 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4148   switch (bt) {
 4149   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4150   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4151   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4152   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4153   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4154   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4155   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4156   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4157   }
 4158 }
 4159 
 4160 // Helper methods for MachSpillCopyNode::implementation().
 4161 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4162                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4163   assert(ireg == Op_VecS || // 32bit vector
 4164          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4165           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4166          "no non-adjacent vector moves" );
 4167   if (masm) {
 4168     switch (ireg) {
 4169     case Op_VecS: // copy whole register
 4170     case Op_VecD:
 4171     case Op_VecX:
 4172       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4173         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4174       } else {
 4175         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4176      }
 4177       break;
 4178     case Op_VecY:
 4179       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4180         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4181       } else {
 4182         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4183      }
 4184       break;
 4185     case Op_VecZ:
 4186       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4187       break;
 4188     default:
 4189       ShouldNotReachHere();
 4190     }
 4191 #ifndef PRODUCT
 4192   } else {
 4193     switch (ireg) {
 4194     case Op_VecS:
 4195     case Op_VecD:
 4196     case Op_VecX:
 4197       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4198       break;
 4199     case Op_VecY:
 4200     case Op_VecZ:
 4201       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4202       break;
 4203     default:
 4204       ShouldNotReachHere();
 4205     }
 4206 #endif
 4207   }
 4208 }
 4209 
 4210 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4211                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4212   if (masm) {
 4213     if (is_load) {
 4214       switch (ireg) {
 4215       case Op_VecS:
 4216         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4217         break;
 4218       case Op_VecD:
 4219         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4220         break;
 4221       case Op_VecX:
 4222         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4223           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4224         } else {
 4225           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4227         }
 4228         break;
 4229       case Op_VecY:
 4230         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4231           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4232         } else {
 4233           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4235         }
 4236         break;
 4237       case Op_VecZ:
 4238         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4239         break;
 4240       default:
 4241         ShouldNotReachHere();
 4242       }
 4243     } else { // store
 4244       switch (ireg) {
 4245       case Op_VecS:
 4246         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4247         break;
 4248       case Op_VecD:
 4249         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4250         break;
 4251       case Op_VecX:
 4252         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4253           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4254         }
 4255         else {
 4256           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4257         }
 4258         break;
 4259       case Op_VecY:
 4260         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4261           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4262         }
 4263         else {
 4264           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4265         }
 4266         break;
 4267       case Op_VecZ:
 4268         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4269         break;
 4270       default:
 4271         ShouldNotReachHere();
 4272       }
 4273     }
 4274 #ifndef PRODUCT
 4275   } else {
 4276     if (is_load) {
 4277       switch (ireg) {
 4278       case Op_VecS:
 4279         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4280         break;
 4281       case Op_VecD:
 4282         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4283         break;
 4284        case Op_VecX:
 4285         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4286         break;
 4287       case Op_VecY:
 4288       case Op_VecZ:
 4289         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4290         break;
 4291       default:
 4292         ShouldNotReachHere();
 4293       }
 4294     } else { // store
 4295       switch (ireg) {
 4296       case Op_VecS:
 4297         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4298         break;
 4299       case Op_VecD:
 4300         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4301         break;
 4302        case Op_VecX:
 4303         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4304         break;
 4305       case Op_VecY:
 4306       case Op_VecZ:
 4307         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4308         break;
 4309       default:
 4310         ShouldNotReachHere();
 4311       }
 4312     }
 4313 #endif
 4314   }
 4315 }
 4316 
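// Build a flat byte-array image of a vector constant: 'con' replicated
// 'len' times as elements of type 'bt'.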
 4317 template <class T>
 4318 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4319   int size = type2aelembytes(bt) * len;
 4320   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4321   for (int i = 0; i < len; i++) {
 4322     int offset = i * type2aelembytes(bt);
 4323     switch (bt) {
 4324       case T_BYTE: val->at(i) = con; break;
 4325       case T_SHORT: {
 4326         jshort c = con;
 4327         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4328         break;
 4329       }
 4330       case T_INT: {
 4331         jint c = con;
 4332         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4333         break;
 4334       }
 4335       case T_LONG: {
 4336         jlong c = con;
 4337         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4338         break;
 4339       }
 4340       case T_FLOAT: {
 4341         jfloat c = con;
 4342         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4343         break;
 4344       }
 4345       case T_DOUBLE: {
 4346         jdouble c = con;
 4347         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4348         break;
 4349       }
 4350       default: assert(false, "%s", type2name(bt));
 4351     }
 4352   }
 4353   return val;
 4354 }
 4355 
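// A 64-bit pattern with only the sign (high) bit of each 'bt'-sized element set.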
 4356 static inline jlong high_bit_set(BasicType bt) {
 4357   switch (bt) {
 4358     case T_BYTE:  return 0x8080808080808080;
 4359     case T_SHORT: return 0x8000800080008000;
 4360     case T_INT:   return 0x8000000080000000;
 4361     case T_LONG:  return 0x8000000000000000;
 4362     default:
 4363       ShouldNotReachHere();
 4364       return 0;
 4365   }
 4366 }
 4367 
 4368 #ifndef PRODUCT
 4369   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4370     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4371   }
 4372 #endif
 4373 
 4374   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4375     __ nop(_count);
 4376   }
 4377 
 4378   uint MachNopNode::size(PhaseRegAlloc*) const {
 4379     return _count;
 4380   }
 4381 
 4382 #ifndef PRODUCT
 4383   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4384     st->print("# breakpoint");
 4385   }
 4386 #endif
 4387 
 4388   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4389     __ int3();
 4390   }
 4391 
 4392   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4393     return MachNode::size(ra_);
 4394   }
 4395 
 4396 %}
 4397 
 4398 //----------ENCODING BLOCK-----------------------------------------------------
 4399 // This block specifies the encoding classes used by the compiler to
 4400 // output byte streams.  Encoding classes are parameterized macros
 4401 // used by Machine Instruction Nodes in order to generate the bit
 4402 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 4406 // which returns its register number when queried.  CONST_INTER causes
 4407 // an operand to generate a function which returns the value of the
 4408 // constant when queried.  MEMORY_INTER causes an operand to generate
 4409 // four functions which return the Base Register, the Index Register,
 4410 // the Scale Value, and the Offset Value of the operand when queried.
 4411 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
 4413 // associated with each basic boolean condition for a conditional
 4414 // instruction.
 4415 //
// Instructions specify two basic values for encoding.  (A function is
// also available to check whether a constant displacement is an oop.)
// They use the ins_encode keyword to specify their encoding
 4419 // classes (which must be a sequence of enc_class names, and their
 4420 // parameters, specified in the encoding block), and they use the
 4421 // opcode keyword to specify, in order, their primary, secondary, and
 4422 // tertiary opcode.  Only the opcode sections which a particular
 4423 // instruction needs for encoding need to be specified.
 4424 encode %{
 4425   enc_class cdql_enc(no_rax_rdx_RegI div)
 4426   %{
 4427     // Full implementation of Java idiv and irem; checks for
 4428     // special case as described in JVM spec., p.243 & p.271.
 4429     //
 4430     //         normal case                           special case
 4431     //
 4432     // input : rax: dividend                         min_int
 4433     //         reg: divisor                          -1
 4434     //
 4435     // output: rax: quotient  (= rax idiv reg)       min_int
 4436     //         rdx: remainder (= rax irem reg)       0
 4437     //
    //  Code sequence:
 4439     //
 4440     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4441     //    5:   75 07/08                jne    e <normal>
 4442     //    7:   33 d2                   xor    %edx,%edx
 4443     //  [div >= 8 -> offset + 1]
 4444     //  [REX_B]
 4445     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4446     //    c:   74 03/04                je     11 <done>
 4447     // 000000000000000e <normal>:
 4448     //    e:   99                      cltd
 4449     //  [div >= 8 -> offset + 1]
 4450     //  [REX_B]
 4451     //    f:   f7 f9                   idiv   $div
 4452     // 0000000000000011 <done>:
 4453     Label normal;
 4454     Label done;
 4455 
 4456     // cmp    $0x80000000,%eax
 4457     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4458 
 4459     // jne    e <normal>
 4460     __ jccb(Assembler::notEqual, normal);
 4461 
 4462     // xor    %edx,%edx
 4463     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4464 
    // cmp    $0xffffffffffffffff,$div
 4466     __ cmpl($div$$Register, -1);
 4467 
 4468     // je     11 <done>
 4469     __ jccb(Assembler::equal, done);
 4470 
 4471     // <normal>
 4472     // cltd
 4473     __ bind(normal);
 4474     __ cdql();
 4475 
 4476     // idivl
 4477     // <done>
 4478     __ idivl($div$$Register);
 4479     __ bind(done);
 4480   %}
 4481 
 4482   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4483   %{
 4484     // Full implementation of Java ldiv and lrem; checks for
 4485     // special case as described in JVM spec., p.243 & p.271.
 4486     //
 4487     //         normal case                           special case
 4488     //
 4489     // input : rax: dividend                         min_long
 4490     //         reg: divisor                          -1
 4491     //
 4492     // output: rax: quotient  (= rax idiv reg)       min_long
 4493     //         rdx: remainder (= rax irem reg)       0
 4494     //
    //  Code sequence:
 4496     //
 4497     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4498     //    7:   00 00 80
 4499     //    a:   48 39 d0                cmp    %rdx,%rax
 4500     //    d:   75 08                   jne    17 <normal>
 4501     //    f:   33 d2                   xor    %edx,%edx
 4502     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4503     //   15:   74 05                   je     1c <done>
 4504     // 0000000000000017 <normal>:
 4505     //   17:   48 99                   cqto
 4506     //   19:   48 f7 f9                idiv   $div
 4507     // 000000000000001c <done>:
 4508     Label normal;
 4509     Label done;
 4510 
 4511     // mov    $0x8000000000000000,%rdx
 4512     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4513 
 4514     // cmp    %rdx,%rax
 4515     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4516 
 4517     // jne    17 <normal>
 4518     __ jccb(Assembler::notEqual, normal);
 4519 
 4520     // xor    %edx,%edx
 4521     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4522 
 4523     // cmp    $0xffffffffffffffff,$div
 4524     __ cmpq($div$$Register, -1);
 4525 
    // je     1c <done>
 4527     __ jccb(Assembler::equal, done);
 4528 
 4529     // <normal>
 4530     // cqto
 4531     __ bind(normal);
 4532     __ cdqq();
 4533 
    // idivq
 4535     // <done>
 4536     __ idivq($div$$Register);
 4537     __ bind(done);
 4538   %}
 4539 
 4540   enc_class clear_avx %{
 4541     DEBUG_ONLY(int off0 = __ offset());
 4542     if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
 4546       __ vzeroupper();
 4547     }
 4548     DEBUG_ONLY(int off1 = __ offset());
 4549     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4550   %}
 4551 
 4552   enc_class Java_To_Runtime(method meth) %{
 4553     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4554     __ call(r10);
 4555     __ post_call_nop();
 4556   %}
 4557 
 4558   enc_class Java_Static_Call(method meth)
 4559   %{
 4560     // JAVA STATIC CALL
 4561     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4562     // determine who we intended to call.
 4563     if (!_method) {
 4564       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4565     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4566       // The NOP here is purely to ensure that eliding a call to
 4567       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4568       __ addr_nop_5();
 4569       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4570     } else {
 4571       int method_index = resolved_method_index(masm);
 4572       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4573                                                   : static_call_Relocation::spec(method_index);
 4574       address mark = __ pc();
 4575       int call_offset = __ offset();
 4576       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4577       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4578         // Calls of the same statically bound method can share
 4579         // a stub to the interpreter.
 4580         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4581       } else {
 4582         // Emit stubs for static call.
 4583         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4584         __ clear_inst_mark();
 4585         if (stub == nullptr) {
 4586           ciEnv::current()->record_failure("CodeCache is full");
 4587           return;
 4588         }
 4589       }
 4590     }
 4591     __ post_call_nop();
 4592   %}
 4593 
 4594   enc_class Java_Dynamic_Call(method meth) %{
 4595     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4596     __ post_call_nop();
 4597   %}
 4598 
 4599   enc_class call_epilog %{
 4600     if (VerifyStackAtCalls) {
 4601       // Check that stack depth is unchanged: find majik cookie on stack
 4602       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4603       Label L;
 4604       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4605       __ jccb(Assembler::equal, L);
 4606       // Die if stack mismatch
 4607       __ int3();
 4608       __ bind(L);
 4609     }
 4610   %}
 4611 
 4612 %}
 4613 
 4614 //----------FRAME--------------------------------------------------------------
 4615 // Definition of frame structure and management information.
 4616 //
 4617 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4618 //                             |   (to get allocators register number
 4619 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4620 //  r   CALLER     |        |
 4621 //  o     |        +--------+      pad to even-align allocators stack-slot
 4622 //  w     V        |  pad0  |        numbers; owned by CALLER
 4623 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4624 //  h     ^        |   in   |  5
 4625 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4626 //  |     |        |        |  3
 4627 //  |     |        +--------+
 4628 //  V     |        | old out|      Empty on Intel, window on Sparc
 4629 //        |    old |preserve|      Must be even aligned.
 4630 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4631 //        |        |   in   |  3   area for Intel ret address
 4632 //     Owned by    |preserve|      Empty on Sparc.
 4633 //       SELF      +--------+
 4634 //        |        |  pad2  |  2   pad to align old SP
 4635 //        |        +--------+  1
 4636 //        |        | locks  |  0
 4637 //        |        +--------+----> OptoReg::stack0(), even aligned
 4638 //        |        |  pad1  | 11   pad to align new SP
 4639 //        |        +--------+
 4640 //        |        |        | 10
 4641 //        |        | spills |  9   spills
 4642 //        V        |        |  8   (pad0 slot for callee)
 4643 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4644 //        ^        |  out   |  7
 4645 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4646 //     Owned by    +--------+
 4647 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4648 //        |    new |preserve|      Must be even-aligned.
 4649 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4650 //        |        |        |
 4651 //
 4652 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4653 //         known from SELF's arguments and the Java calling convention.
 4654 //         Region 6-7 is determined per call site.
 4655 // Note 2: If the calling convention leaves holes in the incoming argument
 4656 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4657 //         are owned by the CALLEE.  Holes should not be necessary in the
 4658 //         incoming area, as the Java calling convention is completely under
 4659 //         the control of the AD file.  Doubles can be sorted and packed to
 4660 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4661 //         varargs C calling conventions.
 4662 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4663 //         even aligned with pad0 as needed.
 4664 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4665 //         region 6-11 is even aligned; it may be padded out more so that
 4666 //         the region from SP to FP meets the minimum stack alignment.
 4667 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4668 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4669 //         SP meets the minimum alignment.
 4670 
 4671 frame
 4672 %{
 4673   // These three registers define part of the calling convention
 4674   // between compiled code and the interpreter.
 4675   inline_cache_reg(RAX);                // Inline Cache Register
 4676 
 4677   // Optional: name the operand used by cisc-spilling to access
 4678   // [stack_pointer + offset]
 4679   cisc_spilling_operand_name(indOffset32);
 4680 
 4681   // Number of stack slots consumed by locking an object
 4682   sync_stack_slots(2);
 4683 
 4684   // Compiled code's Frame Pointer
 4685   frame_pointer(RSP);
 4686 
 4687   // Stack alignment requirement
 4688   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4689 
 4690   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4691   // for calls to C.  Supports the var-args backing area for register parms.
 4692   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4693 
 4694   // The after-PROLOG location of the return address.  Location of
 4695   // return address specifies a type (REG or STACK) and a number
 4696   // representing the register number (i.e. - use a register name) or
 4697   // stack slot.
 4698   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4699   // Otherwise, it is above the locks and verification slot and alignment word
 4700   return_addr(STACK - 2 +
 4701               align_up((Compile::current()->in_preserve_stack_slots() +
 4702                         Compile::current()->fixed_slots()),
 4703                        stack_alignment_in_slots()));
 4704 
 4705   // Location of compiled Java return values.  Same as C for now.
 4706   return_value
 4707   %{
 4708     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4709            "only return normal values");
 4710 
 4711     static const int lo[Op_RegL + 1] = {
 4712       0,
 4713       0,
 4714       RAX_num,  // Op_RegN
 4715       RAX_num,  // Op_RegI
 4716       RAX_num,  // Op_RegP
 4717       XMM0_num, // Op_RegF
 4718       XMM0_num, // Op_RegD
 4719       RAX_num   // Op_RegL
 4720     };
 4721     static const int hi[Op_RegL + 1] = {
 4722       0,
 4723       0,
 4724       OptoReg::Bad, // Op_RegN
 4725       OptoReg::Bad, // Op_RegI
 4726       RAX_H_num,    // Op_RegP
 4727       OptoReg::Bad, // Op_RegF
 4728       XMM0b_num,    // Op_RegD
 4729       RAX_H_num     // Op_RegL
 4730     };
 4731     // Excluded flags and vector registers.
 4732     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4733     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4734   %}
 4735 %}
 4736 
 4737 //----------ATTRIBUTES---------------------------------------------------------
 4738 //----------Operand Attributes-------------------------------------------------
 4739 op_attrib op_cost(0);        // Required cost attribute
 4740 
 4741 //----------Instruction Attributes---------------------------------------------
 4742 ins_attrib ins_cost(100);       // Required cost attribute
 4743 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4744 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4745                                 // a non-matching short branch variant
 4746                                 // of some long branch?
 4747 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4748                                 // be a power of 2) specifies the
 4749                                 // alignment that some part of the
 4750                                 // instruction (not necessarily the
 4751                                 // start) requires.  If > 1, a
 4752                                 // compute_padding() function must be
 4753                                 // provided for the instruction
 4754 
 4755 // Whether this node is expanded during code emission into a sequence of
 4756 // instructions and the first instruction can perform an implicit null check.
 4757 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4758 
 4759 //----------OPERANDS-----------------------------------------------------------
 4760 // Operand definitions must precede instruction definitions for correct parsing
 4761 // in the ADLC because operands constitute user defined types which are used in
 4762 // instruction definitions.
 4763 
 4764 //----------Simple Operands----------------------------------------------------
 4765 // Immediate Operands
 4766 // Integer Immediate
 4767 operand immI()
 4768 %{
 4769   match(ConI);
 4770 
 4771   op_cost(10);
 4772   format %{ %}
 4773   interface(CONST_INTER);
 4774 %}
 4775 
 4776 // Constant for test vs zero
 4777 operand immI_0()
 4778 %{
 4779   predicate(n->get_int() == 0);
 4780   match(ConI);
 4781 
 4782   op_cost(0);
 4783   format %{ %}
 4784   interface(CONST_INTER);
 4785 %}
 4786 
 4787 // Constant for increment
 4788 operand immI_1()
 4789 %{
 4790   predicate(n->get_int() == 1);
 4791   match(ConI);
 4792 
 4793   op_cost(0);
 4794   format %{ %}
 4795   interface(CONST_INTER);
 4796 %}
 4797 
 4798 // Constant for decrement
 4799 operand immI_M1()
 4800 %{
 4801   predicate(n->get_int() == -1);
 4802   match(ConI);
 4803 
 4804   op_cost(0);
 4805   format %{ %}
 4806   interface(CONST_INTER);
 4807 %}
 4808 
 4809 operand immI_2()
 4810 %{
 4811   predicate(n->get_int() == 2);
 4812   match(ConI);
 4813 
 4814   op_cost(0);
 4815   format %{ %}
 4816   interface(CONST_INTER);
 4817 %}
 4818 
 4819 operand immI_4()
 4820 %{
 4821   predicate(n->get_int() == 4);
 4822   match(ConI);
 4823 
 4824   op_cost(0);
 4825   format %{ %}
 4826   interface(CONST_INTER);
 4827 %}
 4828 
 4829 operand immI_8()
 4830 %{
 4831   predicate(n->get_int() == 8);
 4832   match(ConI);
 4833 
 4834   op_cost(0);
 4835   format %{ %}
 4836   interface(CONST_INTER);
 4837 %}
 4838 
 4839 // Valid scale values for addressing modes
 4840 operand immI2()
 4841 %{
 4842   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4843   match(ConI);
 4844 
 4845   format %{ %}
 4846   interface(CONST_INTER);
 4847 %}
 4848 
 4849 operand immU7()
 4850 %{
 4851   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4852   match(ConI);
 4853 
 4854   op_cost(5);
 4855   format %{ %}
 4856   interface(CONST_INTER);
 4857 %}
 4858 
 4859 operand immI8()
 4860 %{
 4861   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4862   match(ConI);
 4863 
 4864   op_cost(5);
 4865   format %{ %}
 4866   interface(CONST_INTER);
 4867 %}
 4868 
 4869 operand immU8()
 4870 %{
 4871   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4872   match(ConI);
 4873 
 4874   op_cost(5);
 4875   format %{ %}
 4876   interface(CONST_INTER);
 4877 %}
 4878 
 4879 operand immI16()
 4880 %{
 4881   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4882   match(ConI);
 4883 
 4884   op_cost(10);
 4885   format %{ %}
 4886   interface(CONST_INTER);
 4887 %}
 4888 
 4889 // Int Immediate non-negative
 4890 operand immU31()
 4891 %{
 4892   predicate(n->get_int() >= 0);
 4893   match(ConI);
 4894 
 4895   op_cost(0);
 4896   format %{ %}
 4897   interface(CONST_INTER);
 4898 %}
 4899 
 4900 // Pointer Immediate
 4901 operand immP()
 4902 %{
 4903   match(ConP);
 4904 
 4905   op_cost(10);
 4906   format %{ %}
 4907   interface(CONST_INTER);
 4908 %}
 4909 
 4910 // Null Pointer Immediate
 4911 operand immP0()
 4912 %{
 4913   predicate(n->get_ptr() == 0);
 4914   match(ConP);
 4915 
 4916   op_cost(5);
 4917   format %{ %}
 4918   interface(CONST_INTER);
 4919 %}
 4920 
 4921 // Pointer Immediate
 4922 operand immN() %{
 4923   match(ConN);
 4924 
 4925   op_cost(10);
 4926   format %{ %}
 4927   interface(CONST_INTER);
 4928 %}
 4929 
 4930 operand immNKlass() %{
 4931   match(ConNKlass);
 4932 
 4933   op_cost(10);
 4934   format %{ %}
 4935   interface(CONST_INTER);
 4936 %}
 4937 
 4938 // Null Pointer Immediate
 4939 operand immN0() %{
 4940   predicate(n->get_narrowcon() == 0);
 4941   match(ConN);
 4942 
 4943   op_cost(5);
 4944   format %{ %}
 4945   interface(CONST_INTER);
 4946 %}
 4947 
 4948 operand immP31()
 4949 %{
 4950   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4951             && (n->get_ptr() >> 31) == 0);
 4952   match(ConP);
 4953 
 4954   op_cost(5);
 4955   format %{ %}
 4956   interface(CONST_INTER);
 4957 %}
 4958 
 4959 
 4960 // Long Immediate
 4961 operand immL()
 4962 %{
 4963   match(ConL);
 4964 
 4965   op_cost(20);
 4966   format %{ %}
 4967   interface(CONST_INTER);
 4968 %}
 4969 
 4970 // Long Immediate 8-bit
 4971 operand immL8()
 4972 %{
 4973   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4974   match(ConL);
 4975 
 4976   op_cost(5);
 4977   format %{ %}
 4978   interface(CONST_INTER);
 4979 %}
 4980 
 4981 // Long Immediate 32-bit unsigned
 4982 operand immUL32()
 4983 %{
 4984   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4985   match(ConL);
 4986 
 4987   op_cost(10);
 4988   format %{ %}
 4989   interface(CONST_INTER);
 4990 %}
 4991 
 4992 // Long Immediate 32-bit signed
 4993 operand immL32()
 4994 %{
 4995   predicate(n->get_long() == (int) (n->get_long()));
 4996   match(ConL);
 4997 
 4998   op_cost(15);
 4999   format %{ %}
 5000   interface(CONST_INTER);
 5001 %}
 5002 
 5003 operand immL_Pow2()
 5004 %{
 5005   predicate(is_power_of_2((julong)n->get_long()));
 5006   match(ConL);
 5007 
 5008   op_cost(15);
 5009   format %{ %}
 5010   interface(CONST_INTER);
 5011 %}
 5012 
 5013 operand immL_NotPow2()
 5014 %{
 5015   predicate(is_power_of_2((julong)~n->get_long()));
 5016   match(ConL);
 5017 
 5018   op_cost(15);
 5019   format %{ %}
 5020   interface(CONST_INTER);
 5021 %}
 5022 
 5023 // Long Immediate zero
 5024 operand immL0()
 5025 %{
 5026   predicate(n->get_long() == 0L);
 5027   match(ConL);
 5028 
 5029   op_cost(10);
 5030   format %{ %}
 5031   interface(CONST_INTER);
 5032 %}
 5033 
 5034 // Constant for increment
 5035 operand immL1()
 5036 %{
 5037   predicate(n->get_long() == 1);
 5038   match(ConL);
 5039 
 5040   format %{ %}
 5041   interface(CONST_INTER);
 5042 %}
 5043 
 5044 // Constant for decrement
 5045 operand immL_M1()
 5046 %{
 5047   predicate(n->get_long() == -1);
 5048   match(ConL);
 5049 
 5050   format %{ %}
 5051   interface(CONST_INTER);
 5052 %}
 5053 
 5054 // Long Immediate: low 32-bit mask
 5055 operand immL_32bits()
 5056 %{
 5057   predicate(n->get_long() == 0xFFFFFFFFL);
 5058   match(ConL);
 5059   op_cost(20);
 5060 
 5061   format %{ %}
 5062   interface(CONST_INTER);
 5063 %}
 5064 
 5065 // Int Immediate: 2^n-1, positive
 5066 operand immI_Pow2M1()
 5067 %{
 5068   predicate((n->get_int() > 0)
 5069             && is_power_of_2((juint)n->get_int() + 1));
 5070   match(ConI);
 5071 
 5072   op_cost(20);
 5073   format %{ %}
 5074   interface(CONST_INTER);
 5075 %}
 5076 
 5077 // Float Immediate zero
 5078 operand immF0()
 5079 %{
 5080   predicate(jint_cast(n->getf()) == 0);
 5081   match(ConF);
 5082 
 5083   op_cost(5);
 5084   format %{ %}
 5085   interface(CONST_INTER);
 5086 %}
 5087 
 5088 // Float Immediate
 5089 operand immF()
 5090 %{
 5091   match(ConF);
 5092 
 5093   op_cost(15);
 5094   format %{ %}
 5095   interface(CONST_INTER);
 5096 %}
 5097 
 5098 // Half Float Immediate
 5099 operand immH()
 5100 %{
 5101   match(ConH);
 5102 
 5103   op_cost(15);
 5104   format %{ %}
 5105   interface(CONST_INTER);
 5106 %}
 5107 
 5108 // Double Immediate zero
 5109 operand immD0()
 5110 %{
 5111   predicate(jlong_cast(n->getd()) == 0);
 5112   match(ConD);
 5113 
 5114   op_cost(5);
 5115   format %{ %}
 5116   interface(CONST_INTER);
 5117 %}
 5118 
 5119 // Double Immediate
 5120 operand immD()
 5121 %{
 5122   match(ConD);
 5123 
 5124   op_cost(15);
 5125   format %{ %}
 5126   interface(CONST_INTER);
 5127 %}
 5128 
 5129 // Immediates for special shifts (sign extend)
 5130 
// Shift counts used when sign-extending bytes and shorts
 5132 operand immI_16()
 5133 %{
 5134   predicate(n->get_int() == 16);
 5135   match(ConI);
 5136 
 5137   format %{ %}
 5138   interface(CONST_INTER);
 5139 %}
 5140 
 5141 operand immI_24()
 5142 %{
 5143   predicate(n->get_int() == 24);
 5144   match(ConI);
 5145 
 5146   format %{ %}
 5147   interface(CONST_INTER);
 5148 %}
 5149 
 5150 // Constant for byte-wide masking
 5151 operand immI_255()
 5152 %{
 5153   predicate(n->get_int() == 255);
 5154   match(ConI);
 5155 
 5156   format %{ %}
 5157   interface(CONST_INTER);
 5158 %}
 5159 
 5160 // Constant for short-wide masking
 5161 operand immI_65535()
 5162 %{
 5163   predicate(n->get_int() == 65535);
 5164   match(ConI);
 5165 
 5166   format %{ %}
 5167   interface(CONST_INTER);
 5168 %}
 5169 
 5170 // Constant for byte-wide masking
 5171 operand immL_255()
 5172 %{
 5173   predicate(n->get_long() == 255);
 5174   match(ConL);
 5175 
 5176   format %{ %}
 5177   interface(CONST_INTER);
 5178 %}
 5179 
 5180 // Constant for short-wide masking
 5181 operand immL_65535()
 5182 %{
 5183   predicate(n->get_long() == 65535);
 5184   match(ConL);
 5185 
 5186   format %{ %}
 5187   interface(CONST_INTER);
 5188 %}
 5189 
 5190 // AOT Runtime Constants Address
 5191 operand immAOTRuntimeConstantsAddress()
 5192 %{
 5193   // Check if the address is in the range of AOT Runtime Constants
 5194   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5195   match(ConP);
 5196 
 5197   op_cost(0);
 5198   format %{ %}
 5199   interface(CONST_INTER);
 5200 %}
 5201 
 5202 operand kReg()
 5203 %{
 5204   constraint(ALLOC_IN_RC(vectmask_reg));
 5205   match(RegVectMask);
 5206   format %{%}
 5207   interface(REG_INTER);
 5208 %}
 5209 
 5210 // Register Operands
 5211 // Integer Register
 5212 operand rRegI()
 5213 %{
 5214   constraint(ALLOC_IN_RC(int_reg));
 5215   match(RegI);
 5216 
 5217   match(rax_RegI);
 5218   match(rbx_RegI);
 5219   match(rcx_RegI);
 5220   match(rdx_RegI);
 5221   match(rdi_RegI);
 5222 
 5223   format %{ %}
 5224   interface(REG_INTER);
 5225 %}
 5226 
 5227 // Special Registers
 5228 operand rax_RegI()
 5229 %{
 5230   constraint(ALLOC_IN_RC(int_rax_reg));
 5231   match(RegI);
 5232   match(rRegI);
 5233 
 5234   format %{ "RAX" %}
 5235   interface(REG_INTER);
 5236 %}
 5237 
 5238 // Special Registers
 5239 operand rbx_RegI()
 5240 %{
 5241   constraint(ALLOC_IN_RC(int_rbx_reg));
 5242   match(RegI);
 5243   match(rRegI);
 5244 
 5245   format %{ "RBX" %}
 5246   interface(REG_INTER);
 5247 %}
 5248 
 5249 operand rcx_RegI()
 5250 %{
 5251   constraint(ALLOC_IN_RC(int_rcx_reg));
 5252   match(RegI);
 5253   match(rRegI);
 5254 
 5255   format %{ "RCX" %}
 5256   interface(REG_INTER);
 5257 %}
 5258 
 5259 operand rdx_RegI()
 5260 %{
 5261   constraint(ALLOC_IN_RC(int_rdx_reg));
 5262   match(RegI);
 5263   match(rRegI);
 5264 
 5265   format %{ "RDX" %}
 5266   interface(REG_INTER);
 5267 %}
 5268 
 5269 operand rdi_RegI()
 5270 %{
 5271   constraint(ALLOC_IN_RC(int_rdi_reg));
 5272   match(RegI);
 5273   match(rRegI);
 5274 
 5275   format %{ "RDI" %}
 5276   interface(REG_INTER);
 5277 %}
 5278 
 5279 operand no_rax_rdx_RegI()
 5280 %{
 5281   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5282   match(RegI);
 5283   match(rbx_RegI);
 5284   match(rcx_RegI);
 5285   match(rdi_RegI);
 5286 
 5287   format %{ %}
 5288   interface(REG_INTER);
 5289 %}
 5290 
 5291 operand no_rbp_r13_RegI()
 5292 %{
 5293   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5294   match(RegI);
 5295   match(rRegI);
 5296   match(rax_RegI);
 5297   match(rbx_RegI);
 5298   match(rcx_RegI);
 5299   match(rdx_RegI);
 5300   match(rdi_RegI);
 5301 
 5302   format %{ %}
 5303   interface(REG_INTER);
 5304 %}
 5305 
 5306 // Pointer Register
 5307 operand any_RegP()
 5308 %{
 5309   constraint(ALLOC_IN_RC(any_reg));
 5310   match(RegP);
 5311   match(rax_RegP);
 5312   match(rbx_RegP);
 5313   match(rdi_RegP);
 5314   match(rsi_RegP);
 5315   match(rbp_RegP);
 5316   match(r15_RegP);
 5317   match(rRegP);
 5318 
 5319   format %{ %}
 5320   interface(REG_INTER);
 5321 %}
 5322 
 5323 operand rRegP()
 5324 %{
 5325   constraint(ALLOC_IN_RC(ptr_reg));
 5326   match(RegP);
 5327   match(rax_RegP);
 5328   match(rbx_RegP);
 5329   match(rdi_RegP);
 5330   match(rsi_RegP);
 5331   match(rbp_RegP);  // See Q&A below about
 5332   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5333 
 5334   format %{ %}
 5335   interface(REG_INTER);
 5336 %}
 5337 
 5338 operand rRegN() %{
 5339   constraint(ALLOC_IN_RC(int_reg));
 5340   match(RegN);
 5341 
 5342   format %{ %}
 5343   interface(REG_INTER);
 5344 %}
 5345 
// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will never be chosen
// by the allocator for an output.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, the allocator will never choose RBP for such an instruction's output either.
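
// For example (an illustrative sketch, not a rule defined in this section):
// an instruction whose input is rRegP may legally consume r15 (the TLS
// pointer), but to produce r15 an instruction must name the operand
// explicitly as its output, along the lines of:
//
//   instruct tlsLoadP(r15_RegP dst) %{
//     match(Set dst (ThreadLocal));
//     ...
//   %}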
 5356 
 5357 // This operand is not allowed to use RBP even if
 5358 // RBP is not used to hold the frame pointer.
 5359 operand no_rbp_RegP()
 5360 %{
 5361   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5362   match(RegP);
 5363   match(rbx_RegP);
 5364   match(rsi_RegP);
 5365   match(rdi_RegP);
 5366 
 5367   format %{ %}
 5368   interface(REG_INTER);
 5369 %}
 5370 
 5371 // Special Registers
 5372 // Return a pointer value
 5373 operand rax_RegP()
 5374 %{
 5375   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5376   match(RegP);
 5377   match(rRegP);
 5378 
 5379   format %{ %}
 5380   interface(REG_INTER);
 5381 %}
 5382 
 5383 // Special Registers
 5384 // Return a compressed pointer value
 5385 operand rax_RegN()
 5386 %{
 5387   constraint(ALLOC_IN_RC(int_rax_reg));
 5388   match(RegN);
 5389   match(rRegN);
 5390 
 5391   format %{ %}
 5392   interface(REG_INTER);
 5393 %}
 5394 
 5395 // Used in AtomicAdd
 5396 operand rbx_RegP()
 5397 %{
 5398   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5399   match(RegP);
 5400   match(rRegP);
 5401 
 5402   format %{ %}
 5403   interface(REG_INTER);
 5404 %}
 5405 
 5406 operand rsi_RegP()
 5407 %{
 5408   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5409   match(RegP);
 5410   match(rRegP);
 5411 
 5412   format %{ %}
 5413   interface(REG_INTER);
 5414 %}
 5415 
 5416 operand rbp_RegP()
 5417 %{
 5418   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5419   match(RegP);
 5420   match(rRegP);
 5421 
 5422   format %{ %}
 5423   interface(REG_INTER);
 5424 %}
 5425 
 5426 // Used in rep stosq
 5427 operand rdi_RegP()
 5428 %{
 5429   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5430   match(RegP);
 5431   match(rRegP);
 5432 
 5433   format %{ %}
 5434   interface(REG_INTER);
 5435 %}
 5436 
 5437 operand r15_RegP()
 5438 %{
 5439   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5440   match(RegP);
 5441   match(rRegP);
 5442 
 5443   format %{ %}
 5444   interface(REG_INTER);
 5445 %}
 5446 
 5447 operand rRegL()
 5448 %{
 5449   constraint(ALLOC_IN_RC(long_reg));
 5450   match(RegL);
 5451   match(rax_RegL);
 5452   match(rdx_RegL);
 5453 
 5454   format %{ %}
 5455   interface(REG_INTER);
 5456 %}
 5457 
 5458 // Special Registers
 5459 operand no_rax_rdx_RegL()
 5460 %{
 5461   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5462   match(RegL);
 5463   match(rRegL);
 5464 
 5465   format %{ %}
 5466   interface(REG_INTER);
 5467 %}
 5468 
 5469 operand rax_RegL()
 5470 %{
 5471   constraint(ALLOC_IN_RC(long_rax_reg));
 5472   match(RegL);
 5473   match(rRegL);
 5474 
 5475   format %{ "RAX" %}
 5476   interface(REG_INTER);
 5477 %}
 5478 
 5479 operand rcx_RegL()
 5480 %{
 5481   constraint(ALLOC_IN_RC(long_rcx_reg));
 5482   match(RegL);
 5483   match(rRegL);
 5484 
 5485   format %{ %}
 5486   interface(REG_INTER);
 5487 %}
 5488 
 5489 operand rdx_RegL()
 5490 %{
 5491   constraint(ALLOC_IN_RC(long_rdx_reg));
 5492   match(RegL);
 5493   match(rRegL);
 5494 
 5495   format %{ %}
 5496   interface(REG_INTER);
 5497 %}
 5498 
 5499 operand r11_RegL()
 5500 %{
 5501   constraint(ALLOC_IN_RC(long_r11_reg));
 5502   match(RegL);
 5503   match(rRegL);
 5504 
 5505   format %{ %}
 5506   interface(REG_INTER);
 5507 %}
 5508 
 5509 operand no_rbp_r13_RegL()
 5510 %{
 5511   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5512   match(RegL);
 5513   match(rRegL);
 5514   match(rax_RegL);
 5515   match(rcx_RegL);
 5516   match(rdx_RegL);
 5517 
 5518   format %{ %}
 5519   interface(REG_INTER);
 5520 %}
 5521 
 5522 // Flags register, used as output of compare instructions
 5523 operand rFlagsReg()
 5524 %{
 5525   constraint(ALLOC_IN_RC(int_flags));
 5526   match(RegFlags);
 5527 
 5528   format %{ "RFLAGS" %}
 5529   interface(REG_INTER);
 5530 %}
 5531 
 5532 // Flags register, used as output of FLOATING POINT compare instructions
 5533 operand rFlagsRegU()
 5534 %{
 5535   constraint(ALLOC_IN_RC(int_flags));
 5536   match(RegFlags);
 5537 
 5538   format %{ "RFLAGS_U" %}
 5539   interface(REG_INTER);
 5540 %}
 5541 
 5542 operand rFlagsRegUCF() %{
 5543   constraint(ALLOC_IN_RC(int_flags));
 5544   match(RegFlags);
 5545   predicate(!UseAPX || !VM_Version::supports_avx10_2());
 5546 
 5547   format %{ "RFLAGS_U_CF" %}
 5548   interface(REG_INTER);
 5549 %}
 5550 
 5551 operand rFlagsRegUCFE() %{
 5552   constraint(ALLOC_IN_RC(int_flags));
 5553   match(RegFlags);
 5554   predicate(UseAPX && VM_Version::supports_avx10_2());
 5555 
 5556   format %{ "RFLAGS_U_CFE" %}
 5557   interface(REG_INTER);
 5558 %}
 5559 
 5560 // Float register operands
 5561 operand regF() %{
 5562    constraint(ALLOC_IN_RC(float_reg));
 5563    match(RegF);
 5564 
 5565    format %{ %}
 5566    interface(REG_INTER);
 5567 %}
 5568 
 5569 // Float register operands
 5570 operand legRegF() %{
 5571    constraint(ALLOC_IN_RC(float_reg_legacy));
 5572    match(RegF);
 5573 
 5574    format %{ %}
 5575    interface(REG_INTER);
 5576 %}
 5577 
 5578 // Float register operands
 5579 operand vlRegF() %{
 5580    constraint(ALLOC_IN_RC(float_reg_vl));
 5581    match(RegF);
 5582 
 5583    format %{ %}
 5584    interface(REG_INTER);
 5585 %}
 5586 
 5587 // Double register operands
 5588 operand regD() %{
 5589    constraint(ALLOC_IN_RC(double_reg));
 5590    match(RegD);
 5591 
 5592    format %{ %}
 5593    interface(REG_INTER);
 5594 %}
 5595 
 5596 // Double register operands
 5597 operand legRegD() %{
 5598    constraint(ALLOC_IN_RC(double_reg_legacy));
 5599    match(RegD);
 5600 
 5601    format %{ %}
 5602    interface(REG_INTER);
 5603 %}
 5604 
 5605 // Double register operands
 5606 operand vlRegD() %{
 5607    constraint(ALLOC_IN_RC(double_reg_vl));
 5608    match(RegD);
 5609 
 5610    format %{ %}
 5611    interface(REG_INTER);
 5612 %}
 5613 
 5614 //----------Memory Operands----------------------------------------------------
 5615 // Direct Memory Operand
 5616 // operand direct(immP addr)
 5617 // %{
 5618 //   match(addr);
 5619 
 5620 //   format %{ "[$addr]" %}
 5621 //   interface(MEMORY_INTER) %{
 5622 //     base(0xFFFFFFFF);
 5623 //     index(0x4);
 5624 //     scale(0x0);
 5625 //     disp($addr);
 5626 //   %}
 5627 // %}
 5628 
 5629 // Indirect Memory Operand
 5630 operand indirect(any_RegP reg)
 5631 %{
 5632   constraint(ALLOC_IN_RC(ptr_reg));
 5633   match(reg);
 5634 
 5635   format %{ "[$reg]" %}
 5636   interface(MEMORY_INTER) %{
 5637     base($reg);
 5638     index(0x4);
 5639     scale(0x0);
 5640     disp(0x0);
 5641   %}
 5642 %}
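
// A note on the encodings used by these memory interfaces: index(0x4) is
// RSP's register number, which in an x86 SIB byte means "no index register".
// So the operand above describes a plain [base] address; a load through it
// assembles to something like "movq dst, [reg]" with no index and no
// displacement.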
 5643 
 5644 // Indirect Memory Plus Short Offset Operand
 5645 operand indOffset8(any_RegP reg, immL8 off)
 5646 %{
 5647   constraint(ALLOC_IN_RC(ptr_reg));
 5648   match(AddP reg off);
 5649 
 5650   format %{ "[$reg + $off (8-bit)]" %}
 5651   interface(MEMORY_INTER) %{
 5652     base($reg);
 5653     index(0x4);
 5654     scale(0x0);
 5655     disp($off);
 5656   %}
 5657 %}
 5658 
 5659 // Indirect Memory Plus Long Offset Operand
 5660 operand indOffset32(any_RegP reg, immL32 off)
 5661 %{
 5662   constraint(ALLOC_IN_RC(ptr_reg));
 5663   match(AddP reg off);
 5664 
 5665   format %{ "[$reg + $off (32-bit)]" %}
 5666   interface(MEMORY_INTER) %{
 5667     base($reg);
 5668     index(0x4);
 5669     scale(0x0);
 5670     disp($off);
 5671   %}
 5672 %}
 5673 
 5674 // Indirect Memory Plus Index Register Plus Offset Operand
 5675 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5676 %{
 5677   constraint(ALLOC_IN_RC(ptr_reg));
 5678   match(AddP (AddP reg lreg) off);
 5679 
 5680   op_cost(10);
 5681   format %{"[$reg + $off + $lreg]" %}
 5682   interface(MEMORY_INTER) %{
 5683     base($reg);
 5684     index($lreg);
 5685     scale(0x0);
 5686     disp($off);
 5687   %}
 5688 %}
 5689 
 5690 // Indirect Memory Plus Index Register Plus Offset Operand
 5691 operand indIndex(any_RegP reg, rRegL lreg)
 5692 %{
 5693   constraint(ALLOC_IN_RC(ptr_reg));
 5694   match(AddP reg lreg);
 5695 
 5696   op_cost(10);
 5697   format %{"[$reg + $lreg]" %}
 5698   interface(MEMORY_INTER) %{
 5699     base($reg);
 5700     index($lreg);
 5701     scale(0x0);
 5702     disp(0x0);
 5703   %}
 5704 %}
 5705 
 5706 // Indirect Memory Times Scale Plus Index Register
 5707 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5708 %{
 5709   constraint(ALLOC_IN_RC(ptr_reg));
 5710   match(AddP reg (LShiftL lreg scale));
 5711 
 5712   op_cost(10);
 5713   format %{"[$reg + $lreg << $scale]" %}
 5714   interface(MEMORY_INTER) %{
 5715     base($reg);
 5716     index($lreg);
 5717     scale($scale);
 5718     disp(0x0);
 5719   %}
 5720 %}
 5721 
 5722 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5723 %{
 5724   constraint(ALLOC_IN_RC(ptr_reg));
 5725   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5726   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5727 
 5728   op_cost(10);
 5729   format %{"[$reg + pos $idx << $scale]" %}
 5730   interface(MEMORY_INTER) %{
 5731     base($reg);
 5732     index($idx);
 5733     scale($scale);
 5734     disp(0x0);
 5735   %}
 5736 %}
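
// Why these "Pos" operands insist on a non-negative int index (a sketch of
// the reasoning): 32-bit operations on x86-64 zero the upper 32 bits of the
// destination register, so the 64-bit register holds the zero extension of
// the int.  ConvI2L denotes sign extension, and the two agree only when the
// value is known to be >= 0, which is what the range-check predicate
// (_lo >= 0) establishes.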
 5737 
 5738 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5739 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5740 %{
 5741   constraint(ALLOC_IN_RC(ptr_reg));
 5742   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5743 
 5744   op_cost(10);
 5745   format %{"[$reg + $off + $lreg << $scale]" %}
 5746   interface(MEMORY_INTER) %{
 5747     base($reg);
 5748     index($lreg);
 5749     scale($scale);
 5750     disp($off);
 5751   %}
 5752 %}
 5753 
 5754 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5755 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5756 %{
 5757   constraint(ALLOC_IN_RC(ptr_reg));
 5758   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5759   match(AddP (AddP reg (ConvI2L idx)) off);
 5760 
 5761   op_cost(10);
 5762   format %{"[$reg + $off + $idx]" %}
 5763   interface(MEMORY_INTER) %{
 5764     base($reg);
 5765     index($idx);
 5766     scale(0x0);
 5767     disp($off);
 5768   %}
 5769 %}
 5770 
 5771 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5772 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5773 %{
 5774   constraint(ALLOC_IN_RC(ptr_reg));
 5775   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5776   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5777 
 5778   op_cost(10);
 5779   format %{"[$reg + $off + $idx << $scale]" %}
 5780   interface(MEMORY_INTER) %{
 5781     base($reg);
 5782     index($idx);
 5783     scale($scale);
 5784     disp($off);
 5785   %}
 5786 %}
 5787 
 5788 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset"
// addressing without a base register, so we can't free r12 even with
// CompressedOops::base() == nullptr.
 5791 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5792   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5793   constraint(ALLOC_IN_RC(ptr_reg));
 5794   match(AddP (DecodeN reg) off);
 5795 
 5796   op_cost(10);
 5797   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5798   interface(MEMORY_INTER) %{
 5799     base(0xc); // R12
 5800     index($reg);
 5801     scale(0x3);
 5802     disp($off);
 5803   %}
 5804 %}
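
// A sketch of the address this operand produces, assuming a narrow oop N in
// $reg, CompressedOops::shift() == 3, and the heap base kept in R12:
//
//   oop address = R12 + (N << 3) + $off    e.g. movq rax, [r12 + rbx*8 + 16]
//
// Folding the decode into the addressing mode is what makes reserving R12 as
// the compressed-oop base worthwhile.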
 5805 
 5806 // Indirect Memory Operand
 5807 operand indirectNarrow(rRegN reg)
 5808 %{
 5809   predicate(CompressedOops::shift() == 0);
 5810   constraint(ALLOC_IN_RC(ptr_reg));
 5811   match(DecodeN reg);
 5812 
 5813   format %{ "[$reg]" %}
 5814   interface(MEMORY_INTER) %{
 5815     base($reg);
 5816     index(0x4);
 5817     scale(0x0);
 5818     disp(0x0);
 5819   %}
 5820 %}
 5821 
 5822 // Indirect Memory Plus Short Offset Operand
 5823 operand indOffset8Narrow(rRegN reg, immL8 off)
 5824 %{
 5825   predicate(CompressedOops::shift() == 0);
 5826   constraint(ALLOC_IN_RC(ptr_reg));
 5827   match(AddP (DecodeN reg) off);
 5828 
 5829   format %{ "[$reg + $off (8-bit)]" %}
 5830   interface(MEMORY_INTER) %{
 5831     base($reg);
 5832     index(0x4);
 5833     scale(0x0);
 5834     disp($off);
 5835   %}
 5836 %}
 5837 
 5838 // Indirect Memory Plus Long Offset Operand
 5839 operand indOffset32Narrow(rRegN reg, immL32 off)
 5840 %{
 5841   predicate(CompressedOops::shift() == 0);
 5842   constraint(ALLOC_IN_RC(ptr_reg));
 5843   match(AddP (DecodeN reg) off);
 5844 
 5845   format %{ "[$reg + $off (32-bit)]" %}
 5846   interface(MEMORY_INTER) %{
 5847     base($reg);
 5848     index(0x4);
 5849     scale(0x0);
 5850     disp($off);
 5851   %}
 5852 %}
 5853 
 5854 // Indirect Memory Plus Index Register Plus Offset Operand
 5855 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5856 %{
 5857   predicate(CompressedOops::shift() == 0);
 5858   constraint(ALLOC_IN_RC(ptr_reg));
 5859   match(AddP (AddP (DecodeN reg) lreg) off);
 5860 
 5861   op_cost(10);
 5862   format %{"[$reg + $off + $lreg]" %}
 5863   interface(MEMORY_INTER) %{
 5864     base($reg);
 5865     index($lreg);
 5866     scale(0x0);
 5867     disp($off);
 5868   %}
 5869 %}
 5870 
 5871 // Indirect Memory Plus Index Register Plus Offset Operand
 5872 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5873 %{
 5874   predicate(CompressedOops::shift() == 0);
 5875   constraint(ALLOC_IN_RC(ptr_reg));
 5876   match(AddP (DecodeN reg) lreg);
 5877 
 5878   op_cost(10);
 5879   format %{"[$reg + $lreg]" %}
 5880   interface(MEMORY_INTER) %{
 5881     base($reg);
 5882     index($lreg);
 5883     scale(0x0);
 5884     disp(0x0);
 5885   %}
 5886 %}
 5887 
 5888 // Indirect Memory Times Scale Plus Index Register
 5889 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5890 %{
 5891   predicate(CompressedOops::shift() == 0);
 5892   constraint(ALLOC_IN_RC(ptr_reg));
 5893   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5894 
 5895   op_cost(10);
 5896   format %{"[$reg + $lreg << $scale]" %}
 5897   interface(MEMORY_INTER) %{
 5898     base($reg);
 5899     index($lreg);
 5900     scale($scale);
 5901     disp(0x0);
 5902   %}
 5903 %}
 5904 
 5905 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5906 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5907 %{
 5908   predicate(CompressedOops::shift() == 0);
 5909   constraint(ALLOC_IN_RC(ptr_reg));
 5910   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5911 
 5912   op_cost(10);
 5913   format %{"[$reg + $off + $lreg << $scale]" %}
 5914   interface(MEMORY_INTER) %{
 5915     base($reg);
 5916     index($lreg);
 5917     scale($scale);
 5918     disp($off);
 5919   %}
 5920 %}
 5921 
 5922 // Indirect Memory Times Plus Positive Index Register Plus Offset Operand
 5923 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5924 %{
 5925   constraint(ALLOC_IN_RC(ptr_reg));
 5926   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5927   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5928 
 5929   op_cost(10);
 5930   format %{"[$reg + $off + $idx]" %}
 5931   interface(MEMORY_INTER) %{
 5932     base($reg);
 5933     index($idx);
 5934     scale(0x0);
 5935     disp($off);
 5936   %}
 5937 %}
 5938 
 5939 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5940 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5941 %{
 5942   constraint(ALLOC_IN_RC(ptr_reg));
 5943   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5944   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5945 
 5946   op_cost(10);
 5947   format %{"[$reg + $off + $idx << $scale]" %}
 5948   interface(MEMORY_INTER) %{
 5949     base($reg);
 5950     index($idx);
 5951     scale($scale);
 5952     disp($off);
 5953   %}
 5954 %}
 5955 
 5956 //----------Special Memory Operands--------------------------------------------
 5957 // Stack Slot Operand - This operand is used for loading and storing temporary
 5958 //                      values on the stack where a match requires a value to
 5959 //                      flow through memory.
 5960 operand stackSlotP(sRegP reg)
 5961 %{
 5962   constraint(ALLOC_IN_RC(stack_slots));
 5963   // No match rule because this operand is only generated in matching
 5964 
 5965   format %{ "[$reg]" %}
 5966   interface(MEMORY_INTER) %{
 5967     base(0x4);   // RSP
 5968     index(0x4);  // No Index
 5969     scale(0x0);  // No Scale
 5970     disp($reg);  // Stack Offset
 5971   %}
 5972 %}
 5973 
 5974 operand stackSlotI(sRegI reg)
 5975 %{
 5976   constraint(ALLOC_IN_RC(stack_slots));
 5977   // No match rule because this operand is only generated in matching
 5978 
 5979   format %{ "[$reg]" %}
 5980   interface(MEMORY_INTER) %{
 5981     base(0x4);   // RSP
 5982     index(0x4);  // No Index
 5983     scale(0x0);  // No Scale
 5984     disp($reg);  // Stack Offset
 5985   %}
 5986 %}
 5987 
 5988 operand stackSlotF(sRegF reg)
 5989 %{
 5990   constraint(ALLOC_IN_RC(stack_slots));
 5991   // No match rule because this operand is only generated in matching
 5992 
 5993   format %{ "[$reg]" %}
 5994   interface(MEMORY_INTER) %{
 5995     base(0x4);   // RSP
 5996     index(0x4);  // No Index
 5997     scale(0x0);  // No Scale
 5998     disp($reg);  // Stack Offset
 5999   %}
 6000 %}
 6001 
 6002 operand stackSlotD(sRegD reg)
 6003 %{
 6004   constraint(ALLOC_IN_RC(stack_slots));
 6005   // No match rule because this operand is only generated in matching
 6006 
 6007   format %{ "[$reg]" %}
 6008   interface(MEMORY_INTER) %{
 6009     base(0x4);   // RSP
 6010     index(0x4);  // No Index
 6011     scale(0x0);  // No Scale
 6012     disp($reg);  // Stack Offset
 6013   %}
 6014 %}
 6015 operand stackSlotL(sRegL reg)
 6016 %{
 6017   constraint(ALLOC_IN_RC(stack_slots));
 6018   // No match rule because this operand is only generated in matching
 6019 
 6020   format %{ "[$reg]" %}
 6021   interface(MEMORY_INTER) %{
 6022     base(0x4);   // RSP
 6023     index(0x4);  // No Index
 6024     scale(0x0);  // No Scale
 6025     disp($reg);  // Stack Offset
 6026   %}
 6027 %}
 6028 
 6029 //----------Conditional Branch Operands----------------------------------------
 6030 // Comparison Op  - This is the operation of the comparison, and is limited to
 6031 //                  the following set of codes:
 6032 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6033 //
 6034 // Other attributes of the comparison, such as unsignedness, are specified
 6035 // by the comparison instruction that sets a condition code flags register.
 6036 // That result is represented by a flags operand whose subtype is appropriate
 6037 // to the unsignedness (etc.) of the comparison.
 6038 //
 6039 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6040 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6041 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 6042 
 6043 // Comparison Code
 6044 operand cmpOp()
 6045 %{
 6046   match(Bool);
 6047 
 6048   format %{ "" %}
 6049   interface(COND_INTER) %{
 6050     equal(0x4, "e");
 6051     not_equal(0x5, "ne");
 6052     less(0xc, "l");
 6053     greater_equal(0xd, "ge");
 6054     less_equal(0xe, "le");
 6055     greater(0xf, "g");
 6056     overflow(0x0, "o");
 6057     no_overflow(0x1, "no");
 6058   %}
 6059 %}
 6060 
 6061 // Comparison Code, unsigned compare.  Used by FP also, with
 6062 // C2 (unordered) turned into GT or LT already.  The other bits
 6063 // C0 and C3 are turned into Carry & Zero flags.
 6064 operand cmpOpU()
 6065 %{
 6066   match(Bool);
 6067 
 6068   format %{ "" %}
 6069   interface(COND_INTER) %{
 6070     equal(0x4, "e");
 6071     not_equal(0x5, "ne");
 6072     less(0x2, "b");
 6073     greater_equal(0x3, "ae");
 6074     less_equal(0x6, "be");
 6075     greater(0x7, "a");
 6076     overflow(0x0, "o");
 6077     no_overflow(0x1, "no");
 6078   %}
 6079 %}
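
// The numbers in these interfaces are x86 condition-code nibbles, which can
// be ORed directly into an opcode base.  A sketch: Jcc is encoded as 0x0F,
// 0x80 | cc, so signed less (cc = 0xc) gives jl (0x0F 0x8C) while unsigned
// less (cc = 0x2) gives jb (0x0F 0x82); the same nibble plugs into
// SETcc (0x0F, 0x90 | cc) and CMOVcc (0x0F, 0x40 | cc).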
 6080 
 6081 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
 6085 operand cmpOpUCF() %{
 6086   match(Bool);
 6087   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6088             (n->as_Bool()->_test._test == BoolTest::lt ||
 6089              n->as_Bool()->_test._test == BoolTest::ge ||
 6090              n->as_Bool()->_test._test == BoolTest::le ||
 6091              n->as_Bool()->_test._test == BoolTest::gt ||
 6092              n->in(1)->in(1) == n->in(1)->in(2)));
 6093   format %{ "" %}
 6094   interface(COND_INTER) %{
 6095     equal(0xb, "np");
 6096     not_equal(0xa, "p");
 6097     less(0x2, "b");
 6098     greater_equal(0x3, "ae");
 6099     less_equal(0x6, "be");
 6100     greater(0x7, "a");
 6101     overflow(0x0, "o");
 6102     no_overflow(0x1, "no");
 6103   %}
 6104 %}
 6105 
 6106 
 6107 // Floating comparisons that can be fixed up with extra conditional jumps
 6108 operand cmpOpUCF2() %{
 6109   match(Bool);
 6110   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6111             (n->as_Bool()->_test._test == BoolTest::ne ||
 6112              n->as_Bool()->_test._test == BoolTest::eq) &&
 6113             n->in(1)->in(1) != n->in(1)->in(2));
 6114   format %{ "" %}
 6115   interface(COND_INTER) %{
 6116     equal(0x4, "e");
 6117     not_equal(0x5, "ne");
 6118     less(0x2, "b");
 6119     greater_equal(0x3, "ae");
 6120     less_equal(0x6, "be");
 6121     greater(0x7, "a");
 6122     overflow(0x0, "o");
 6123     no_overflow(0x1, "no");
 6124   %}
 6125 %}
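
// A sketch of the fixup these eq/ne tests need: ucomiss/ucomisd report an
// unordered result as ZF = PF = CF = 1, so ZF alone cannot distinguish
// "equal" from "unordered".  The generated code therefore checks parity
// first, e.g. for eq roughly:
//
//   jp   not_equal      // unordered => treat as not equal
//   je   is_equal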
 6126 
 6127 
// Floating point comparisons that set condition flags to test more directly.
// Unsigned tests are used for G (>) and GE (>=) conditions, while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert these
// latter conditions to ones that use unsigned tests before passing them to an
// instruction, because the preceding comparison might be based on a three-way
// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
 6134 operand cmpOpUCFE()
 6135 %{
 6136   match(Bool);
 6137   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6138             (n->as_Bool()->_test._test == BoolTest::ne ||
 6139              n->as_Bool()->_test._test == BoolTest::eq ||
 6140              n->as_Bool()->_test._test == BoolTest::lt ||
 6141              n->as_Bool()->_test._test == BoolTest::ge ||
 6142              n->as_Bool()->_test._test == BoolTest::le ||
 6143              n->as_Bool()->_test._test == BoolTest::gt));
 6144 
 6145   format %{ "" %}
 6146   interface(COND_INTER) %{
 6147     equal(0x4, "e");
 6148     not_equal(0x5, "ne");
 6149     less(0x2, "b");
 6150     greater_equal(0x3, "ae");
 6151     less_equal(0x6, "be");
 6152     greater(0x7, "a");
 6153     overflow(0x0, "o");
 6154     no_overflow(0x1, "no");
 6155   %}
 6156 %}
 6157 
// Operands for bound floating point register arguments
 6159 operand rxmm0() %{
 6160   constraint(ALLOC_IN_RC(xmm0_reg));
 6161   match(VecX);
  format %{ %}
 6163   interface(REG_INTER);
 6164 %}
 6165 
 6166 // Vectors
 6167 
 6168 // Dummy generic vector class. Should be used for all vector operands.
 6169 // Replaced with vec[SDXYZ] during post-selection pass.
 6170 operand vec() %{
 6171   constraint(ALLOC_IN_RC(dynamic));
 6172   match(VecX);
 6173   match(VecY);
 6174   match(VecZ);
 6175   match(VecS);
 6176   match(VecD);
 6177 
 6178   format %{ %}
 6179   interface(REG_INTER);
 6180 %}
 6181 
 6182 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6183 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6184 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6185 // runtime code generation via reg_class_dynamic.
 6186 operand legVec() %{
 6187   constraint(ALLOC_IN_RC(dynamic));
 6188   match(VecX);
 6189   match(VecY);
 6190   match(VecZ);
 6191   match(VecS);
 6192   match(VecD);
 6193 
 6194   format %{ %}
 6195   interface(REG_INTER);
 6196 %}
 6197 
 6198 // Replaces vec during post-selection cleanup. See above.
 6199 operand vecS() %{
 6200   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6201   match(VecS);
 6202 
 6203   format %{ %}
 6204   interface(REG_INTER);
 6205 %}
 6206 
 6207 // Replaces legVec during post-selection cleanup. See above.
 6208 operand legVecS() %{
 6209   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6210   match(VecS);
 6211 
 6212   format %{ %}
 6213   interface(REG_INTER);
 6214 %}
 6215 
 6216 // Replaces vec during post-selection cleanup. See above.
 6217 operand vecD() %{
 6218   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6219   match(VecD);
 6220 
 6221   format %{ %}
 6222   interface(REG_INTER);
 6223 %}
 6224 
 6225 // Replaces legVec during post-selection cleanup. See above.
 6226 operand legVecD() %{
 6227   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6228   match(VecD);
 6229 
 6230   format %{ %}
 6231   interface(REG_INTER);
 6232 %}
 6233 
 6234 // Replaces vec during post-selection cleanup. See above.
 6235 operand vecX() %{
 6236   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6237   match(VecX);
 6238 
 6239   format %{ %}
 6240   interface(REG_INTER);
 6241 %}
 6242 
 6243 // Replaces legVec during post-selection cleanup. See above.
 6244 operand legVecX() %{
 6245   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6246   match(VecX);
 6247 
 6248   format %{ %}
 6249   interface(REG_INTER);
 6250 %}
 6251 
 6252 // Replaces vec during post-selection cleanup. See above.
 6253 operand vecY() %{
 6254   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6255   match(VecY);
 6256 
 6257   format %{ %}
 6258   interface(REG_INTER);
 6259 %}
 6260 
 6261 // Replaces legVec during post-selection cleanup. See above.
 6262 operand legVecY() %{
 6263   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6264   match(VecY);
 6265 
 6266   format %{ %}
 6267   interface(REG_INTER);
 6268 %}
 6269 
 6270 // Replaces vec during post-selection cleanup. See above.
 6271 operand vecZ() %{
 6272   constraint(ALLOC_IN_RC(vectorz_reg));
 6273   match(VecZ);
 6274 
 6275   format %{ %}
 6276   interface(REG_INTER);
 6277 %}
 6278 
 6279 // Replaces legVec during post-selection cleanup. See above.
 6280 operand legVecZ() %{
 6281   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6282   match(VecZ);
 6283 
 6284   format %{ %}
 6285   interface(REG_INTER);
 6286 %}
 6287 
 6288 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The
// classic case is memory operands.
 6294 
 6295 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6296                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6297                indCompressedOopOffset,
 6298                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6299                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6300                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
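
// For example, one instruct written against "memory mem" (such as loadB
// below) is expanded by ADLC to accept every addressing form in this class:
// [reg], [reg + off], [reg + idx << scale + off], and the narrow-oop
// variants, without a separate rule for each form.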
 6301 
 6302 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6304 pipeline %{
 6305 
 6306 //----------ATTRIBUTES---------------------------------------------------------
 6307 attributes %{
  variable_size_instructions;        // Variable-size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6311   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6312   instruction_fetch_units = 1;       // of 16 bytes
 6313 %}
 6314 
 6315 //----------RESOURCES----------------------------------------------------------
 6316 // Resources are the functional units available to the machine
 6317 
 6318 // Generic P2/P3 pipeline
 6319 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6320 // 3 instructions decoded per cycle.
 6321 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
 6323 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6324            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6325            BR, FPU,
 6326            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6327 
 6328 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6329 // Pipeline Description specifies the stages in the machine's pipeline
 6330 
 6331 // Generic P2/P3 pipeline
 6332 pipe_desc(S0, S1, S2, S3, S4, S5);
 6333 
 6334 //----------PIPELINE CLASSES---------------------------------------------------
 6335 // Pipeline Classes describe the stages in which input and output are
 6336 // referenced by the hardware pipeline.
 6337 
 6338 // Naming convention: ialu or fpu
 6339 // Then: _reg
 6340 // Then: _reg if there is a 2nd register
 6341 // Then: _long if it's a pair of instructions implementing a long
 6342 // Then: _fat if it requires the big decoder
 6343 //   Or: _mem if it requires the big decoder and a memory unit.
 6344 
 6345 // Integer ALU reg operation
 6346 pipe_class ialu_reg(rRegI dst)
 6347 %{
 6348     single_instruction;
 6349     dst    : S4(write);
 6350     dst    : S3(read);
 6351     DECODE : S0;        // any decoder
 6352     ALU    : S3;        // any alu
 6353 %}
 6354 
 6355 // Long ALU reg operation
 6356 pipe_class ialu_reg_long(rRegL dst)
 6357 %{
 6358     instruction_count(2);
 6359     dst    : S4(write);
 6360     dst    : S3(read);
 6361     DECODE : S0(2);     // any 2 decoders
 6362     ALU    : S3(2);     // both alus
 6363 %}
 6364 
 6365 // Integer ALU reg operation using big decoder
 6366 pipe_class ialu_reg_fat(rRegI dst)
 6367 %{
 6368     single_instruction;
 6369     dst    : S4(write);
 6370     dst    : S3(read);
 6371     D0     : S0;        // big decoder only
 6372     ALU    : S3;        // any alu
 6373 %}
 6374 
 6375 // Integer ALU reg-reg operation
 6376 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6377 %{
 6378     single_instruction;
 6379     dst    : S4(write);
 6380     src    : S3(read);
 6381     DECODE : S0;        // any decoder
 6382     ALU    : S3;        // any alu
 6383 %}
 6384 
 6385 // Integer ALU reg-reg operation
 6386 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6387 %{
 6388     single_instruction;
 6389     dst    : S4(write);
 6390     src    : S3(read);
 6391     D0     : S0;        // big decoder only
 6392     ALU    : S3;        // any alu
 6393 %}
 6394 
 6395 // Integer ALU reg-mem operation
 6396 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6397 %{
 6398     single_instruction;
 6399     dst    : S5(write);
 6400     mem    : S3(read);
 6401     D0     : S0;        // big decoder only
 6402     ALU    : S4;        // any alu
 6403     MEM    : S3;        // any mem
 6404 %}
 6405 
 6406 // Integer mem operation (prefetch)
 6407 pipe_class ialu_mem(memory mem)
 6408 %{
 6409     single_instruction;
 6410     mem    : S3(read);
 6411     D0     : S0;        // big decoder only
 6412     MEM    : S3;        // any mem
 6413 %}
 6414 
 6415 // Integer Store to Memory
 6416 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6417 %{
 6418     single_instruction;
 6419     mem    : S3(read);
 6420     src    : S5(read);
 6421     D0     : S0;        // big decoder only
 6422     ALU    : S4;        // any alu
 6423     MEM    : S3;
 6424 %}
 6425 
 6426 // // Long Store to Memory
 6427 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6428 // %{
 6429 //     instruction_count(2);
 6430 //     mem    : S3(read);
 6431 //     src    : S5(read);
 6432 //     D0     : S0(2);          // big decoder only; twice
 6433 //     ALU    : S4(2);     // any 2 alus
 6434 //     MEM    : S3(2);  // Both mems
 6435 // %}
 6436 
 6437 // Integer Store to Memory
 6438 pipe_class ialu_mem_imm(memory mem)
 6439 %{
 6440     single_instruction;
 6441     mem    : S3(read);
 6442     D0     : S0;        // big decoder only
 6443     ALU    : S4;        // any alu
 6444     MEM    : S3;
 6445 %}
 6446 
 6447 // Integer ALU0 reg-reg operation
 6448 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6449 %{
 6450     single_instruction;
 6451     dst    : S4(write);
 6452     src    : S3(read);
 6453     D0     : S0;        // Big decoder only
 6454     ALU0   : S3;        // only alu0
 6455 %}
 6456 
 6457 // Integer ALU0 reg-mem operation
 6458 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6459 %{
 6460     single_instruction;
 6461     dst    : S5(write);
 6462     mem    : S3(read);
 6463     D0     : S0;        // big decoder only
 6464     ALU0   : S4;        // ALU0 only
 6465     MEM    : S3;        // any mem
 6466 %}
 6467 
 6468 // Integer ALU reg-reg operation
 6469 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6470 %{
 6471     single_instruction;
 6472     cr     : S4(write);
 6473     src1   : S3(read);
 6474     src2   : S3(read);
 6475     DECODE : S0;        // any decoder
 6476     ALU    : S3;        // any alu
 6477 %}
 6478 
 6479 // Integer ALU reg-imm operation
 6480 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6481 %{
 6482     single_instruction;
 6483     cr     : S4(write);
 6484     src1   : S3(read);
 6485     DECODE : S0;        // any decoder
 6486     ALU    : S3;        // any alu
 6487 %}
 6488 
 6489 // Integer ALU reg-mem operation
 6490 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6491 %{
 6492     single_instruction;
 6493     cr     : S4(write);
 6494     src1   : S3(read);
 6495     src2   : S3(read);
 6496     D0     : S0;        // big decoder only
 6497     ALU    : S4;        // any alu
 6498     MEM    : S3;
 6499 %}
 6500 
 6501 // Conditional move reg-reg
 6502 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6503 %{
 6504     instruction_count(4);
 6505     y      : S4(read);
 6506     q      : S3(read);
 6507     p      : S3(read);
 6508     DECODE : S0(4);     // any decoder
 6509 %}
 6510 
 6511 // Conditional move reg-reg
 6512 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6513 %{
 6514     single_instruction;
 6515     dst    : S4(write);
 6516     src    : S3(read);
 6517     cr     : S3(read);
 6518     DECODE : S0;        // any decoder
 6519 %}
 6520 
 6521 // Conditional move reg-mem
 6522 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6523 %{
 6524     single_instruction;
 6525     dst    : S4(write);
 6526     src    : S3(read);
 6527     cr     : S3(read);
 6528     DECODE : S0;        // any decoder
 6529     MEM    : S3;
 6530 %}
 6531 
 6532 // Conditional move reg-reg long
 6533 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6534 %{
 6535     single_instruction;
 6536     dst    : S4(write);
 6537     src    : S3(read);
 6538     cr     : S3(read);
 6539     DECODE : S0(2);     // any 2 decoders
 6540 %}
 6541 
 6542 // Float reg-reg operation
 6543 pipe_class fpu_reg(regD dst)
 6544 %{
 6545     instruction_count(2);
 6546     dst    : S3(read);
 6547     DECODE : S0(2);     // any 2 decoders
 6548     FPU    : S3;
 6549 %}
 6550 
 6551 // Float reg-reg operation
 6552 pipe_class fpu_reg_reg(regD dst, regD src)
 6553 %{
 6554     instruction_count(2);
 6555     dst    : S4(write);
 6556     src    : S3(read);
 6557     DECODE : S0(2);     // any 2 decoders
 6558     FPU    : S3;
 6559 %}
 6560 
 6561 // Float reg-reg operation
 6562 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6563 %{
 6564     instruction_count(3);
 6565     dst    : S4(write);
 6566     src1   : S3(read);
 6567     src2   : S3(read);
 6568     DECODE : S0(3);     // any 3 decoders
 6569     FPU    : S3(2);
 6570 %}
 6571 
 6572 // Float reg-reg operation
 6573 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6574 %{
 6575     instruction_count(4);
 6576     dst    : S4(write);
 6577     src1   : S3(read);
 6578     src2   : S3(read);
 6579     src3   : S3(read);
    DECODE : S0(4);     // any decoder
 6581     FPU    : S3(2);
 6582 %}
 6583 
// Float reg-mem operation
 6585 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6586 %{
 6587     instruction_count(4);
 6588     dst    : S4(write);
 6589     src1   : S3(read);
 6590     src2   : S3(read);
 6591     src3   : S3(read);
 6592     DECODE : S1(3);     // any 3 decoders
 6593     D0     : S0;        // Big decoder only
 6594     FPU    : S3(2);
 6595     MEM    : S3;
 6596 %}
 6597 
 6598 // Float reg-mem operation
 6599 pipe_class fpu_reg_mem(regD dst, memory mem)
 6600 %{
 6601     instruction_count(2);
 6602     dst    : S5(write);
 6603     mem    : S3(read);
 6604     D0     : S0;        // big decoder only
 6605     DECODE : S1;        // any decoder for FPU POP
 6606     FPU    : S4;
 6607     MEM    : S3;        // any mem
 6608 %}
 6609 
 6610 // Float reg-mem operation
 6611 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6612 %{
 6613     instruction_count(3);
 6614     dst    : S5(write);
 6615     src1   : S3(read);
 6616     mem    : S3(read);
 6617     D0     : S0;        // big decoder only
 6618     DECODE : S1(2);     // any decoder for FPU POP
 6619     FPU    : S4;
 6620     MEM    : S3;        // any mem
 6621 %}
 6622 
 6623 // Float mem-reg operation
 6624 pipe_class fpu_mem_reg(memory mem, regD src)
 6625 %{
 6626     instruction_count(2);
 6627     src    : S5(read);
 6628     mem    : S3(read);
 6629     DECODE : S0;        // any decoder for FPU PUSH
 6630     D0     : S1;        // big decoder only
 6631     FPU    : S4;
 6632     MEM    : S3;        // any mem
 6633 %}
 6634 
 6635 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6636 %{
 6637     instruction_count(3);
 6638     src1   : S3(read);
 6639     src2   : S3(read);
 6640     mem    : S3(read);
 6641     DECODE : S0(2);     // any decoder for FPU PUSH
 6642     D0     : S1;        // big decoder only
 6643     FPU    : S4;
 6644     MEM    : S3;        // any mem
 6645 %}
 6646 
 6647 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6648 %{
 6649     instruction_count(3);
 6650     src1   : S3(read);
 6651     src2   : S3(read);
 6652     mem    : S4(read);
 6653     DECODE : S0;        // any decoder for FPU PUSH
 6654     D0     : S0(2);     // big decoder only
 6655     FPU    : S4;
 6656     MEM    : S3(2);     // any mem
 6657 %}
 6658 
 6659 pipe_class fpu_mem_mem(memory dst, memory src1)
 6660 %{
 6661     instruction_count(2);
 6662     src1   : S3(read);
 6663     dst    : S4(read);
 6664     D0     : S0(2);     // big decoder only
 6665     MEM    : S3(2);     // any mem
 6666 %}
 6667 
 6668 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6669 %{
 6670     instruction_count(3);
 6671     src1   : S3(read);
 6672     src2   : S3(read);
 6673     dst    : S4(read);
 6674     D0     : S0(3);     // big decoder only
 6675     FPU    : S4;
 6676     MEM    : S3(3);     // any mem
 6677 %}
 6678 
 6679 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6680 %{
 6681     instruction_count(3);
 6682     src1   : S4(read);
 6683     mem    : S4(read);
 6684     DECODE : S0;        // any decoder for FPU PUSH
 6685     D0     : S0(2);     // big decoder only
 6686     FPU    : S4;
 6687     MEM    : S3(2);     // any mem
 6688 %}
 6689 
 6690 // Float load constant
 6691 pipe_class fpu_reg_con(regD dst)
 6692 %{
 6693     instruction_count(2);
 6694     dst    : S5(write);
 6695     D0     : S0;        // big decoder only for the load
 6696     DECODE : S1;        // any decoder for FPU POP
 6697     FPU    : S4;
 6698     MEM    : S3;        // any mem
 6699 %}
 6700 
 6701 // Float load constant
 6702 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6703 %{
 6704     instruction_count(3);
 6705     dst    : S5(write);
 6706     src    : S3(read);
 6707     D0     : S0;        // big decoder only for the load
 6708     DECODE : S1(2);     // any decoder for FPU POP
 6709     FPU    : S4;
 6710     MEM    : S3;        // any mem
 6711 %}
 6712 
// Unconditional branch
 6714 pipe_class pipe_jmp(label labl)
 6715 %{
 6716     single_instruction;
 6717     BR   : S3;
 6718 %}
 6719 
 6720 // Conditional branch
 6721 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6722 %{
 6723     single_instruction;
 6724     cr    : S1(read);
 6725     BR    : S3;
 6726 %}
 6727 
 6728 // Allocation idiom
 6729 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6730 %{
 6731     instruction_count(1); force_serialization;
 6732     fixed_latency(6);
 6733     heap_ptr : S3(read);
 6734     DECODE   : S0(3);
 6735     D0       : S2;
 6736     MEM      : S3;
 6737     ALU      : S3(2);
 6738     dst      : S5(write);
 6739     BR       : S5;
 6740 %}
 6741 
 6742 // Generic big/slow expanded idiom
 6743 pipe_class pipe_slow()
 6744 %{
 6745     instruction_count(10); multiple_bundles; force_serialization;
 6746     fixed_latency(100);
 6747     D0  : S0(2);
 6748     MEM : S3(2);
 6749 %}
 6750 
 6751 // The real do-nothing guy
 6752 pipe_class empty()
 6753 %{
 6754     instruction_count(0);
 6755 %}
 6756 
 6757 // Define the class for the Nop node
 6758 define
 6759 %{
 6760    MachNop = empty;
 6761 %}
 6762 
 6763 %}
 6764 
 6765 //----------INSTRUCTIONS-------------------------------------------------------
 6766 //
 6767 // match      -- States which machine-independent subtree may be replaced
 6768 //               by this instruction.
 6769 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6770 //               selection to identify a minimum cost tree of machine
 6771 //               instructions that matches a tree of machine-independent
 6772 //               instructions.
 6773 // format     -- A string providing the disassembly for this instruction.
 6774 //               The value of an instruction's operand may be inserted
 6775 //               by referring to it with a '$' prefix.
 6776 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6777 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6779 //               indicate the type of machine instruction, while secondary
 6780 //               and tertiary are often used for prefix options or addressing
 6781 //               modes.
 6782 // ins_encode -- A list of encode classes with parameters. The encode class
 6783 //               name must have been defined in an 'enc_class' specification
 6784 //               in the encode section of the architecture description.
 6785 
 6786 // ============================================================================
 6787 
 6788 instruct ShouldNotReachHere() %{
 6789   match(Halt);
 6790   format %{ "stop\t# ShouldNotReachHere" %}
 6791   ins_encode %{
 6792     if (is_reachable()) {
 6793       const char* str = __ code_string(_halt_reason);
 6794       __ stop(str);
 6795     }
 6796   %}
 6797   ins_pipe(pipe_slow);
 6798 %}
 6799 
 6800 // ============================================================================
 6801 
 6802 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6803 // Load Float
 6804 instruct MoveF2VL(vlRegF dst, regF src) %{
 6805   match(Set dst src);
 6806   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6807   ins_encode %{
 6808     ShouldNotReachHere();
 6809   %}
 6810   ins_pipe( fpu_reg_reg );
 6811 %}
 6812 
 6813 // Load Float
 6814 instruct MoveF2LEG(legRegF dst, regF src) %{
 6815   match(Set dst src);
 6816   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6817   ins_encode %{
 6818     ShouldNotReachHere();
 6819   %}
 6820   ins_pipe( fpu_reg_reg );
 6821 %}
 6822 
 6823 // Load Float
 6824 instruct MoveVL2F(regF dst, vlRegF src) %{
 6825   match(Set dst src);
 6826   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6827   ins_encode %{
 6828     ShouldNotReachHere();
 6829   %}
 6830   ins_pipe( fpu_reg_reg );
 6831 %}
 6832 
 6833 // Load Float
 6834 instruct MoveLEG2F(regF dst, legRegF src) %{
 6835   match(Set dst src);
 6836   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6837   ins_encode %{
 6838     ShouldNotReachHere();
 6839   %}
 6840   ins_pipe( fpu_reg_reg );
 6841 %}
 6842 
 6843 // Load Double
 6844 instruct MoveD2VL(vlRegD dst, regD src) %{
 6845   match(Set dst src);
 6846   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6847   ins_encode %{
 6848     ShouldNotReachHere();
 6849   %}
 6850   ins_pipe( fpu_reg_reg );
 6851 %}
 6852 
 6853 // Load Double
 6854 instruct MoveD2LEG(legRegD dst, regD src) %{
 6855   match(Set dst src);
 6856   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6857   ins_encode %{
 6858     ShouldNotReachHere();
 6859   %}
 6860   ins_pipe( fpu_reg_reg );
 6861 %}
 6862 
 6863 // Load Double
 6864 instruct MoveVL2D(regD dst, vlRegD src) %{
 6865   match(Set dst src);
 6866   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6867   ins_encode %{
 6868     ShouldNotReachHere();
 6869   %}
 6870   ins_pipe( fpu_reg_reg );
 6871 %}
 6872 
 6873 // Load Double
 6874 instruct MoveLEG2D(regD dst, legRegD src) %{
 6875   match(Set dst src);
 6876   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6877   ins_encode %{
 6878     ShouldNotReachHere();
 6879   %}
 6880   ins_pipe( fpu_reg_reg );
 6881 %}
 6882 
 6883 //----------Load/Store/Move Instructions---------------------------------------
 6884 //----------Load Instructions--------------------------------------------------
 6885 
 6886 // Load Byte (8 bit signed)
 6887 instruct loadB(rRegI dst, memory mem)
 6888 %{
 6889   match(Set dst (LoadB mem));
 6890 
 6891   ins_cost(125);
 6892   format %{ "movsbl  $dst, $mem\t# byte" %}
 6893 
 6894   ins_encode %{
 6895     __ movsbl($dst$$Register, $mem$$Address);
 6896   %}
 6897 
 6898   ins_pipe(ialu_reg_mem);
 6899 %}
 6900 
 6901 // Load Byte (8 bit signed) into Long Register
 6902 instruct loadB2L(rRegL dst, memory mem)
 6903 %{
 6904   match(Set dst (ConvI2L (LoadB mem)));
 6905 
 6906   ins_cost(125);
 6907   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6908 
 6909   ins_encode %{
 6910     __ movsbq($dst$$Register, $mem$$Address);
 6911   %}
 6912 
 6913   ins_pipe(ialu_reg_mem);
 6914 %}
 6915 
 6916 // Load Unsigned Byte (8 bit UNsigned)
 6917 instruct loadUB(rRegI dst, memory mem)
 6918 %{
 6919   match(Set dst (LoadUB mem));
 6920 
 6921   ins_cost(125);
 6922   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6923 
 6924   ins_encode %{
 6925     __ movzbl($dst$$Register, $mem$$Address);
 6926   %}
 6927 
 6928   ins_pipe(ialu_reg_mem);
 6929 %}
 6930 
 6931 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6932 instruct loadUB2L(rRegL dst, memory mem)
 6933 %{
 6934   match(Set dst (ConvI2L (LoadUB mem)));
 6935 
 6936   ins_cost(125);
 6937   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6938 
 6939   ins_encode %{
 6940     __ movzbq($dst$$Register, $mem$$Address);
 6941   %}
 6942 
 6943   ins_pipe(ialu_reg_mem);
 6944 %}
 6945 
 6946 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6947 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6948   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6949   effect(KILL cr);
 6950 
 6951   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6952             "andl    $dst, right_n_bits($mask, 8)" %}
 6953   ins_encode %{
 6954     Register Rdst = $dst$$Register;
 6955     __ movzbq(Rdst, $mem$$Address);
 6956     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6957   %}
 6958   ins_pipe(ialu_reg_mem);
 6959 %}
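
// Why masking $mask down to its low 8 bits is safe above (a sketch): movzbq
// has already zeroed bits 8..63 of $dst, so mask bits above bit 7 cannot
// affect the result.  E.g. with mask = 0x1FF, andl uses 0x1FF & 0xFF = 0xFF,
// which leaves the zero-extended byte unchanged.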
 6960 
 6961 // Load Short (16 bit signed)
 6962 instruct loadS(rRegI dst, memory mem)
 6963 %{
 6964   match(Set dst (LoadS mem));
 6965 
 6966   ins_cost(125);
 6967   format %{ "movswl $dst, $mem\t# short" %}
 6968 
 6969   ins_encode %{
 6970     __ movswl($dst$$Register, $mem$$Address);
 6971   %}
 6972 
 6973   ins_pipe(ialu_reg_mem);
 6974 %}
 6975 
 6976 // Load Short (16 bit signed) to Byte (8 bit signed)
 6977 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6978   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6979 
 6980   ins_cost(125);
 6981   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6982   ins_encode %{
 6983     __ movsbl($dst$$Register, $mem$$Address);
 6984   %}
 6985   ins_pipe(ialu_reg_mem);
 6986 %}
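
// The shift pair above is the canonical ideal-graph form of a narrowing cast:
// e.g. the Java expression "(byte) aShort" becomes
// (RShiftI (LShiftI x 24) 24), which this rule collapses into a single
// sign-extending byte load when x is a LoadS.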
 6987 
 6988 // Load Short (16 bit signed) into Long Register
 6989 instruct loadS2L(rRegL dst, memory mem)
 6990 %{
 6991   match(Set dst (ConvI2L (LoadS mem)));
 6992 
 6993   ins_cost(125);
 6994   format %{ "movswq $dst, $mem\t# short -> long" %}
 6995 
 6996   ins_encode %{
 6997     __ movswq($dst$$Register, $mem$$Address);
 6998   %}
 6999 
 7000   ins_pipe(ialu_reg_mem);
 7001 %}
 7002 
 7003 // Load Unsigned Short/Char (16 bit UNsigned)
 7004 instruct loadUS(rRegI dst, memory mem)
 7005 %{
 7006   match(Set dst (LoadUS mem));
 7007 
 7008   ins_cost(125);
 7009   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7010 
 7011   ins_encode %{
 7012     __ movzwl($dst$$Register, $mem$$Address);
 7013   %}
 7014 
 7015   ins_pipe(ialu_reg_mem);
 7016 %}
 7017 
 7018 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7019 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7020   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7021 
 7022   ins_cost(125);
 7023   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7024   ins_encode %{
 7025     __ movsbl($dst$$Register, $mem$$Address);
 7026   %}
 7027   ins_pipe(ialu_reg_mem);
 7028 %}
 7029 
 7030 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7031 instruct loadUS2L(rRegL dst, memory mem)
 7032 %{
 7033   match(Set dst (ConvI2L (LoadUS mem)));
 7034 
 7035   ins_cost(125);
 7036   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7037 
 7038   ins_encode %{
 7039     __ movzwq($dst$$Register, $mem$$Address);
 7040   %}
 7041 
 7042   ins_pipe(ialu_reg_mem);
 7043 %}
 7044 
 7045 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7046 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7047   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7048 
 7049   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7050   ins_encode %{
 7051     __ movzbq($dst$$Register, $mem$$Address);
 7052   %}
 7053   ins_pipe(ialu_reg_mem);
 7054 %}
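
// A single byte load suffices above because x86 is little-endian: the low
// byte of the 16-bit value is at the lowest address, so movzbq yields exactly
// (LoadUS mem) & 0xFF, zero-extended to 64 bits.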
 7055 
 7056 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7057 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7058   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7059   effect(KILL cr);
 7060 
 7061   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7062             "andl    $dst, right_n_bits($mask, 16)" %}
 7063   ins_encode %{
 7064     Register Rdst = $dst$$Register;
 7065     __ movzwq(Rdst, $mem$$Address);
 7066     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7067   %}
 7068   ins_pipe(ialu_reg_mem);
 7069 %}
 7070 
 7071 // Load Integer
 7072 instruct loadI(rRegI dst, memory mem)
 7073 %{
 7074   match(Set dst (LoadI mem));
 7075 
 7076   ins_cost(125);
 7077   format %{ "movl    $dst, $mem\t# int" %}
 7078 
 7079   ins_encode %{
 7080     __ movl($dst$$Register, $mem$$Address);
 7081   %}
 7082 
 7083   ins_pipe(ialu_reg_mem);
 7084 %}
 7085 
 7086 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7087 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7088   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7089 
 7090   ins_cost(125);
 7091   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7092   ins_encode %{
 7093     __ movsbl($dst$$Register, $mem$$Address);
 7094   %}
 7095   ins_pipe(ialu_reg_mem);
 7096 %}
 7097 
 7098 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7099 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7100   match(Set dst (AndI (LoadI mem) mask));
 7101 
 7102   ins_cost(125);
 7103   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7104   ins_encode %{
 7105     __ movzbl($dst$$Register, $mem$$Address);
 7106   %}
 7107   ins_pipe(ialu_reg_mem);
 7108 %}
 7109 
 7110 // Load Integer (32 bit signed) to Short (16 bit signed)
 7111 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7112   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7113 
 7114   ins_cost(125);
 7115   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7116   ins_encode %{
 7117     __ movswl($dst$$Register, $mem$$Address);
 7118   %}
 7119   ins_pipe(ialu_reg_mem);
 7120 %}
 7121 
 7122 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7123 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7124   match(Set dst (AndI (LoadI mem) mask));
 7125 
 7126   ins_cost(125);
 7127   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7128   ins_encode %{
 7129     __ movzwl($dst$$Register, $mem$$Address);
 7130   %}
 7131   ins_pipe(ialu_reg_mem);
 7132 %}
 7133 
 7134 // Load Integer into Long Register
 7135 instruct loadI2L(rRegL dst, memory mem)
 7136 %{
 7137   match(Set dst (ConvI2L (LoadI mem)));
 7138 
 7139   ins_cost(125);
 7140   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7141 
 7142   ins_encode %{
 7143     __ movslq($dst$$Register, $mem$$Address);
 7144   %}
 7145 
 7146   ins_pipe(ialu_reg_mem);
 7147 %}
 7148 
 7149 // Load Integer with mask 0xFF into Long Register
 7150 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7151   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7152 
 7153   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7154   ins_encode %{
 7155     __ movzbq($dst$$Register, $mem$$Address);
 7156   %}
 7157   ins_pipe(ialu_reg_mem);
 7158 %}
 7159 
 7160 // Load Integer with mask 0xFFFF into Long Register
 7161 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7162   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7163 
 7164   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7165   ins_encode %{
 7166     __ movzwq($dst$$Register, $mem$$Address);
 7167   %}
 7168   ins_pipe(ialu_reg_mem);
 7169 %}
 7170 
 7171 // Load Integer with a 31-bit mask into Long Register
 7172 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7173   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7174   effect(KILL cr);
 7175 
 7176   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7177             "andl    $dst, $mask" %}
 7178   ins_encode %{
 7179     Register Rdst = $dst$$Register;
 7180     __ movl(Rdst, $mem$$Address);
 7181     __ andl(Rdst, $mask$$constant);
 7182   %}
 7183   ins_pipe(ialu_reg_mem);
 7184 %}
 7185 
 7186 // Load Unsigned Integer into Long Register
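// Note: on x86-64 a 32-bit mov already zero-extends into bits 63..32, so the
// AndL with the 0xFFFFFFFF mask below costs no extra instruction.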
 7187 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7188 %{
 7189   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7190 
 7191   ins_cost(125);
 7192   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7193 
 7194   ins_encode %{
 7195     __ movl($dst$$Register, $mem$$Address);
 7196   %}
 7197 
 7198   ins_pipe(ialu_reg_mem);
 7199 %}
 7200 
 7201 // Load Long
 7202 instruct loadL(rRegL dst, memory mem)
 7203 %{
 7204   match(Set dst (LoadL mem));
 7205 
 7206   ins_cost(125);
 7207   format %{ "movq    $dst, $mem\t# long" %}
 7208 
 7209   ins_encode %{
 7210     __ movq($dst$$Register, $mem$$Address);
 7211   %}
 7212 
 7213   ins_pipe(ialu_reg_mem); // XXX
 7214 %}
 7215 
 7216 // Load Range
 7217 instruct loadRange(rRegI dst, memory mem)
 7218 %{
 7219   match(Set dst (LoadRange mem));
 7220 
 7221   ins_cost(125); // XXX
 7222   format %{ "movl    $dst, $mem\t# range" %}
 7223   ins_encode %{
 7224     __ movl($dst$$Register, $mem$$Address);
 7225   %}
 7226   ins_pipe(ialu_reg_mem);
 7227 %}
 7228 
 7229 // Load Pointer
 7230 instruct loadP(rRegP dst, memory mem)
 7231 %{
 7232   match(Set dst (LoadP mem));
 7233   predicate(n->as_Load()->barrier_data() == 0);
 7234 
 7235   ins_cost(125); // XXX
 7236   format %{ "movq    $dst, $mem\t# ptr" %}
 7237   ins_encode %{
 7238     __ movq($dst$$Register, $mem$$Address);
 7239   %}
 7240   ins_pipe(ialu_reg_mem); // XXX
 7241 %}
 7242 
 7243 // Load Compressed Pointer
 7244 instruct loadN(rRegN dst, memory mem)
 7245 %{
 7246    predicate(n->as_Load()->barrier_data() == 0);
 7247    match(Set dst (LoadN mem));
 7248 
 7249    ins_cost(125); // XXX
 7250    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7251    ins_encode %{
 7252      __ movl($dst$$Register, $mem$$Address);
 7253    %}
 7254    ins_pipe(ialu_reg_mem); // XXX
 7255 %}
 7256 
 7257 
 7258 // Load Klass Pointer
 7259 instruct loadKlass(rRegP dst, memory mem)
 7260 %{
 7261   match(Set dst (LoadKlass mem));
 7262 
 7263   ins_cost(125); // XXX
 7264   format %{ "movq    $dst, $mem\t# class" %}
 7265   ins_encode %{
 7266     __ movq($dst$$Register, $mem$$Address);
 7267   %}
 7268   ins_pipe(ialu_reg_mem); // XXX
 7269 %}
 7270 
 7271 // Load narrow Klass Pointer
 7272 instruct loadNKlass(rRegN dst, memory mem)
 7273 %{
 7274   predicate(!UseCompactObjectHeaders);
 7275   match(Set dst (LoadNKlass mem));
 7276 
 7277   ins_cost(125); // XXX
 7278   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7279   ins_encode %{
 7280     __ movl($dst$$Register, $mem$$Address);
 7281   %}
 7282   ins_pipe(ialu_reg_mem); // XXX
 7283 %}
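
// Note: with compact object headers the narrow klass pointer is stored in the
// upper bits of the mark word and must be shifted down after the load. The
// APX path uses the new-data-destination (NDD) form of shr, which reads the
// memory operand and writes the destination register in one instruction.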
 7284 
 7285 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7286 %{
 7287   predicate(UseCompactObjectHeaders);
 7288   match(Set dst (LoadNKlass mem));
 7289   effect(KILL cr);
 7290   ins_cost(125);
 7291   format %{
 7292     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7293     "shrl    $dst, markWord::klass_shift_at_offset"
 7294   %}
 7295   ins_encode %{
 7296     if (UseAPX) {
 7297       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
 7298     } else {
 7300       __ movl($dst$$Register, $mem$$Address);
 7301       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7302     }
 7303   %}
 7304   ins_pipe(ialu_reg_mem);
 7305 %}
 7306 
 7307 // Load Float
 7308 instruct loadF(regF dst, memory mem)
 7309 %{
 7310   match(Set dst (LoadF mem));
 7311 
 7312   ins_cost(145); // XXX
 7313   format %{ "movss   $dst, $mem\t# float" %}
 7314   ins_encode %{
 7315     __ movflt($dst$$XMMRegister, $mem$$Address);
 7316   %}
 7317   ins_pipe(pipe_slow); // XXX
 7318 %}
 7319 
 7320 // Load Double
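// Note: movlpd writes only the low 64 bits and merges with the stale upper
// half of the XMM register, which can create a false dependency on its
// previous contents; with UseXmmLoadAndClearUpper, movsd is used instead
// since it clears the upper bits and breaks that dependency.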
 7321 instruct loadD_partial(regD dst, memory mem)
 7322 %{
 7323   predicate(!UseXmmLoadAndClearUpper);
 7324   match(Set dst (LoadD mem));
 7325 
 7326   ins_cost(145); // XXX
 7327   format %{ "movlpd  $dst, $mem\t# double" %}
 7328   ins_encode %{
 7329     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7330   %}
 7331   ins_pipe(pipe_slow); // XXX
 7332 %}
 7333 
 7334 instruct loadD(regD dst, memory mem)
 7335 %{
 7336   predicate(UseXmmLoadAndClearUpper);
 7337   match(Set dst (LoadD mem));
 7338 
 7339   ins_cost(145); // XXX
 7340   format %{ "movsd   $dst, $mem\t# double" %}
 7341   ins_encode %{
 7342     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7343   %}
 7344   ins_pipe(pipe_slow); // XXX
 7345 %}
 7346 
 7347 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7348 %{
 7349   match(Set dst con);
 7350 
 7351   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7352 
 7353   ins_encode %{
 7354     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7355   %}
 7356 
 7357   ins_pipe(ialu_reg_fat);
 7358 %}
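
// Note on the Math.max/min patterns below: Java semantics differ from plain
// maxss/minss -- NaN must propagate from either input and -0.0 must order
// below +0.0 -- so the pre-AVX10.2 forms need XMM temporaries (and, for
// reductions, a flag-killing compare sequence). The AVX10.2 VMINMAXSS/SD
// instructions provide the required minimum/maximum semantics directly.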
 7359 
 7360 // max = java.lang.Math.max(float a, float b)
 7361 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
 7362   predicate(VM_Version::supports_avx10_2());
 7363   match(Set dst (MaxF a b));
 7364   format %{ "maxF $dst, $a, $b" %}
 7365   ins_encode %{
 7366     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7367   %}
 7368   ins_pipe( pipe_slow );
 7369 %}
 7370 
 7371 // max = java.lang.Math.max(float a, float b)
 7372 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7373   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7374   match(Set dst (MaxF a b));
 7375   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7376   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7377   ins_encode %{
 7378     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7379   %}
 7380   ins_pipe( pipe_slow );
 7381 %}
 7382 
 7383 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7384   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7385   match(Set dst (MaxF a b));
 7386   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7387 
 7388   format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7389   ins_encode %{
 7390     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7391                     false /*min*/, true /*single*/);
 7392   %}
 7393   ins_pipe( pipe_slow );
 7394 %}
 7395 
 7396 // max = java.lang.Math.max(double a, double b)
 7397 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
 7398   predicate(VM_Version::supports_avx10_2());
 7399   match(Set dst (MaxD a b));
 7400   format %{ "maxD $dst, $a, $b" %}
 7401   ins_encode %{
 7402     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7403   %}
 7404   ins_pipe( pipe_slow );
 7405 %}
 7406 
 7407 // max = java.lang.Math.max(double a, double b)
 7408 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7409   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7410   match(Set dst (MaxD a b));
 7411   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7412   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7413   ins_encode %{
 7414     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7415   %}
 7416   ins_pipe( pipe_slow );
 7417 %}
 7418 
 7419 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7420   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7421   match(Set dst (MaxD a b));
 7422   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7423 
 7424   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7425   ins_encode %{
 7426     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7427                     false /*min*/, false /*single*/);
 7428   %}
 7429   ins_pipe( pipe_slow );
 7430 %}
 7431 
 7432 // min = java.lang.Math.min(float a, float b)
 7433 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
 7434   predicate(VM_Version::supports_avx10_2());
 7435   match(Set dst (MinF a b));
 7436   format %{ "minF $dst, $a, $b" %}
 7437   ins_encode %{
 7438     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7439   %}
 7440   ins_pipe( pipe_slow );
 7441 %}
 7442 
 7443 // min = java.lang.Math.min(float a, float b)
 7444 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7445   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7446   match(Set dst (MinF a b));
 7447   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7448   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7449   ins_encode %{
 7450     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7451   %}
 7452   ins_pipe( pipe_slow );
 7453 %}
 7454 
 7455 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7456   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7457   match(Set dst (MinF a b));
 7458   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7459 
 7460   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7461   ins_encode %{
 7462     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7463                     true /*min*/, true /*single*/);
 7464   %}
 7465   ins_pipe( pipe_slow );
 7466 %}
 7467 
 7468 // min = java.lang.Math.min(double a, double b)
 7469 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
 7470   predicate(VM_Version::supports_avx10_2());
 7471   match(Set dst (MinD a b));
 7472   format %{ "minD $dst, $a, $b" %}
 7473   ins_encode %{
 7474     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7475   %}
 7476   ins_pipe( pipe_slow );
 7477 %}
 7478 
 7479 // min = java.lang.Math.min(double a, double b)
 7480 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7481   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7482   match(Set dst (MinD a b));
 7483   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7484     format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7485   ins_encode %{
 7486     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7487   %}
 7488   ins_pipe( pipe_slow );
 7489 %}
 7490 
 7491 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7492   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7493   match(Set dst (MinD a b));
 7494   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7495 
 7496   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7497   ins_encode %{
 7498     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7499                     true /*min*/, false /*single*/);
 7500   %}
 7501   ins_pipe( pipe_slow );
 7502 %}
 7503 
 7504 // Load Effective Address
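// Note: these patterns match complete addressing expressions
// (base + index*scale + disp) so that an add/shift/add chain collapses into
// a single leaq.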
 7505 instruct leaP8(rRegP dst, indOffset8 mem)
 7506 %{
 7507   match(Set dst mem);
 7508 
 7509   ins_cost(110); // XXX
 7510   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7511   ins_encode %{
 7512     __ leaq($dst$$Register, $mem$$Address);
 7513   %}
 7514   ins_pipe(ialu_reg_reg_fat);
 7515 %}
 7516 
 7517 instruct leaP32(rRegP dst, indOffset32 mem)
 7518 %{
 7519   match(Set dst mem);
 7520 
 7521   ins_cost(110);
 7522   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7523   ins_encode %{
 7524     __ leaq($dst$$Register, $mem$$Address);
 7525   %}
 7526   ins_pipe(ialu_reg_reg_fat);
 7527 %}
 7528 
 7529 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7530 %{
 7531   match(Set dst mem);
 7532 
 7533   ins_cost(110);
 7534   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7535   ins_encode %{
 7536     __ leaq($dst$$Register, $mem$$Address);
 7537   %}
 7538   ins_pipe(ialu_reg_reg_fat);
 7539 %}
 7540 
 7541 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7542 %{
 7543   match(Set dst mem);
 7544 
 7545   ins_cost(110);
 7546   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7547   ins_encode %{
 7548     __ leaq($dst$$Register, $mem$$Address);
 7549   %}
 7550   ins_pipe(ialu_reg_reg_fat);
 7551 %}
 7552 
 7553 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7554 %{
 7555   match(Set dst mem);
 7556 
 7557   ins_cost(110);
 7558   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7559   ins_encode %{
 7560     __ leaq($dst$$Register, $mem$$Address);
 7561   %}
 7562   ins_pipe(ialu_reg_reg_fat);
 7563 %}
 7564 
 7565 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7566 %{
 7567   match(Set dst mem);
 7568 
 7569   ins_cost(110);
 7570   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7571   ins_encode %{
 7572     __ leaq($dst$$Register, $mem$$Address);
 7573   %}
 7574   ins_pipe(ialu_reg_reg_fat);
 7575 %}
 7576 
 7577 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7578 %{
 7579   match(Set dst mem);
 7580 
 7581   ins_cost(110);
 7582   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7583   ins_encode %{
 7584     __ leaq($dst$$Register, $mem$$Address);
 7585   %}
 7586   ins_pipe(ialu_reg_reg_fat);
 7587 %}
 7588 
 7589 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7590 %{
 7591   match(Set dst mem);
 7592 
 7593   ins_cost(110);
 7594   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7595   ins_encode %{
 7596     __ leaq($dst$$Register, $mem$$Address);
 7597   %}
 7598   ins_pipe(ialu_reg_reg_fat);
 7599 %}
 7600 
 7601 // Load Effective Address which uses Narrow (32-bit) oop
 7602 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7603 %{
 7604   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7605   match(Set dst mem);
 7606 
 7607   ins_cost(110);
 7608   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7609   ins_encode %{
 7610     __ leaq($dst$$Register, $mem$$Address);
 7611   %}
 7612   ins_pipe(ialu_reg_reg_fat);
 7613 %}
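
// Note: the narrow-oop lea forms below require CompressedOops::shift() == 0
// (an unscaled heap), so the raw 32-bit compressed value can be used directly
// as the address base.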
 7614 
 7615 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7616 %{
 7617   predicate(CompressedOops::shift() == 0);
 7618   match(Set dst mem);
 7619 
 7620   ins_cost(110); // XXX
 7621   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7622   ins_encode %{
 7623     __ leaq($dst$$Register, $mem$$Address);
 7624   %}
 7625   ins_pipe(ialu_reg_reg_fat);
 7626 %}
 7627 
 7628 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7629 %{
 7630   predicate(CompressedOops::shift() == 0);
 7631   match(Set dst mem);
 7632 
 7633   ins_cost(110);
 7634   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7635   ins_encode %{
 7636     __ leaq($dst$$Register, $mem$$Address);
 7637   %}
 7638   ins_pipe(ialu_reg_reg_fat);
 7639 %}
 7640 
 7641 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7642 %{
 7643   predicate(CompressedOops::shift() == 0);
 7644   match(Set dst mem);
 7645 
 7646   ins_cost(110);
 7647   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7648   ins_encode %{
 7649     __ leaq($dst$$Register, $mem$$Address);
 7650   %}
 7651   ins_pipe(ialu_reg_reg_fat);
 7652 %}
 7653 
 7654 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7655 %{
 7656   predicate(CompressedOops::shift() == 0);
 7657   match(Set dst mem);
 7658 
 7659   ins_cost(110);
 7660   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7661   ins_encode %{
 7662     __ leaq($dst$$Register, $mem$$Address);
 7663   %}
 7664   ins_pipe(ialu_reg_reg_fat);
 7665 %}
 7666 
 7667 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7668 %{
 7669   predicate(CompressedOops::shift() == 0);
 7670   match(Set dst mem);
 7671 
 7672   ins_cost(110);
 7673   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7674   ins_encode %{
 7675     __ leaq($dst$$Register, $mem$$Address);
 7676   %}
 7677   ins_pipe(ialu_reg_reg_fat);
 7678 %}
 7679 
 7680 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7681 %{
 7682   predicate(CompressedOops::shift() == 0);
 7683   match(Set dst mem);
 7684 
 7685   ins_cost(110);
 7686   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7687   ins_encode %{
 7688     __ leaq($dst$$Register, $mem$$Address);
 7689   %}
 7690   ins_pipe(ialu_reg_reg_fat);
 7691 %}
 7692 
 7693 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7694 %{
 7695   predicate(CompressedOops::shift() == 0);
 7696   match(Set dst mem);
 7697 
 7698   ins_cost(110);
 7699   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7700   ins_encode %{
 7701     __ leaq($dst$$Register, $mem$$Address);
 7702   %}
 7703   ins_pipe(ialu_reg_reg_fat);
 7704 %}
 7705 
 7706 instruct loadConI(rRegI dst, immI src)
 7707 %{
 7708   match(Set dst src);
 7709 
 7710   format %{ "movl    $dst, $src\t# int" %}
 7711   ins_encode %{
 7712     __ movl($dst$$Register, $src$$constant);
 7713   %}
 7714   ins_pipe(ialu_reg_fat); // XXX
 7715 %}
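
// Note: zero is loaded with a 32-bit xor -- the shortest encoding -- and,
// since 32-bit results are zero-extended, it also clears the full 64-bit
// register for the long/ptr variants below. xor writes the flags, hence the
// KILL cr effects.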
 7716 
 7717 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7718 %{
 7719   match(Set dst src);
 7720   effect(KILL cr);
 7721 
 7722   ins_cost(50);
 7723   format %{ "xorl    $dst, $dst\t# int" %}
 7724   ins_encode %{
 7725     __ xorl($dst$$Register, $dst$$Register);
 7726   %}
 7727   ins_pipe(ialu_reg);
 7728 %}
 7729 
 7730 instruct loadConL(rRegL dst, immL src)
 7731 %{
 7732   match(Set dst src);
 7733 
 7734   ins_cost(150);
 7735   format %{ "movq    $dst, $src\t# long" %}
 7736   ins_encode %{
 7737     __ mov64($dst$$Register, $src$$constant);
 7738   %}
 7739   ins_pipe(ialu_reg);
 7740 %}
 7741 
 7742 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7743 %{
 7744   match(Set dst src);
 7745   effect(KILL cr);
 7746 
 7747   ins_cost(50);
 7748   format %{ "xorl    $dst, $dst\t# long" %}
 7749   ins_encode %{
 7750     __ xorl($dst$$Register, $dst$$Register);
 7751   %}
 7752   ins_pipe(ialu_reg); // XXX
 7753 %}
 7754 
 7755 instruct loadConUL32(rRegL dst, immUL32 src)
 7756 %{
 7757   match(Set dst src);
 7758 
 7759   ins_cost(60);
 7760   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7761   ins_encode %{
 7762     __ movl($dst$$Register, $src$$constant);
 7763   %}
 7764   ins_pipe(ialu_reg);
 7765 %}
 7766 
 7767 instruct loadConL32(rRegL dst, immL32 src)
 7768 %{
 7769   match(Set dst src);
 7770 
 7771   ins_cost(70);
 7772   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7773   ins_encode %{
 7774     __ movq($dst$$Register, $src$$constant);
 7775   %}
 7776   ins_pipe(ialu_reg);
 7777 %}
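
// Note: a pointer constant may embed an oop or metadata address, so it is
// emitted as a full 64-bit immediate with relocation info (RELOC_IMM64) that
// lets the runtime find and update it.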
 7778 
 7779 instruct loadConP(rRegP dst, immP con) %{
 7780   match(Set dst con);
 7781 
 7782   format %{ "movq    $dst, $con\t# ptr" %}
 7783   ins_encode %{
 7784     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7785   %}
 7786   ins_pipe(ialu_reg_fat); // XXX
 7787 %}
 7788 
 7789 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7790 %{
 7791   match(Set dst src);
 7792   effect(KILL cr);
 7793 
 7794   ins_cost(50);
 7795   format %{ "xorl    $dst, $dst\t# ptr" %}
 7796   ins_encode %{
 7797     __ xorl($dst$$Register, $dst$$Register);
 7798   %}
 7799   ins_pipe(ialu_reg);
 7800 %}
 7801 
 7802 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7803 %{
 7804   match(Set dst src);
 7805   effect(KILL cr);
 7806 
 7807   ins_cost(60);
 7808   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7809   ins_encode %{
 7810     __ movl($dst$$Register, $src$$constant);
 7811   %}
 7812   ins_pipe(ialu_reg);
 7813 %}
 7814 
 7815 instruct loadConF(regF dst, immF con) %{
 7816   match(Set dst con);
 7817   ins_cost(125);
 7818   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7819   ins_encode %{
 7820     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7821   %}
 7822   ins_pipe(pipe_slow);
 7823 %}
 7824 
 7825 instruct loadConH(regF dst, immH con) %{
 7826   match(Set dst con);
 7827   ins_cost(125);
 7828   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7829   ins_encode %{
 7830     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7831   %}
 7832   ins_pipe(pipe_slow);
 7833 %}
 7834 
 7835 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7836   match(Set dst src);
 7837   effect(KILL cr);
 7838   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7839   ins_encode %{
 7840     __ xorq($dst$$Register, $dst$$Register);
 7841   %}
 7842   ins_pipe(ialu_reg);
 7843 %}
 7844 
 7845 instruct loadConN(rRegN dst, immN src) %{
 7846   match(Set dst src);
 7847 
 7848   ins_cost(125);
 7849   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7850   ins_encode %{
 7851     address con = (address)$src$$constant;
 7852     if (con == nullptr) {
 7853       ShouldNotReachHere();
 7854     } else {
 7855       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7856     }
 7857   %}
 7858   ins_pipe(ialu_reg_fat); // XXX
 7859 %}
 7860 
 7861 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7862   match(Set dst src);
 7863 
 7864   ins_cost(125);
 7865   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7866   ins_encode %{
 7867     address con = (address)$src$$constant;
 7868     if (con == nullptr) {
 7869       ShouldNotReachHere();
 7870     } else {
 7871       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7872     }
 7873   %}
 7874   ins_pipe(ialu_reg_fat); // XXX
 7875 %}
 7876 
 7877 instruct loadConF0(regF dst, immF0 src)
 7878 %{
 7879   match(Set dst src);
 7880   ins_cost(100);
 7881 
 7882   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7883   ins_encode %{
 7884     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7885   %}
 7886   ins_pipe(pipe_slow);
 7887 %}
 7888 
 7889 // Use the same format since predicate() cannot be used here.
 7890 instruct loadConD(regD dst, immD con) %{
 7891   match(Set dst con);
 7892   ins_cost(125);
 7893   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7894   ins_encode %{
 7895     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7896   %}
 7897   ins_pipe(pipe_slow);
 7898 %}
 7899 
 7900 instruct loadConD0(regD dst, immD0 src)
 7901 %{
 7902   match(Set dst src);
 7903   ins_cost(100);
 7904 
 7905   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7906   ins_encode %{
 7907     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7908   %}
 7909   ins_pipe(pipe_slow);
 7910 %}
 7911 
 7912 instruct loadSSI(rRegI dst, stackSlotI src)
 7913 %{
 7914   match(Set dst src);
 7915 
 7916   ins_cost(125);
 7917   format %{ "movl    $dst, $src\t# int stk" %}
 7918   ins_encode %{
 7919     __ movl($dst$$Register, $src$$Address);
 7920   %}
 7921   ins_pipe(ialu_reg_mem);
 7922 %}
 7923 
 7924 instruct loadSSL(rRegL dst, stackSlotL src)
 7925 %{
 7926   match(Set dst src);
 7927 
 7928   ins_cost(125);
 7929   format %{ "movq    $dst, $src\t# long stk" %}
 7930   ins_encode %{
 7931     __ movq($dst$$Register, $src$$Address);
 7932   %}
 7933   ins_pipe(ialu_reg_mem);
 7934 %}
 7935 
 7936 instruct loadSSP(rRegP dst, stackSlotP src)
 7937 %{
 7938   match(Set dst src);
 7939 
 7940   ins_cost(125);
 7941   format %{ "movq    $dst, $src\t# ptr stk" %}
 7942   ins_encode %{
 7943     __ movq($dst$$Register, $src$$Address);
 7944   %}
 7945   ins_pipe(ialu_reg_mem);
 7946 %}
 7947 
 7948 instruct loadSSF(regF dst, stackSlotF src)
 7949 %{
 7950   match(Set dst src);
 7951 
 7952   ins_cost(125);
 7953   format %{ "movss   $dst, $src\t# float stk" %}
 7954   ins_encode %{
 7955     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7956   %}
 7957   ins_pipe(pipe_slow); // XXX
 7958 %}
 7959 
 7960 // Use the same format since predicate() cannot be used here.
 7961 instruct loadSSD(regD dst, stackSlotD src)
 7962 %{
 7963   match(Set dst src);
 7964 
 7965   ins_cost(125);
 7966   format %{ "movsd   $dst, $src\t# double stk" %}
 7967   ins_encode  %{
 7968     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7969   %}
 7970   ins_pipe(pipe_slow); // XXX
 7971 %}
 7972 
 7973 // Prefetch instructions for allocation.
 7974 // Must be safe to execute with invalid address (cannot fault).
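// AllocatePrefetchInstr selects the flavor: 0 = prefetchnta, 1 = prefetcht0,
// 2 = prefetcht2, 3 = prefetchw.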
 7975 
 7976 instruct prefetchAlloc( memory mem ) %{
 7977   predicate(AllocatePrefetchInstr==3);
 7978   match(PrefetchAllocation mem);
 7979   ins_cost(125);
 7980 
 7981   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7982   ins_encode %{
 7983     __ prefetchw($mem$$Address);
 7984   %}
 7985   ins_pipe(ialu_mem);
 7986 %}
 7987 
 7988 instruct prefetchAllocNTA( memory mem ) %{
 7989   predicate(AllocatePrefetchInstr==0);
 7990   match(PrefetchAllocation mem);
 7991   ins_cost(125);
 7992 
 7993   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7994   ins_encode %{
 7995     __ prefetchnta($mem$$Address);
 7996   %}
 7997   ins_pipe(ialu_mem);
 7998 %}
 7999 
 8000 instruct prefetchAllocT0( memory mem ) %{
 8001   predicate(AllocatePrefetchInstr==1);
 8002   match(PrefetchAllocation mem);
 8003   ins_cost(125);
 8004 
 8005   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 8006   ins_encode %{
 8007     __ prefetcht0($mem$$Address);
 8008   %}
 8009   ins_pipe(ialu_mem);
 8010 %}
 8011 
 8012 instruct prefetchAllocT2( memory mem ) %{
 8013   predicate(AllocatePrefetchInstr==2);
 8014   match(PrefetchAllocation mem);
 8015   ins_cost(125);
 8016 
 8017   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8018   ins_encode %{
 8019     __ prefetcht2($mem$$Address);
 8020   %}
 8021   ins_pipe(ialu_mem);
 8022 %}
 8023 
 8024 //----------Store Instructions-------------------------------------------------
 8025 
 8026 // Store Byte
 8027 instruct storeB(memory mem, rRegI src)
 8028 %{
 8029   match(Set mem (StoreB mem src));
 8030 
 8031   ins_cost(125); // XXX
 8032   format %{ "movb    $mem, $src\t# byte" %}
 8033   ins_encode %{
 8034     __ movb($mem$$Address, $src$$Register);
 8035   %}
 8036   ins_pipe(ialu_mem_reg);
 8037 %}
 8038 
 8039 // Store Char/Short
 8040 instruct storeC(memory mem, rRegI src)
 8041 %{
 8042   match(Set mem (StoreC mem src));
 8043 
 8044   ins_cost(125); // XXX
 8045   format %{ "movw    $mem, $src\t# char/short" %}
 8046   ins_encode %{
 8047     __ movw($mem$$Address, $src$$Register);
 8048   %}
 8049   ins_pipe(ialu_mem_reg);
 8050 %}
 8051 
 8052 // Store Integer
 8053 instruct storeI(memory mem, rRegI src)
 8054 %{
 8055   match(Set mem (StoreI mem src));
 8056 
 8057   ins_cost(125); // XXX
 8058   format %{ "movl    $mem, $src\t# int" %}
 8059   ins_encode %{
 8060     __ movl($mem$$Address, $src$$Register);
 8061   %}
 8062   ins_pipe(ialu_mem_reg);
 8063 %}
 8064 
 8065 // Store Long
 8066 instruct storeL(memory mem, rRegL src)
 8067 %{
 8068   match(Set mem (StoreL mem src));
 8069 
 8070   ins_cost(125); // XXX
 8071   format %{ "movq    $mem, $src\t# long" %}
 8072   ins_encode %{
 8073     __ movq($mem$$Address, $src$$Register);
 8074   %}
 8075   ins_pipe(ialu_mem_reg); // XXX
 8076 %}
 8077 
 8078 // Store Pointer
 8079 instruct storeP(memory mem, any_RegP src)
 8080 %{
 8081   predicate(n->as_Store()->barrier_data() == 0);
 8082   match(Set mem (StoreP mem src));
 8083 
 8084   ins_cost(125); // XXX
 8085   format %{ "movq    $mem, $src\t# ptr" %}
 8086   ins_encode %{
 8087     __ movq($mem$$Address, $src$$Register);
 8088   %}
 8089   ins_pipe(ialu_mem_reg);
 8090 %}
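
// Note: when compressed oops run with a null base, r12 permanently holds the
// heap base -- i.e. zero -- so the *0 store forms below reuse it as a zero
// register instead of encoding an immediate operand.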
 8091 
 8092 instruct storeImmP0(memory mem, immP0 zero)
 8093 %{
 8094   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8095   match(Set mem (StoreP mem zero));
 8096 
 8097   ins_cost(125); // XXX
 8098   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8099   ins_encode %{
 8100     __ movq($mem$$Address, r12);
 8101   %}
 8102   ins_pipe(ialu_mem_reg);
 8103 %}
 8104 
 8105 // Store Null Pointer, mark word, or other simple pointer constant.
 8106 instruct storeImmP(memory mem, immP31 src)
 8107 %{
 8108   predicate(n->as_Store()->barrier_data() == 0);
 8109   match(Set mem (StoreP mem src));
 8110 
 8111   ins_cost(150); // XXX
 8112   format %{ "movq    $mem, $src\t# ptr" %}
 8113   ins_encode %{
 8114     __ movq($mem$$Address, $src$$constant);
 8115   %}
 8116   ins_pipe(ialu_mem_imm);
 8117 %}
 8118 
 8119 // Store Compressed Pointer
 8120 instruct storeN(memory mem, rRegN src)
 8121 %{
 8122   predicate(n->as_Store()->barrier_data() == 0);
 8123   match(Set mem (StoreN mem src));
 8124 
 8125   ins_cost(125); // XXX
 8126   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8127   ins_encode %{
 8128     __ movl($mem$$Address, $src$$Register);
 8129   %}
 8130   ins_pipe(ialu_mem_reg);
 8131 %}
 8132 
 8133 instruct storeNKlass(memory mem, rRegN src)
 8134 %{
 8135   match(Set mem (StoreNKlass mem src));
 8136 
 8137   ins_cost(125); // XXX
 8138   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8139   ins_encode %{
 8140     __ movl($mem$$Address, $src$$Register);
 8141   %}
 8142   ins_pipe(ialu_mem_reg);
 8143 %}
 8144 
 8145 instruct storeImmN0(memory mem, immN0 zero)
 8146 %{
 8147   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8148   match(Set mem (StoreN mem zero));
 8149 
 8150   ins_cost(125); // XXX
 8151   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8152   ins_encode %{
 8153     __ movl($mem$$Address, r12);
 8154   %}
 8155   ins_pipe(ialu_mem_reg);
 8156 %}
 8157 
 8158 instruct storeImmN(memory mem, immN src)
 8159 %{
 8160   predicate(n->as_Store()->barrier_data() == 0);
 8161   match(Set mem (StoreN mem src));
 8162 
 8163   ins_cost(150); // XXX
 8164   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8165   ins_encode %{
 8166     address con = (address)$src$$constant;
 8167     if (con == nullptr) {
 8168       __ movl($mem$$Address, 0);
 8169     } else {
 8170       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8171     }
 8172   %}
 8173   ins_pipe(ialu_mem_imm);
 8174 %}
 8175 
 8176 instruct storeImmNKlass(memory mem, immNKlass src)
 8177 %{
 8178   match(Set mem (StoreNKlass mem src));
 8179 
 8180   ins_cost(150); // XXX
 8181   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8182   ins_encode %{
 8183     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8184   %}
 8185   ins_pipe(ialu_mem_imm);
 8186 %}
 8187 
 8188 // Store Integer Immediate
 8189 instruct storeImmI0(memory mem, immI_0 zero)
 8190 %{
 8191   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8192   match(Set mem (StoreI mem zero));
 8193 
 8194   ins_cost(125); // XXX
 8195   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8196   ins_encode %{
 8197     __ movl($mem$$Address, r12);
 8198   %}
 8199   ins_pipe(ialu_mem_reg);
 8200 %}
 8201 
 8202 instruct storeImmI(memory mem, immI src)
 8203 %{
 8204   match(Set mem (StoreI mem src));
 8205 
 8206   ins_cost(150);
 8207   format %{ "movl    $mem, $src\t# int" %}
 8208   ins_encode %{
 8209     __ movl($mem$$Address, $src$$constant);
 8210   %}
 8211   ins_pipe(ialu_mem_imm);
 8212 %}
 8213 
 8214 // Store Long Immediate
 8215 instruct storeImmL0(memory mem, immL0 zero)
 8216 %{
 8217   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8218   match(Set mem (StoreL mem zero));
 8219 
 8220   ins_cost(125); // XXX
 8221   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8222   ins_encode %{
 8223     __ movq($mem$$Address, r12);
 8224   %}
 8225   ins_pipe(ialu_mem_reg);
 8226 %}
 8227 
 8228 instruct storeImmL(memory mem, immL32 src)
 8229 %{
 8230   match(Set mem (StoreL mem src));
 8231 
 8232   ins_cost(150);
 8233   format %{ "movq    $mem, $src\t# long" %}
 8234   ins_encode %{
 8235     __ movq($mem$$Address, $src$$constant);
 8236   %}
 8237   ins_pipe(ialu_mem_imm);
 8238 %}
 8239 
 8240 // Store Short/Char Immediate
 8241 instruct storeImmC0(memory mem, immI_0 zero)
 8242 %{
 8243   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8244   match(Set mem (StoreC mem zero));
 8245 
 8246   ins_cost(125); // XXX
 8247   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8248   ins_encode %{
 8249     __ movw($mem$$Address, r12);
 8250   %}
 8251   ins_pipe(ialu_mem_reg);
 8252 %}
 8253 
 8254 instruct storeImmI16(memory mem, immI16 src)
 8255 %{
 8256   predicate(UseStoreImmI16);
 8257   match(Set mem (StoreC mem src));
 8258 
 8259   ins_cost(150);
 8260   format %{ "movw    $mem, $src\t# short/char" %}
 8261   ins_encode %{
 8262     __ movw($mem$$Address, $src$$constant);
 8263   %}
 8264   ins_pipe(ialu_mem_imm);
 8265 %}
 8266 
 8267 // Store Byte Immediate
 8268 instruct storeImmB0(memory mem, immI_0 zero)
 8269 %{
 8270   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8271   match(Set mem (StoreB mem zero));
 8272 
 8273   ins_cost(125); // XXX
 8274   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8275   ins_encode %{
 8276     __ movb($mem$$Address, r12);
 8277   %}
 8278   ins_pipe(ialu_mem_reg);
 8279 %}
 8280 
 8281 instruct storeImmB(memory mem, immI8 src)
 8282 %{
 8283   match(Set mem (StoreB mem src));
 8284 
 8285   ins_cost(150); // XXX
 8286   format %{ "movb    $mem, $src\t# byte" %}
 8287   ins_encode %{
 8288     __ movb($mem$$Address, $src$$constant);
 8289   %}
 8290   ins_pipe(ialu_mem_imm);
 8291 %}
 8292 
 8293 // Store Float
 8294 instruct storeF(memory mem, regF src)
 8295 %{
 8296   match(Set mem (StoreF mem src));
 8297 
 8298   ins_cost(95); // XXX
 8299   format %{ "movss   $mem, $src\t# float" %}
 8300   ins_encode %{
 8301     __ movflt($mem$$Address, $src$$XMMRegister);
 8302   %}
 8303   ins_pipe(pipe_slow); // XXX
 8304 %}
 8305 
 8306 // Store immediate Float value (it is faster than store from XMM register)
 8307 instruct storeF0(memory mem, immF0 zero)
 8308 %{
 8309   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8310   match(Set mem (StoreF mem zero));
 8311 
 8312   ins_cost(25); // XXX
 8313   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8314   ins_encode %{
 8315     __ movl($mem$$Address, r12);
 8316   %}
 8317   ins_pipe(ialu_mem_reg);
 8318 %}
 8319 
 8320 instruct storeF_imm(memory mem, immF src)
 8321 %{
 8322   match(Set mem (StoreF mem src));
 8323 
 8324   ins_cost(50);
 8325   format %{ "movl    $mem, $src\t# float" %}
 8326   ins_encode %{
 8327     __ movl($mem$$Address, jint_cast($src$$constant));
 8328   %}
 8329   ins_pipe(ialu_mem_imm);
 8330 %}
 8331 
 8332 // Store Double
 8333 instruct storeD(memory mem, regD src)
 8334 %{
 8335   match(Set mem (StoreD mem src));
 8336 
 8337   ins_cost(95); // XXX
 8338   format %{ "movsd   $mem, $src\t# double" %}
 8339   ins_encode %{
 8340     __ movdbl($mem$$Address, $src$$XMMRegister);
 8341   %}
 8342   ins_pipe(pipe_slow); // XXX
 8343 %}
 8344 
 8345 // Store immediate double 0.0 (it is faster than store from XMM register)
 8346 instruct storeD0_imm(memory mem, immD0 src)
 8347 %{
 8348   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8349   match(Set mem (StoreD mem src));
 8350 
 8351   ins_cost(50);
 8352   format %{ "movq    $mem, $src\t# double 0." %}
 8353   ins_encode %{
 8354     __ movq($mem$$Address, $src$$constant);
 8355   %}
 8356   ins_pipe(ialu_mem_imm);
 8357 %}
 8358 
 8359 instruct storeD0(memory mem, immD0 zero)
 8360 %{
 8361   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8362   match(Set mem (StoreD mem zero));
 8363 
 8364   ins_cost(25); // XXX
 8365   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8366   ins_encode %{
 8367     __ movq($mem$$Address, r12);
 8368   %}
 8369   ins_pipe(ialu_mem_reg);
 8370 %}
 8371 
 8372 instruct storeSSI(stackSlotI dst, rRegI src)
 8373 %{
 8374   match(Set dst src);
 8375 
 8376   ins_cost(100);
 8377   format %{ "movl    $dst, $src\t# int stk" %}
 8378   ins_encode %{
 8379     __ movl($dst$$Address, $src$$Register);
 8380   %}
 8381   ins_pipe( ialu_mem_reg );
 8382 %}
 8383 
 8384 instruct storeSSL(stackSlotL dst, rRegL src)
 8385 %{
 8386   match(Set dst src);
 8387 
 8388   ins_cost(100);
 8389   format %{ "movq    $dst, $src\t# long stk" %}
 8390   ins_encode %{
 8391     __ movq($dst$$Address, $src$$Register);
 8392   %}
 8393   ins_pipe(ialu_mem_reg);
 8394 %}
 8395 
 8396 instruct storeSSP(stackSlotP dst, rRegP src)
 8397 %{
 8398   match(Set dst src);
 8399 
 8400   ins_cost(100);
 8401   format %{ "movq    $dst, $src\t# ptr stk" %}
 8402   ins_encode %{
 8403     __ movq($dst$$Address, $src$$Register);
 8404   %}
 8405   ins_pipe(ialu_mem_reg);
 8406 %}
 8407 
 8408 instruct storeSSF(stackSlotF dst, regF src)
 8409 %{
 8410   match(Set dst src);
 8411 
 8412   ins_cost(95); // XXX
 8413   format %{ "movss   $dst, $src\t# float stk" %}
 8414   ins_encode %{
 8415     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8416   %}
 8417   ins_pipe(pipe_slow); // XXX
 8418 %}
 8419 
 8420 instruct storeSSD(stackSlotD dst, regD src)
 8421 %{
 8422   match(Set dst src);
 8423 
 8424   ins_cost(95); // XXX
 8425   format %{ "movsd   $dst, $src\t# double stk" %}
 8426   ins_encode %{
 8427     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8428   %}
 8429   ins_pipe(pipe_slow); // XXX
 8430 %}
 8431 
 8432 instruct cacheWB(indirect addr)
 8433 %{
 8434   predicate(VM_Version::supports_data_cache_line_flush());
 8435   match(CacheWB addr);
 8436 
 8437   ins_cost(100);
 8438   format %{"cache wb $addr" %}
 8439   ins_encode %{
 8440     assert($addr->index_position() < 0, "should be");
 8441     assert($addr$$disp == 0, "should be");
 8442     __ cache_wb(Address($addr$$base$$Register, 0));
 8443   %}
 8444   ins_pipe(pipe_slow); // XXX
 8445 %}
 8446 
 8447 instruct cacheWBPreSync()
 8448 %{
 8449   predicate(VM_Version::supports_data_cache_line_flush());
 8450   match(CacheWBPreSync);
 8451 
 8452   ins_cost(100);
 8453   format %{"cache wb presync" %}
 8454   ins_encode %{
 8455     __ cache_wbsync(true);
 8456   %}
 8457   ins_pipe(pipe_slow); // XXX
 8458 %}
 8459 
 8460 instruct cacheWBPostSync()
 8461 %{
 8462   predicate(VM_Version::supports_data_cache_line_flush());
 8463   match(CacheWBPostSync);
 8464 
 8465   ins_cost(100);
 8466   format %{"cache wb postsync" %}
 8467   ins_encode %{
 8468     __ cache_wbsync(false);
 8469   %}
 8470   ins_pipe(pipe_slow); // XXX
 8471 %}
 8472 
 8473 //----------BSWAP Instructions-------------------------------------------------
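// Note: there is no 16-bit bswap, so the short forms below byte-swap the full
// 32-bit register and shift the result back down (shrl for the unsigned
// value, sarl to re-sign-extend).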
 8474 instruct bytes_reverse_int(rRegI dst) %{
 8475   match(Set dst (ReverseBytesI dst));
 8476 
 8477   format %{ "bswapl  $dst" %}
 8478   ins_encode %{
 8479     __ bswapl($dst$$Register);
 8480   %}
 8481   ins_pipe( ialu_reg );
 8482 %}
 8483 
 8484 instruct bytes_reverse_long(rRegL dst) %{
 8485   match(Set dst (ReverseBytesL dst));
 8486 
 8487   format %{ "bswapq  $dst" %}
 8488   ins_encode %{
 8489     __ bswapq($dst$$Register);
 8490   %}
 8491   ins_pipe( ialu_reg );
 8492 %}
 8493 
 8494 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8495   match(Set dst (ReverseBytesUS dst));
 8496   effect(KILL cr);
 8497 
 8498   format %{ "bswapl  $dst\n\t"
 8499             "shrl    $dst,16\n\t" %}
 8500   ins_encode %{
 8501     __ bswapl($dst$$Register);
 8502     __ shrl($dst$$Register, 16);
 8503   %}
 8504   ins_pipe( ialu_reg );
 8505 %}
 8506 
 8507 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8508   match(Set dst (ReverseBytesS dst));
 8509   effect(KILL cr);
 8510 
 8511   format %{ "bswapl  $dst\n\t"
 8512             "sar     $dst,16\n\t" %}
 8513   ins_encode %{
 8514     __ bswapl($dst$$Register);
 8515     __ sarl($dst$$Register, 16);
 8516   %}
 8517   ins_pipe( ialu_reg );
 8518 %}
 8519 
 8520 //---------- Zeros Count Instructions ------------------------------------------
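// Note: lzcnt/tzcnt return the operand size for a zero input, while the
// legacy bsr/bsf leave the destination undefined and set ZF instead. The
// *_bsr/_bsf fallbacks branch on ZF and fix up the result: for a nonzero int,
// lzcnt == 31 - bsr, and the -1 written for a zero input yields 32 after the
// neg/add sequence.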
 8521 
 8522 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8523   predicate(UseCountLeadingZerosInstruction);
 8524   match(Set dst (CountLeadingZerosI src));
 8525   effect(KILL cr);
 8526 
 8527   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8528   ins_encode %{
 8529     __ lzcntl($dst$$Register, $src$$Register);
 8530   %}
 8531   ins_pipe(ialu_reg);
 8532 %}
 8533 
 8534 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8535   predicate(UseCountLeadingZerosInstruction);
 8536   match(Set dst (CountLeadingZerosI (LoadI src)));
 8537   effect(KILL cr);
 8538   ins_cost(175);
 8539   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8540   ins_encode %{
 8541     __ lzcntl($dst$$Register, $src$$Address);
 8542   %}
 8543   ins_pipe(ialu_reg_mem);
 8544 %}
 8545 
 8546 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8547   predicate(!UseCountLeadingZerosInstruction);
 8548   match(Set dst (CountLeadingZerosI src));
 8549   effect(KILL cr);
 8550 
 8551   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8552             "jnz     skip\n\t"
 8553             "movl    $dst, -1\n"
 8554       "skip:\n\t"
 8555             "negl    $dst\n\t"
 8556             "addl    $dst, 31" %}
 8557   ins_encode %{
 8558     Register Rdst = $dst$$Register;
 8559     Register Rsrc = $src$$Register;
 8560     Label skip;
 8561     __ bsrl(Rdst, Rsrc);
 8562     __ jccb(Assembler::notZero, skip);
 8563     __ movl(Rdst, -1);
 8564     __ bind(skip);
 8565     __ negl(Rdst);
 8566     __ addl(Rdst, BitsPerInt - 1);
 8567   %}
 8568   ins_pipe(ialu_reg);
 8569 %}
 8570 
 8571 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8572   predicate(UseCountLeadingZerosInstruction);
 8573   match(Set dst (CountLeadingZerosL src));
 8574   effect(KILL cr);
 8575 
 8576   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8577   ins_encode %{
 8578     __ lzcntq($dst$$Register, $src$$Register);
 8579   %}
 8580   ins_pipe(ialu_reg);
 8581 %}
 8582 
 8583 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8584   predicate(UseCountLeadingZerosInstruction);
 8585   match(Set dst (CountLeadingZerosL (LoadL src)));
 8586   effect(KILL cr);
 8587   ins_cost(175);
 8588   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8589   ins_encode %{
 8590     __ lzcntq($dst$$Register, $src$$Address);
 8591   %}
 8592   ins_pipe(ialu_reg_mem);
 8593 %}
 8594 
 8595 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8596   predicate(!UseCountLeadingZerosInstruction);
 8597   match(Set dst (CountLeadingZerosL src));
 8598   effect(KILL cr);
 8599 
 8600   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8601             "jnz     skip\n\t"
 8602             "movl    $dst, -1\n"
 8603       "skip:\n\t"
 8604             "negl    $dst\n\t"
 8605             "addl    $dst, 63" %}
 8606   ins_encode %{
 8607     Register Rdst = $dst$$Register;
 8608     Register Rsrc = $src$$Register;
 8609     Label skip;
 8610     __ bsrq(Rdst, Rsrc);
 8611     __ jccb(Assembler::notZero, skip);
 8612     __ movl(Rdst, -1);
 8613     __ bind(skip);
 8614     __ negl(Rdst);
 8615     __ addl(Rdst, BitsPerLong - 1);
 8616   %}
 8617   ins_pipe(ialu_reg);
 8618 %}
 8619 
 8620 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8621   predicate(UseCountTrailingZerosInstruction);
 8622   match(Set dst (CountTrailingZerosI src));
 8623   effect(KILL cr);
 8624 
 8625   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8626   ins_encode %{
 8627     __ tzcntl($dst$$Register, $src$$Register);
 8628   %}
 8629   ins_pipe(ialu_reg);
 8630 %}
 8631 
 8632 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8633   predicate(UseCountTrailingZerosInstruction);
 8634   match(Set dst (CountTrailingZerosI (LoadI src)));
 8635   effect(KILL cr);
 8636   ins_cost(175);
 8637   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8638   ins_encode %{
 8639     __ tzcntl($dst$$Register, $src$$Address);
 8640   %}
 8641   ins_pipe(ialu_reg_mem);
 8642 %}
 8643 
 8644 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8645   predicate(!UseCountTrailingZerosInstruction);
 8646   match(Set dst (CountTrailingZerosI src));
 8647   effect(KILL cr);
 8648 
 8649   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8650             "jnz     done\n\t"
 8651             "movl    $dst, 32\n"
 8652       "done:" %}
 8653   ins_encode %{
 8654     Register Rdst = $dst$$Register;
 8655     Label done;
 8656     __ bsfl(Rdst, $src$$Register);
 8657     __ jccb(Assembler::notZero, done);
 8658     __ movl(Rdst, BitsPerInt);
 8659     __ bind(done);
 8660   %}
 8661   ins_pipe(ialu_reg);
 8662 %}
 8663 
 8664 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8665   predicate(UseCountTrailingZerosInstruction);
 8666   match(Set dst (CountTrailingZerosL src));
 8667   effect(KILL cr);
 8668 
 8669   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8670   ins_encode %{
 8671     __ tzcntq($dst$$Register, $src$$Register);
 8672   %}
 8673   ins_pipe(ialu_reg);
 8674 %}
 8675 
 8676 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8677   predicate(UseCountTrailingZerosInstruction);
 8678   match(Set dst (CountTrailingZerosL (LoadL src)));
 8679   effect(KILL cr);
 8680   ins_cost(175);
 8681   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8682   ins_encode %{
 8683     __ tzcntq($dst$$Register, $src$$Address);
 8684   %}
 8685   ins_pipe(ialu_reg_mem);
 8686 %}
 8687 
 8688 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8689   predicate(!UseCountTrailingZerosInstruction);
 8690   match(Set dst (CountTrailingZerosL src));
 8691   effect(KILL cr);
 8692 
 8693   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8694             "jnz     done\n\t"
 8695             "movl    $dst, 64\n"
 8696       "done:" %}
 8697   ins_encode %{
 8698     Register Rdst = $dst$$Register;
 8699     Label done;
 8700     __ bsfq(Rdst, $src$$Register);
 8701     __ jccb(Assembler::notZero, done);
 8702     __ movl(Rdst, BitsPerLong);
 8703     __ bind(done);
 8704   %}
 8705   ins_pipe(ialu_reg);
 8706 %}
 8707 
 8708 //--------------- Reverse Operation Instructions ----------------
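// Note: bit reversal reverses the bits within each byte and then byte-swaps.
// With GFNI the per-byte step is a single affine transform (hence the XMM
// temporaries); without it, a shift-and-mask sequence in GPRs is used.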
 8709 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8710   predicate(!VM_Version::supports_gfni());
 8711   match(Set dst (ReverseI src));
 8712   effect(TEMP dst, TEMP rtmp, KILL cr);
 8713   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8714   ins_encode %{
 8715     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8716   %}
 8717   ins_pipe( ialu_reg );
 8718 %}
 8719 
 8720 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8721   predicate(VM_Version::supports_gfni());
 8722   match(Set dst (ReverseI src));
 8723   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8724   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8725   ins_encode %{
 8726     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8727   %}
 8728   ins_pipe( ialu_reg );
 8729 %}
 8730 
 8731 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8732   predicate(!VM_Version::supports_gfni());
 8733   match(Set dst (ReverseL src));
 8734   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8735   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8736   ins_encode %{
 8737     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8738   %}
 8739   ins_pipe( ialu_reg );
 8740 %}
 8741 
 8742 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8743   predicate(VM_Version::supports_gfni());
 8744   match(Set dst (ReverseL src));
 8745   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8746   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8747   ins_encode %{
 8748     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8749   %}
 8750   ins_pipe( ialu_reg );
 8751 %}
 8752 
 8753 //---------- Population Count Instructions -------------------------------------
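// Note: guarded by UsePopCountInstruction since popcnt is an optional CPU
// feature; popcnt also writes the flags, so cr is killed.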
 8754 
 8755 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8756   predicate(UsePopCountInstruction);
 8757   match(Set dst (PopCountI src));
 8758   effect(KILL cr);
 8759 
 8760   format %{ "popcnt  $dst, $src" %}
 8761   ins_encode %{
 8762     __ popcntl($dst$$Register, $src$$Register);
 8763   %}
 8764   ins_pipe(ialu_reg);
 8765 %}
 8766 
 8767 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8768   predicate(UsePopCountInstruction);
 8769   match(Set dst (PopCountI (LoadI mem)));
 8770   effect(KILL cr);
 8771 
 8772   format %{ "popcnt  $dst, $mem" %}
 8773   ins_encode %{
 8774     __ popcntl($dst$$Register, $mem$$Address);
 8775   %}
 8776   ins_pipe(ialu_reg);
 8777 %}
 8778 
 8779 // Note: Long.bitCount(long) returns an int.
 8780 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8781   predicate(UsePopCountInstruction);
 8782   match(Set dst (PopCountL src));
 8783   effect(KILL cr);
 8784 
 8785   format %{ "popcnt  $dst, $src" %}
 8786   ins_encode %{
 8787     __ popcntq($dst$$Register, $src$$Register);
 8788   %}
 8789   ins_pipe(ialu_reg);
 8790 %}
 8791 
 8792 // Note: Long.bitCount(long) returns an int.
 8793 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8794   predicate(UsePopCountInstruction);
 8795   match(Set dst (PopCountL (LoadL mem)));
 8796   effect(KILL cr);
 8797 
 8798   format %{ "popcnt  $dst, $mem" %}
 8799   ins_encode %{
 8800     __ popcntq($dst$$Register, $mem$$Address);
 8801   %}
 8802   ins_pipe(ialu_reg);
 8803 %}
 8804 
 8805 
 8806 //----------MemBar Instructions-----------------------------------------------
 8807 // Memory barrier flavors
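// Note: x86 is TSO -- loads have acquire and stores have release semantics --
// so the acquire/release/storestore barriers emit no code (size(0)). Only
// StoreLoad needs a real fence, implemented as 'lock addl [rsp], 0', which is
// cheaper than mfence.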
 8808 
 8809 instruct membar_acquire()
 8810 %{
 8811   match(MemBarAcquire);
 8812   match(LoadFence);
 8813   ins_cost(0);
 8814 
 8815   size(0);
 8816   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8817   ins_encode();
 8818   ins_pipe(empty);
 8819 %}
 8820 
 8821 instruct membar_acquire_lock()
 8822 %{
 8823   match(MemBarAcquireLock);
 8824   ins_cost(0);
 8825 
 8826   size(0);
 8827   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8828   ins_encode();
 8829   ins_pipe(empty);
 8830 %}
 8831 
 8832 instruct membar_release()
 8833 %{
 8834   match(MemBarRelease);
 8835   match(StoreFence);
 8836   ins_cost(0);
 8837 
 8838   size(0);
 8839   format %{ "MEMBAR-release ! (empty encoding)" %}
 8840   ins_encode();
 8841   ins_pipe(empty);
 8842 %}
 8843 
 8844 instruct membar_release_lock()
 8845 %{
 8846   match(MemBarReleaseLock);
 8847   ins_cost(0);
 8848 
 8849   size(0);
 8850   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8851   ins_encode();
 8852   ins_pipe(empty);
 8853 %}
 8854 
 8855 instruct membar_storeload(rFlagsReg cr) %{
 8856   match(MemBarStoreLoad);
 8857   effect(KILL cr);
 8858   ins_cost(400);
 8859 
 8860   format %{
 8861     $$template
 8862     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8863   %}
 8864   ins_encode %{
 8865     __ membar(Assembler::StoreLoad);
 8866   %}
 8867   ins_pipe(pipe_slow);
 8868 %}
 8869 
 8870 instruct membar_volatile(rFlagsReg cr) %{
 8871   match(MemBarVolatile);
 8872   effect(KILL cr);
 8873   ins_cost(400);
 8874 
 8875   format %{
 8876     $$template
 8877     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8878   %}
 8879   ins_encode %{
 8880     __ membar(Assembler::StoreLoad);
 8881   %}
 8882   ins_pipe(pipe_slow);
 8883 %}
 8884 
 8885 instruct unnecessary_membar_volatile()
 8886 %{
 8887   match(MemBarVolatile);
 8888   predicate(Matcher::post_store_load_barrier(n));
 8889   ins_cost(0);
 8890 
 8891   size(0);
 8892   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8893   ins_encode();
 8894   ins_pipe(empty);
 8895 %}
 8896 
 8897 instruct membar_full(rFlagsReg cr) %{
 8898   match(MemBarFull);
 8899   effect(KILL cr);
 8900   ins_cost(400);
 8901 
 8902   format %{
 8903     $$template
 8904     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8905   %}
 8906   ins_encode %{
 8907     __ membar(Assembler::StoreLoad);
 8908   %}
 8909   ins_pipe(pipe_slow);
 8910 %}
 8911 
 8912 instruct membar_storestore() %{
 8913   match(MemBarStoreStore);
 8914   match(StoreStoreFence);
 8915   ins_cost(0);
 8916 
 8917   size(0);
 8918   format %{ "MEMBAR-storestore (empty encoding)" %}
 8919   ins_encode();
 8920   ins_pipe(empty);
 8921 %}
 8922 
 8923 //----------Move Instructions--------------------------------------------------
 8924 
 8925 instruct castX2P(rRegP dst, rRegL src)
 8926 %{
 8927   match(Set dst (CastX2P src));
 8928 
 8929   format %{ "movq    $dst, $src\t# long->ptr" %}
 8930   ins_encode %{
 8931     if ($dst$$reg != $src$$reg) {
 8932       __ movptr($dst$$Register, $src$$Register);
 8933     }
 8934   %}
 8935   ins_pipe(ialu_reg_reg); // XXX
 8936 %}
 8937 
 8938 instruct castP2X(rRegL dst, rRegP src)
 8939 %{
 8940   match(Set dst (CastP2X src));
 8941 
 8942   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8943   ins_encode %{
 8944     if ($dst$$reg != $src$$reg) {
 8945       __ movptr($dst$$Register, $src$$Register);
 8946     }
 8947   %}
 8948   ins_pipe(ialu_reg_reg); // XXX
 8949 %}
 8950 
 8951 // Convert oop into int for vector alignment masking
 8952 instruct convP2I(rRegI dst, rRegP src)
 8953 %{
 8954   match(Set dst (ConvL2I (CastP2X src)));
 8955 
 8956   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8957   ins_encode %{
 8958     __ movl($dst$$Register, $src$$Register);
 8959   %}
 8960   ins_pipe(ialu_reg_reg); // XXX
 8961 %}
 8962 
 8963 // Convert compressed oop into int for vector alignment masking
 8964 // in case of 32-bit oops (heap < 4GB).
 8965 instruct convN2I(rRegI dst, rRegN src)
 8966 %{
 8967   predicate(CompressedOops::shift() == 0);
 8968   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8969 
 8970   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8971   ins_encode %{
 8972     __ movl($dst$$Register, $src$$Register);
 8973   %}
 8974   ins_pipe(ialu_reg_reg); // XXX
 8975 %}
 8976 
 8977 // Convert oop pointer into compressed form
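// Note: the maybe-null and not-null variants are selected via the node's
// pointer type; the maybe-null form must preserve null across the base/shift
// arithmetic, while the _not_null form can skip the null check.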
 8978 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8979   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8980   match(Set dst (EncodeP src));
 8981   effect(KILL cr);
 8982   format %{ "encode_heap_oop $dst,$src" %}
 8983   ins_encode %{
 8984     Register s = $src$$Register;
 8985     Register d = $dst$$Register;
 8986     if (s != d) {
 8987       __ movq(d, s);
 8988     }
 8989     __ encode_heap_oop(d);
 8990   %}
 8991   ins_pipe(ialu_reg_long);
 8992 %}
 8993 
 8994 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8995   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8996   match(Set dst (EncodeP src));
 8997   effect(KILL cr);
 8998   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8999   ins_encode %{
 9000     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 9001   %}
 9002   ins_pipe(ialu_reg_long);
 9003 %}
 9004 
 9005 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 9006   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9007             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9008   match(Set dst (DecodeN src));
 9009   effect(KILL cr);
 9010   format %{ "decode_heap_oop $dst,$src" %}
 9011   ins_encode %{
 9012     Register s = $src$$Register;
 9013     Register d = $dst$$Register;
 9014     if (s != d) {
 9015       __ movq(d, s);
 9016     }
 9017     __ decode_heap_oop(d);
 9018   %}
 9019   ins_pipe(ialu_reg_long);
 9020 %}
 9021 
 9022 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9023   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9024             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9025   match(Set dst (DecodeN src));
 9026   effect(KILL cr);
 9027   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9028   ins_encode %{
 9029     Register s = $src$$Register;
 9030     Register d = $dst$$Register;
 9031     if (s != d) {
 9032       __ decode_heap_oop_not_null(d, s);
 9033     } else {
 9034       __ decode_heap_oop_not_null(d);
 9035     }
 9036   %}
 9037   ins_pipe(ialu_reg_long);
 9038 %}
 9039 
 9040 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9041   match(Set dst (EncodePKlass src));
 9042   effect(TEMP dst, KILL cr);
 9043   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9044   ins_encode %{
 9045     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9046   %}
 9047   ins_pipe(ialu_reg_long);
 9048 %}
 9049 
 9050 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9051   match(Set dst (DecodeNKlass src));
 9052   effect(TEMP dst, KILL cr);
 9053   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9054   ins_encode %{
 9055     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9056   %}
 9057   ins_pipe(ialu_reg_long);
 9058 %}
 9059 
 9060 //----------Conditional Move---------------------------------------------------
 9061 // Jump
 9062 // dummy instruction for generating temp registers
 9063 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9064   match(Jump (LShiftL switch_val shift));
 9065   ins_cost(350);
 9066   predicate(false);
 9067   effect(TEMP dest);
 9068 
 9069   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9070             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9071   ins_encode %{
 9072     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9073     // to do that and the compiler is using that register as one it can allocate.
 9074     // So we build it all by hand.
 9075     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9076     // ArrayAddress dispatch(table, index);
 9077     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9078     __ lea($dest$$Register, $constantaddress);
 9079     __ jmp(dispatch);
 9080   %}
 9081   ins_pipe(pipe_jmp);
 9082 %}
 9083 
 9084 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9085   match(Jump (AddL (LShiftL switch_val shift) offset));
 9086   ins_cost(350);
 9087   effect(TEMP dest);
 9088 
 9089   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9090             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9091   ins_encode %{
 9092     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9093     // to do that and the compiler is using that register as one it can allocate.
 9094     // So we build it all by hand.
 9095     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9096     // ArrayAddress dispatch(table, index);
 9097     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9098     __ lea($dest$$Register, $constantaddress);
 9099     __ jmp(dispatch);
 9100   %}
 9101   ins_pipe(pipe_jmp);
 9102 %}
 9103 
 9104 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9105   match(Jump switch_val);
 9106   ins_cost(350);
 9107   effect(TEMP dest);
 9108 
 9109   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9110             "jmp     [$dest + $switch_val]\n\t" %}
 9111   ins_encode %{
 9112     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9113     // to do that and the compiler is using that register as one it can allocate.
 9114     // So we build it all by hand.
 9115     // Address index(noreg, switch_reg, Address::times_1);
 9116     // ArrayAddress dispatch(table, index);
 9117     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9118     __ lea($dest$$Register, $constantaddress);
 9119     __ jmp(dispatch);
 9120   %}
 9121   ins_pipe(pipe_jmp);
 9122 %}
 9123 
 9124 // Conditional move
 9125 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9126 %{
 9127   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9128   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9129 
 9130   ins_cost(100); // XXX
 9131   format %{ "setbn$cop $dst\t# signed, int" %}
 9132   ins_encode %{
 9133     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9134     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9135   %}
 9136   ins_pipe(ialu_reg);
 9137 %}
 9138 
 9139 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9140 %{
 9141   predicate(!UseAPX);
 9142   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9143 
 9144   ins_cost(200); // XXX
 9145   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9146   ins_encode %{
 9147     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9148   %}
 9149   ins_pipe(pipe_cmov_reg);
 9150 %}
 9151 
 9152 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9153 %{
 9154   predicate(UseAPX);
 9155   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9156 
 9157   ins_cost(200);
 9158   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9159   ins_encode %{
 9160     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9161   %}
 9162   ins_pipe(pipe_cmov_reg);
 9163 %}
 9164 
 9165 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9166 %{
 9167   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9168   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9169 
 9170   ins_cost(100); // XXX
 9171   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9172   ins_encode %{
 9173     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9174     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9175   %}
 9176   ins_pipe(ialu_reg);
 9177 %}
 9178 
 9179 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9180   predicate(!UseAPX);
 9181   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9182 
 9183   ins_cost(200); // XXX
 9184   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9185   ins_encode %{
 9186     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9187   %}
 9188   ins_pipe(pipe_cmov_reg);
 9189 %}
 9190 
 9191 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9192   predicate(UseAPX);
 9193   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9194 
 9195   ins_cost(200);
 9196   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9197   ins_encode %{
 9198     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9199   %}
 9200   ins_pipe(pipe_cmov_reg);
 9201 %}
 9202 
 9203 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9204 %{
 9205   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9206   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9207 
 9208   ins_cost(100); // XXX
 9209   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9210   ins_encode %{
 9211     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9212     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9213   %}
 9214   ins_pipe(ialu_reg);
 9215 %}
 9216 
 9217 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9218 %{
 9219   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9220   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9221 
 9222   ins_cost(100); // XXX
 9223   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9224   ins_encode %{
 9225     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9226     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9227   %}
 9228   ins_pipe(ialu_reg);
 9229 %}
 9230 
 9231 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9232   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9233 
 9234   ins_cost(200);
 9235   expand %{
 9236     cmovI_regU(cop, cr, dst, src);
 9237   %}
 9238 %}
 9239 
 9240 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9241   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9242 
 9243   ins_cost(200);
 9244   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9245   ins_encode %{
 9246     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9247   %}
 9248   ins_pipe(pipe_cmov_reg);
 9249 %}
 9250 
 9251 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9252   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9253   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9254 
 9255   ins_cost(200); // XXX
 9256   format %{ "cmovpl  $dst, $src\n\t"
 9257             "cmovnel $dst, $src" %}
 9258   ins_encode %{
 9259     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9260     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9261   %}
 9262   ins_pipe(pipe_cmov_reg);
 9263 %}
 9264 
 9265 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9266 // inputs of the CMove
 9267 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9268   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9269   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9270   effect(TEMP dst);
 9271 
 9272   ins_cost(200); // XXX
 9273   format %{ "cmovpl  $dst, $src\n\t"
 9274             "cmovnel $dst, $src" %}
 9275   ins_encode %{
 9276     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9277     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9278   %}
 9279   ins_pipe(pipe_cmov_reg);
 9280 %}
 9281 
 9282 // Conditional move
 9283 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9284   predicate(!UseAPX);
 9285   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9286 
 9287   ins_cost(250); // XXX
 9288   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9289   ins_encode %{
 9290     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9291   %}
 9292   ins_pipe(pipe_cmov_mem);
 9293 %}
 9294 
 9295 // Conditional move
 9296 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9297 %{
 9298   predicate(UseAPX);
 9299   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9300 
 9301   ins_cost(250);
 9302   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9303   ins_encode %{
 9304     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9305   %}
 9306   ins_pipe(pipe_cmov_mem);
 9307 %}
 9308 
 9309 // Conditional move
 9310 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9311 %{
 9312   predicate(!UseAPX);
 9313   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9314 
 9315   ins_cost(250); // XXX
 9316   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9317   ins_encode %{
 9318     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9319   %}
 9320   ins_pipe(pipe_cmov_mem);
 9321 %}
 9322 
 9323 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9324   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9325 
 9326   ins_cost(250);
 9327   expand %{
 9328     cmovI_memU(cop, cr, dst, src);
 9329   %}
 9330 %}
 9331 
 9332 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9333 %{
 9334   predicate(UseAPX);
 9335   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9336 
 9337   ins_cost(250);
 9338   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9339   ins_encode %{
 9340     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9341   %}
 9342   ins_pipe(pipe_cmov_mem);
 9343 %}
 9344 
 9345 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9346 %{
 9347   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9348 
 9349   ins_cost(250);
 9350   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9351   ins_encode %{
 9352     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9353   %}
 9354   ins_pipe(pipe_cmov_mem);
 9355 %}
 9356 
 9357 // Conditional move
 9358 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9359 %{
 9360   predicate(!UseAPX);
 9361   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9362 
 9363   ins_cost(200); // XXX
 9364   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9365   ins_encode %{
 9366     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9367   %}
 9368   ins_pipe(pipe_cmov_reg);
 9369 %}
 9370 
 9371 // Conditional move ndd
 9372 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9373 %{
 9374   predicate(UseAPX);
 9375   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9376 
 9377   ins_cost(200);
 9378   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9379   ins_encode %{
 9380     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9381   %}
 9382   ins_pipe(pipe_cmov_reg);
 9383 %}
 9384 
 9385 // Conditional move
 9386 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9387 %{
 9388   predicate(!UseAPX);
 9389   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9390 
 9391   ins_cost(200); // XXX
 9392   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9393   ins_encode %{
 9394     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9395   %}
 9396   ins_pipe(pipe_cmov_reg);
 9397 %}
 9398 
 9399 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9400   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9401 
 9402   ins_cost(200);
 9403   expand %{
 9404     cmovN_regU(cop, cr, dst, src);
 9405   %}
 9406 %}
 9407 
 9408 // Conditional move ndd
 9409 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9410 %{
 9411   predicate(UseAPX);
 9412   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9413 
 9414   ins_cost(200);
 9415   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9416   ins_encode %{
 9417     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9418   %}
 9419   ins_pipe(pipe_cmov_reg);
 9420 %}
 9421 
 9422 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
 9423   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9424 
 9425   ins_cost(200);
 9426   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9427   ins_encode %{
 9428     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9429   %}
 9430   ins_pipe(pipe_cmov_reg);
 9431 %}
 9432 
 9433 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9434   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9435   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9436 
 9437   ins_cost(200); // XXX
 9438   format %{ "cmovpl  $dst, $src\n\t"
 9439             "cmovnel $dst, $src" %}
 9440   ins_encode %{
 9441     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9442     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9443   %}
 9444   ins_pipe(pipe_cmov_reg);
 9445 %}
 9446 
 9447 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9448 // inputs of the CMove
 9449 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9450   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9451   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9452 
 9453   ins_cost(200); // XXX
 9454   format %{ "cmovpl  $dst, $src\n\t"
 9455             "cmovnel $dst, $src" %}
 9456   ins_encode %{
 9457     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9458     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9459   %}
 9460   ins_pipe(pipe_cmov_reg);
 9461 %}
 9462 
 9463 // Conditional move
 9464 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9465 %{
 9466   predicate(!UseAPX);
 9467   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9468 
 9469   ins_cost(200); // XXX
 9470   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9471   ins_encode %{
 9472     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9473   %}
 9474   ins_pipe(pipe_cmov_reg);  // XXX
 9475 %}
 9476 
 9477 // Conditional move ndd
 9478 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9479 %{
 9480   predicate(UseAPX);
 9481   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9482 
 9483   ins_cost(200);
 9484   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9485   ins_encode %{
 9486     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9487   %}
 9488   ins_pipe(pipe_cmov_reg);
 9489 %}
 9490 
 9491 // Conditional move
 9492 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9493 %{
 9494   predicate(!UseAPX);
 9495   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9496 
 9497   ins_cost(200); // XXX
 9498   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9499   ins_encode %{
 9500     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9501   %}
 9502   ins_pipe(pipe_cmov_reg); // XXX
 9503 %}
 9504 
 9505 // Conditional move ndd
 9506 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9507 %{
 9508   predicate(UseAPX);
 9509   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9510 
 9511   ins_cost(200);
 9512   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9513   ins_encode %{
 9514     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9515   %}
 9516   ins_pipe(pipe_cmov_reg);
 9517 %}
 9518 
 9519 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9520   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9521 
 9522   ins_cost(200);
 9523   expand %{
 9524     cmovP_regU(cop, cr, dst, src);
 9525   %}
 9526 %}
 9527 
 9528 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
 9529   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9530 
 9531   ins_cost(200);
 9532   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9533   ins_encode %{
 9534     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9535   %}
 9536   ins_pipe(pipe_cmov_reg);
 9537 %}
 9538 
 9539 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9540   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9541   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9542 
 9543   ins_cost(200); // XXX
 9544   format %{ "cmovpq  $dst, $src\n\t"
 9545             "cmovneq $dst, $src" %}
 9546   ins_encode %{
 9547     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9548     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9549   %}
 9550   ins_pipe(pipe_cmov_reg);
 9551 %}
 9552 
 9553 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9554 // inputs of the CMove
 9555 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9556   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9557   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9558 
 9559   ins_cost(200); // XXX
 9560   format %{ "cmovpq  $dst, $src\n\t"
 9561             "cmovneq $dst, $src" %}
 9562   ins_encode %{
 9563     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9564     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9565   %}
 9566   ins_pipe(pipe_cmov_reg);
 9567 %}
 9568 
 9569 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9570 %{
 9571   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9572   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9573 
 9574   ins_cost(100); // XXX
 9575   format %{ "setbn$cop $dst\t# signed, long" %}
 9576   ins_encode %{
 9577     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9578     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9579   %}
 9580   ins_pipe(ialu_reg);
 9581 %}
 9582 
 9583 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9584 %{
 9585   predicate(!UseAPX);
 9586   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9587 
 9588   ins_cost(200); // XXX
 9589   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9590   ins_encode %{
 9591     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9592   %}
 9593   ins_pipe(pipe_cmov_reg);  // XXX
 9594 %}
 9595 
 9596 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9597 %{
 9598   predicate(UseAPX);
 9599   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9600 
 9601   ins_cost(200);
 9602   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9603   ins_encode %{
 9604     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9605   %}
 9606   ins_pipe(pipe_cmov_reg);
 9607 %}
 9608 
 9609 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9610 %{
 9611   predicate(!UseAPX);
 9612   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9613 
 9614   ins_cost(200); // XXX
 9615   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9616   ins_encode %{
 9617     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9618   %}
 9619   ins_pipe(pipe_cmov_mem);  // XXX
 9620 %}
 9621 
 9622 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9623 %{
 9624   predicate(UseAPX);
 9625   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9626 
 9627   ins_cost(200);
 9628   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9629   ins_encode %{
 9630     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9631   %}
 9632   ins_pipe(pipe_cmov_mem);
 9633 %}
 9634 
 9635 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9636 %{
 9637   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9638   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9639 
 9640   ins_cost(100); // XXX
 9641   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9642   ins_encode %{
 9643     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9644     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9645   %}
 9646   ins_pipe(ialu_reg);
 9647 %}
 9648 
 9649 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9650 %{
 9651   predicate(!UseAPX);
 9652   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9653 
 9654   ins_cost(200); // XXX
 9655   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9656   ins_encode %{
 9657     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9658   %}
 9659   ins_pipe(pipe_cmov_reg); // XXX
 9660 %}
 9661 
 9662 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9663 %{
 9664   predicate(UseAPX);
 9665   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9666 
 9667   ins_cost(200);
 9668   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9669   ins_encode %{
 9670     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9671   %}
 9672   ins_pipe(pipe_cmov_reg);
 9673 %}
 9674 
 9675 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9676 %{
 9677   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9678   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9679 
 9680   ins_cost(100); // XXX
 9681   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9682   ins_encode %{
 9683     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9684     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9685   %}
 9686   ins_pipe(ialu_reg);
 9687 %}
 9688 
 9689 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9690 %{
 9691   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9692   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9693 
 9694   ins_cost(100); // XXX
 9695   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9696   ins_encode %{
 9697     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9698     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9699   %}
 9700   ins_pipe(ialu_reg);
 9701 %}
 9702 
 9703 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9704   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9705 
 9706   ins_cost(200);
 9707   expand %{
 9708     cmovL_regU(cop, cr, dst, src);
 9709   %}
 9710 %}
 9711 
 9712 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9713 %{
 9714   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9715 
 9716   ins_cost(200);
 9717   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9718   ins_encode %{
 9719     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9720   %}
 9721   ins_pipe(pipe_cmov_reg);
 9722 %}
 9723 
 9724 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9725   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9726   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9727 
 9728   ins_cost(200); // XXX
 9729   format %{ "cmovpq  $dst, $src\n\t"
 9730             "cmovneq $dst, $src" %}
 9731   ins_encode %{
 9732     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9733     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9734   %}
 9735   ins_pipe(pipe_cmov_reg);
 9736 %}
 9737 
 9738 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9739 // inputs of the CMove
 9740 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9741   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9742   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9743 
 9744   ins_cost(200); // XXX
 9745   format %{ "cmovpq  $dst, $src\n\t"
 9746             "cmovneq $dst, $src" %}
 9747   ins_encode %{
 9748     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9749     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9750   %}
 9751   ins_pipe(pipe_cmov_reg);
 9752 %}
 9753 
 9754 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9755 %{
 9756   predicate(!UseAPX);
 9757   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9758 
 9759   ins_cost(200); // XXX
 9760   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9761   ins_encode %{
 9762     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9763   %}
 9764   ins_pipe(pipe_cmov_mem); // XXX
 9765 %}
 9766 
 9767 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9768   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9769 
 9770   ins_cost(200);
 9771   expand %{
 9772     cmovL_memU(cop, cr, dst, src);
 9773   %}
 9774 %}
 9775 
 9776 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9777 %{
 9778   predicate(UseAPX);
 9779   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9780 
 9781   ins_cost(200);
 9782   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9783   ins_encode %{
 9784     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9785   %}
 9786   ins_pipe(pipe_cmov_mem);
 9787 %}
 9788 
 9789 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9790 %{
 9791   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9792 
 9793   ins_cost(200);
 9794   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9795   ins_encode %{
 9796     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9797   %}
 9798   ins_pipe(pipe_cmov_mem);
 9799 %}
 9800 
 9801 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9802 %{
 9803   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9804 
 9805   ins_cost(200); // XXX
 9806   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9807             "movss     $dst, $src\n"
 9808     "skip:" %}
 9809   ins_encode %{
 9810     Label Lskip;
 9811     // Invert sense of branch from sense of CMOV
 9812     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9813     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9814     __ bind(Lskip);
 9815   %}
 9816   ins_pipe(pipe_slow);
 9817 %}
 9818 
 9819 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9820 %{
 9821   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9822 
 9823   ins_cost(200); // XXX
 9824   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9825             "movss     $dst, $src\n"
 9826     "skip:" %}
 9827   ins_encode %{
 9828     Label Lskip;
 9829     // Invert sense of branch from sense of CMOV
 9830     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9831     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9832     __ bind(Lskip);
 9833   %}
 9834   ins_pipe(pipe_slow);
 9835 %}
 9836 
 9837 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9838   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9839 
 9840   ins_cost(200);
 9841   expand %{
 9842     cmovF_regU(cop, cr, dst, src);
 9843   %}
 9844 %}
 9845 
 9846 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9847 %{
 9848   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9849 
 9850   ins_cost(200); // XXX
 9851   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9852             "movss     $dst, $src\n"
 9853     "skip:" %}
 9854   ins_encode %{
 9855     Label Lskip;
 9856     // Invert sense of branch from sense of CMOV
 9857     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9858     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9859     __ bind(Lskip);
 9860   %}
 9861   ins_pipe(pipe_slow);
 9862 %}
 9863 
 9864 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9865 %{
 9866   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9867 
 9868   ins_cost(200); // XXX
 9869   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9870             "movsd     $dst, $src\n"
 9871     "skip:" %}
 9872   ins_encode %{
 9873     Label Lskip;
 9874     // Invert sense of branch from sense of CMOV
 9875     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9876     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9877     __ bind(Lskip);
 9878   %}
 9879   ins_pipe(pipe_slow);
 9880 %}
 9881 
 9882 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9883 %{
 9884   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9885 
 9886   ins_cost(200); // XXX
 9887   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9888             "movsd     $dst, $src\n"
 9889     "skip:" %}
 9890   ins_encode %{
 9891     Label Lskip;
 9892     // Invert sense of branch from sense of CMOV
 9893     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9894     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9895     __ bind(Lskip);
 9896   %}
 9897   ins_pipe(pipe_slow);
 9898 %}
 9899 
 9900 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9901   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9902 
 9903   ins_cost(200);
 9904   expand %{
 9905     cmovD_regU(cop, cr, dst, src);
 9906   %}
 9907 %}
 9908 
 9909 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9910 %{
 9911   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9912 
 9913   ins_cost(200); // XXX
 9914   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9915             "movsd     $dst, $src\n"
 9916     "skip:" %}
 9917   ins_encode %{
 9918     Label Lskip;
 9919     // Invert sense of branch from sense of CMOV
 9920     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9921     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9922     __ bind(Lskip);
 9923   %}
 9924   ins_pipe(pipe_slow);
 9925 %}
 9926 
 9927 //----------Arithmetic Instructions--------------------------------------------
 9928 //----------Addition Instructions----------------------------------------------
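
// Most ALU instructs below come in pairs. The legacy x86 forms are
// two-operand read-modify-write (addl dst, src computes dst += src), so dst
// must alias the first input; the UseAPX "ndd" forms use the new-data-
// destination encoding (eaddl dst, src1, src2) and write a third register.
// The Flag_ndd_demotable_opr* hints name source operands that, if allocated
// to the same register as dst, let the encoder demote the instruction back
// to the shorter legacy form. The trailing "false" passed to the e*
// emitters selects the flag-setting (non-NF) APX variant, consistent with
// the KILL cr effects.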
 9929 
 9930 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9931 %{
 9932   predicate(!UseAPX);
 9933   match(Set dst (AddI dst src));
 9934   effect(KILL cr);
 9935   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9936   format %{ "addl    $dst, $src\t# int" %}
 9937   ins_encode %{
 9938     __ addl($dst$$Register, $src$$Register);
 9939   %}
 9940   ins_pipe(ialu_reg_reg);
 9941 %}
 9942 
 9943 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9944 %{
 9945   predicate(UseAPX);
 9946   match(Set dst (AddI src1 src2));
 9947   effect(KILL cr);
 9948   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9949 
 9950   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9951   ins_encode %{
 9952     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9953   %}
 9954   ins_pipe(ialu_reg_reg);
 9955 %}
 9956 
 9957 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9958 %{
 9959   predicate(!UseAPX);
 9960   match(Set dst (AddI dst src));
 9961   effect(KILL cr);
 9962   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9963 
 9964   format %{ "addl    $dst, $src\t# int" %}
 9965   ins_encode %{
 9966     __ addl($dst$$Register, $src$$constant);
 9967   %}
 9968   ins_pipe( ialu_reg );
 9969 %}
 9970 
 9971 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9972 %{
 9973   predicate(UseAPX);
 9974   match(Set dst (AddI src1 src2));
 9975   effect(KILL cr);
 9976   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9977 
 9978   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9979   ins_encode %{
 9980     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9981   %}
 9982   ins_pipe( ialu_reg );
 9983 %}
 9984 
 9985 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9986 %{
 9987   predicate(UseAPX);
 9988   match(Set dst (AddI (LoadI src1) src2));
 9989   effect(KILL cr);
 9990   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9991 
 9992   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9993   ins_encode %{
 9994     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9995   %}
 9996   ins_pipe( ialu_reg );
 9997 %}
 9998 
 9999 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10000 %{
10001   predicate(!UseAPX);
10002   match(Set dst (AddI dst (LoadI src)));
10003   effect(KILL cr);
10004   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10005 
10006   ins_cost(150); // XXX
10007   format %{ "addl    $dst, $src\t# int" %}
10008   ins_encode %{
10009     __ addl($dst$$Register, $src$$Address);
10010   %}
10011   ins_pipe(ialu_reg_mem);
10012 %}
10013 
10014 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10015 %{
10016   predicate(UseAPX);
10017   match(Set dst (AddI src1 (LoadI src2)));
10018   effect(KILL cr);
10019   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10020 
10021   ins_cost(150);
10022   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10023   ins_encode %{
10024     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10025   %}
10026   ins_pipe(ialu_reg_mem);
10027 %}
10028 
10029 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10030 %{
10031   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10032   effect(KILL cr);
10033   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10034 
10035   ins_cost(150); // XXX
10036   format %{ "addl    $dst, $src\t# int" %}
10037   ins_encode %{
10038     __ addl($dst$$Address, $src$$Register);
10039   %}
10040   ins_pipe(ialu_mem_reg);
10041 %}
10042 
10043 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10044 %{
10045   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10046   effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

10050   ins_cost(125); // XXX
10051   format %{ "addl    $dst, $src\t# int" %}
10052   ins_encode %{
10053     __ addl($dst$$Address, $src$$constant);
10054   %}
10055   ins_pipe(ialu_mem_imm);
10056 %}
10057 
10058 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10059 %{
10060   predicate(!UseAPX && UseIncDec);
10061   match(Set dst (AddI dst src));
10062   effect(KILL cr);
10063 
10064   format %{ "incl    $dst\t# int" %}
10065   ins_encode %{
10066     __ incrementl($dst$$Register);
10067   %}
10068   ins_pipe(ialu_reg);
10069 %}
10070 
10071 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10072 %{
10073   predicate(UseAPX && UseIncDec);
10074   match(Set dst (AddI src val));
10075   effect(KILL cr);
10076   flag(PD::Flag_ndd_demotable_opr1);
10077 
10078   format %{ "eincl    $dst, $src\t# int ndd" %}
10079   ins_encode %{
10080     __ eincl($dst$$Register, $src$$Register, false);
10081   %}
10082   ins_pipe(ialu_reg);
10083 %}
10084 
10085 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10086 %{
10087   predicate(UseAPX && UseIncDec);
10088   match(Set dst (AddI (LoadI src) val));
10089   effect(KILL cr);
10090 
10091   format %{ "eincl    $dst, $src\t# int ndd" %}
10092   ins_encode %{
10093     __ eincl($dst$$Register, $src$$Address, false);
10094   %}
10095   ins_pipe(ialu_reg);
10096 %}
10097 
10098 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10099 %{
10100   predicate(UseIncDec);
10101   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10102   effect(KILL cr);
10103 
10104   ins_cost(125); // XXX
10105   format %{ "incl    $dst\t# int" %}
10106   ins_encode %{
10107     __ incrementl($dst$$Address);
10108   %}
10109   ins_pipe(ialu_mem_imm);
10110 %}
10111 
10112 // XXX why does that use AddI
10113 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10114 %{
10115   predicate(!UseAPX && UseIncDec);
10116   match(Set dst (AddI dst src));
10117   effect(KILL cr);
10118 
10119   format %{ "decl    $dst\t# int" %}
10120   ins_encode %{
10121     __ decrementl($dst$$Register);
10122   %}
10123   ins_pipe(ialu_reg);
10124 %}
10125 
10126 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10127 %{
10128   predicate(UseAPX && UseIncDec);
10129   match(Set dst (AddI src val));
10130   effect(KILL cr);
10131   flag(PD::Flag_ndd_demotable_opr1);
10132 
10133   format %{ "edecl    $dst, $src\t# int ndd" %}
10134   ins_encode %{
10135     __ edecl($dst$$Register, $src$$Register, false);
10136   %}
10137   ins_pipe(ialu_reg);
10138 %}
10139 
10140 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10141 %{
10142   predicate(UseAPX && UseIncDec);
10143   match(Set dst (AddI (LoadI src) val));
10144   effect(KILL cr);
10145 
10146   format %{ "edecl    $dst, $src\t# int ndd" %}
10147   ins_encode %{
10148     __ edecl($dst$$Register, $src$$Address, false);
10149   %}
10150   ins_pipe(ialu_reg);
10151 %}
10152 
10153 // XXX why does that use AddI
10154 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10155 %{
10156   predicate(UseIncDec);
10157   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10158   effect(KILL cr);
10159 
10160   ins_cost(125); // XXX
10161   format %{ "decl    $dst\t# int" %}
10162   ins_encode %{
10163     __ decrementl($dst$$Address);
10164   %}
10165   ins_pipe(ialu_mem_imm);
10166 %}
10167 
10168 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10169 %{
10170   predicate(VM_Version::supports_fast_2op_lea());
10171   match(Set dst (AddI (LShiftI index scale) disp));
10172 
10173   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10174   ins_encode %{
10175     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10176     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10177   %}
10178   ins_pipe(ialu_reg_reg);
10179 %}
10180 
10181 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10182 %{
10183   predicate(VM_Version::supports_fast_3op_lea());
10184   match(Set dst (AddI (AddI base index) disp));
10185 
10186   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10187   ins_encode %{
10188     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10189   %}
10190   ins_pipe(ialu_reg_reg);
10191 %}
10192 
10193 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10194 %{
10195   predicate(VM_Version::supports_fast_2op_lea());
10196   match(Set dst (AddI base (LShiftI index scale)));
10197 
10198   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10199   ins_encode %{
10200     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10201     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10202   %}
10203   ins_pipe(ialu_reg_reg);
10204 %}
10205 
10206 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10207 %{
10208   predicate(VM_Version::supports_fast_3op_lea());
10209   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10210 
10211   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10212   ins_encode %{
10213     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10214     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10215   %}
10216   ins_pipe(ialu_reg_reg);
10217 %}
10218 
10219 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10220 %{
10221   predicate(!UseAPX);
10222   match(Set dst (AddL dst src));
10223   effect(KILL cr);
10224   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10225 
10226   format %{ "addq    $dst, $src\t# long" %}
10227   ins_encode %{
10228     __ addq($dst$$Register, $src$$Register);
10229   %}
10230   ins_pipe(ialu_reg_reg);
10231 %}
10232 
10233 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10234 %{
10235   predicate(UseAPX);
10236   match(Set dst (AddL src1 src2));
10237   effect(KILL cr);
10238   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10239 
10240   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10241   ins_encode %{
10242     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10243   %}
10244   ins_pipe(ialu_reg_reg);
10245 %}
10246 
10247 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10248 %{
10249   predicate(!UseAPX);
10250   match(Set dst (AddL dst src));
10251   effect(KILL cr);
10252   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10253 
10254   format %{ "addq    $dst, $src\t# long" %}
10255   ins_encode %{
10256     __ addq($dst$$Register, $src$$constant);
10257   %}
10258   ins_pipe( ialu_reg );
10259 %}
10260 
10261 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10262 %{
10263   predicate(UseAPX);
10264   match(Set dst (AddL src1 src2));
10265   effect(KILL cr);
10266   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10267 
10268   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10269   ins_encode %{
10270     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10271   %}
10272   ins_pipe( ialu_reg );
10273 %}
10274 
10275 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10276 %{
10277   predicate(UseAPX);
10278   match(Set dst (AddL (LoadL src1) src2));
10279   effect(KILL cr);
10280   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10281 
10282   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10283   ins_encode %{
10284     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10285   %}
10286   ins_pipe( ialu_reg );
10287 %}
10288 
10289 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10290 %{
10291   predicate(!UseAPX);
10292   match(Set dst (AddL dst (LoadL src)));
10293   effect(KILL cr);
10294   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10295 
10296   ins_cost(150); // XXX
10297   format %{ "addq    $dst, $src\t# long" %}
10298   ins_encode %{
10299     __ addq($dst$$Register, $src$$Address);
10300   %}
10301   ins_pipe(ialu_reg_mem);
10302 %}
10303 
10304 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10305 %{
10306   predicate(UseAPX);
10307   match(Set dst (AddL src1 (LoadL src2)));
10308   effect(KILL cr);
10309   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10310 
10311   ins_cost(150);
10312   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10313   ins_encode %{
10314     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10315   %}
10316   ins_pipe(ialu_reg_mem);
10317 %}
10318 
10319 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10320 %{
10321   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10322   effect(KILL cr);
10323   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10324 
10325   ins_cost(150); // XXX
10326   format %{ "addq    $dst, $src\t# long" %}
10327   ins_encode %{
10328     __ addq($dst$$Address, $src$$Register);
10329   %}
10330   ins_pipe(ialu_mem_reg);
10331 %}
10332 
10333 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10334 %{
10335   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10336   effect(KILL cr);
10337   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10338 
10339   ins_cost(125); // XXX
10340   format %{ "addq    $dst, $src\t# long" %}
10341   ins_encode %{
10342     __ addq($dst$$Address, $src$$constant);
10343   %}
10344   ins_pipe(ialu_mem_imm);
10345 %}
10346 
10347 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10348 %{
10349   predicate(!UseAPX && UseIncDec);
10350   match(Set dst (AddL dst src));
10351   effect(KILL cr);
10352 
10353   format %{ "incq    $dst\t# long" %}
10354   ins_encode %{
10355     __ incrementq($dst$$Register);
10356   %}
10357   ins_pipe(ialu_reg);
10358 %}
10359 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10361 %{
10362   predicate(UseAPX && UseIncDec);
10363   match(Set dst (AddL src val));
10364   effect(KILL cr);
10365   flag(PD::Flag_ndd_demotable_opr1);
10366 
10367   format %{ "eincq    $dst, $src\t# long ndd" %}
10368   ins_encode %{
10369     __ eincq($dst$$Register, $src$$Register, false);
10370   %}
10371   ins_pipe(ialu_reg);
10372 %}
10373 
10374 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10375 %{
10376   predicate(UseAPX && UseIncDec);
10377   match(Set dst (AddL (LoadL src) val));
10378   effect(KILL cr);
10379 
10380   format %{ "eincq    $dst, $src\t# long ndd" %}
10381   ins_encode %{
10382     __ eincq($dst$$Register, $src$$Address, false);
10383   %}
10384   ins_pipe(ialu_reg);
10385 %}
10386 
10387 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10388 %{
10389   predicate(UseIncDec);
10390   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10391   effect(KILL cr);
10392 
10393   ins_cost(125); // XXX
10394   format %{ "incq    $dst\t# long" %}
10395   ins_encode %{
10396     __ incrementq($dst$$Address);
10397   %}
10398   ins_pipe(ialu_mem_imm);
10399 %}
10400 
10401 // XXX why does that use AddL
10402 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10403 %{
10404   predicate(!UseAPX && UseIncDec);
10405   match(Set dst (AddL dst src));
10406   effect(KILL cr);
10407 
10408   format %{ "decq    $dst\t# long" %}
10409   ins_encode %{
10410     __ decrementq($dst$$Register);
10411   %}
10412   ins_pipe(ialu_reg);
10413 %}
10414 
10415 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10416 %{
10417   predicate(UseAPX && UseIncDec);
10418   match(Set dst (AddL src val));
10419   effect(KILL cr);
10420   flag(PD::Flag_ndd_demotable_opr1);
10421 
10422   format %{ "edecq    $dst, $src\t# long ndd" %}
10423   ins_encode %{
10424     __ edecq($dst$$Register, $src$$Register, false);
10425   %}
10426   ins_pipe(ialu_reg);
10427 %}
10428 
10429 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10430 %{
10431   predicate(UseAPX && UseIncDec);
10432   match(Set dst (AddL (LoadL src) val));
10433   effect(KILL cr);
10434 
10435   format %{ "edecq    $dst, $src\t# long ndd" %}
10436   ins_encode %{
10437     __ edecq($dst$$Register, $src$$Address, false);
10438   %}
10439   ins_pipe(ialu_reg);
10440 %}
10441 
10442 // XXX why does that use AddL
10443 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10444 %{
10445   predicate(UseIncDec);
10446   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10447   effect(KILL cr);
10448 
10449   ins_cost(125); // XXX
10450   format %{ "decq    $dst\t# long" %}
10451   ins_encode %{
10452     __ decrementq($dst$$Address);
10453   %}
10454   ins_pipe(ialu_mem_imm);
10455 %}
10456 
10457 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10458 %{
10459   predicate(VM_Version::supports_fast_2op_lea());
10460   match(Set dst (AddL (LShiftL index scale) disp));
10461 
10462   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10463   ins_encode %{
10464     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10465     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10466   %}
10467   ins_pipe(ialu_reg_reg);
10468 %}
10469 
10470 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10471 %{
10472   predicate(VM_Version::supports_fast_3op_lea());
10473   match(Set dst (AddL (AddL base index) disp));
10474 
10475   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10476   ins_encode %{
10477     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10478   %}
10479   ins_pipe(ialu_reg_reg);
10480 %}
10481 
10482 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10483 %{
10484   predicate(VM_Version::supports_fast_2op_lea());
10485   match(Set dst (AddL base (LShiftL index scale)));
10486 
10487   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10488   ins_encode %{
10489     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10490     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10491   %}
10492   ins_pipe(ialu_reg_reg);
10493 %}
10494 
10495 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10496 %{
10497   predicate(VM_Version::supports_fast_3op_lea());
10498   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10499 
10500   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10501   ins_encode %{
10502     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10503     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10504   %}
10505   ins_pipe(ialu_reg_reg);
10506 %}
10507 
10508 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10509 %{
10510   match(Set dst (AddP dst src));
10511   effect(KILL cr);
10512   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10513 
10514   format %{ "addq    $dst, $src\t# ptr" %}
10515   ins_encode %{
10516     __ addq($dst$$Register, $src$$Register);
10517   %}
10518   ins_pipe(ialu_reg_reg);
10519 %}
10520 
10521 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10522 %{
10523   match(Set dst (AddP dst src));
10524   effect(KILL cr);
10525   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10526 
10527   format %{ "addq    $dst, $src\t# ptr" %}
10528   ins_encode %{
10529     __ addq($dst$$Register, $src$$constant);
10530   %}
10531   ins_pipe( ialu_reg );
10532 %}
10533 
10534 // XXX addP mem ops ????
10535 
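// The cast instructions below are erased at code emission (size(0), empty
// encoding): they exist only to keep the type and range information of the
// ideal CheckCastPP/Cast* nodes pinned in the graph.  With
// VerifyConstraintCasts > 0, the checked CastII/CastLL variants instead emit
// a runtime check that the register value lies in the node's range.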
10536 instruct checkCastPP(rRegP dst)
10537 %{
10538   match(Set dst (CheckCastPP dst));
10539 
10540   size(0);
10541   format %{ "# checkcastPP of $dst" %}
10542   ins_encode(/* empty encoding */);
10543   ins_pipe(empty);
10544 %}
10545 
10546 instruct castPP(rRegP dst)
10547 %{
10548   match(Set dst (CastPP dst));
10549 
10550   size(0);
10551   format %{ "# castPP of $dst" %}
10552   ins_encode(/* empty encoding */);
10553   ins_pipe(empty);
10554 %}
10555 
10556 instruct castII(rRegI dst)
10557 %{
10558   predicate(VerifyConstraintCasts == 0);
10559   match(Set dst (CastII dst));
10560 
10561   size(0);
10562   format %{ "# castII of $dst" %}
10563   ins_encode(/* empty encoding */);
10564   ins_cost(0);
10565   ins_pipe(empty);
10566 %}
10567 
10568 instruct castII_checked(rRegI dst, rFlagsReg cr)
10569 %{
10570   predicate(VerifyConstraintCasts > 0);
10571   match(Set dst (CastII dst));
10572 
10573   effect(KILL cr);
10574   format %{ "# cast_checked_II $dst" %}
10575   ins_encode %{
10576     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10577   %}
10578   ins_pipe(pipe_slow);
10579 %}
10580 
10581 instruct castLL(rRegL dst)
10582 %{
10583   predicate(VerifyConstraintCasts == 0);
10584   match(Set dst (CastLL dst));
10585 
10586   size(0);
10587   format %{ "# castLL of $dst" %}
10588   ins_encode(/* empty encoding */);
10589   ins_cost(0);
10590   ins_pipe(empty);
10591 %}
10592 
10593 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10594 %{
10595   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10596   match(Set dst (CastLL dst));
10597 
10598   effect(KILL cr);
10599   format %{ "# cast_checked_LL $dst" %}
10600   ins_encode %{
10601     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10602   %}
10603   ins_pipe(pipe_slow);
10604 %}
10605 
10606 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10607 %{
10608   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10609   match(Set dst (CastLL dst));
10610 
10611   effect(KILL cr, TEMP tmp);
10612   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10613   ins_encode %{
10614     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10615   %}
10616   ins_pipe(pipe_slow);
10617 %}
10618 
10619 instruct castFF(regF dst)
10620 %{
10621   match(Set dst (CastFF dst));
10622 
10623   size(0);
10624   format %{ "# castFF of $dst" %}
10625   ins_encode(/* empty encoding */);
10626   ins_cost(0);
10627   ins_pipe(empty);
10628 %}
10629 
10630 instruct castHH(regF dst)
10631 %{
10632   match(Set dst (CastHH dst));
10633 
10634   size(0);
10635   format %{ "# castHH of $dst" %}
10636   ins_encode(/* empty encoding */);
10637   ins_cost(0);
10638   ins_pipe(empty);
10639 %}
10640 
10641 instruct castDD(regD dst)
10642 %{
10643   match(Set dst (CastDD dst));
10644 
10645   size(0);
10646   format %{ "# castDD of $dst" %}
10647   ins_encode(/* empty encoding */);
10648   ins_cost(0);
10649   ins_pipe(empty);
10650 %}
10651 
10652 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
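// How the cmpxchg patterns work: lock cmpxchg compares rax against the
// memory operand, stores newval on a match, and in either case leaves the
// value it observed in rax.  The CompareAndSwap* forms below therefore KILL
// oldval (rax is overwritten) and materialize the boolean result with setcc,
// while the CompareAndExchange* forms further down simply define oldval as
// their result.  For example, AtomicLong.compareAndSet is intrinsified to a
// CompareAndSwapL node that matches here.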
10653 instruct compareAndSwapP(rRegI res,
10654                          memory mem_ptr,
10655                          rax_RegP oldval, rRegP newval,
10656                          rFlagsReg cr)
10657 %{
10658   predicate(n->as_LoadStore()->barrier_data() == 0);
10659   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10660   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10661   effect(KILL cr, KILL oldval);
10662 
10663   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10664             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10665             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10666   ins_encode %{
10667     __ lock();
10668     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10669     __ setcc(Assembler::equal, $res$$Register);
10670   %}
10671   ins_pipe( pipe_cmpxchg );
10672 %}
10673 
10674 instruct compareAndSwapL(rRegI res,
10675                          memory mem_ptr,
10676                          rax_RegL oldval, rRegL newval,
10677                          rFlagsReg cr)
10678 %{
10679   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10680   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10681   effect(KILL cr, KILL oldval);
10682 
10683   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10684             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10685             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10686   ins_encode %{
10687     __ lock();
10688     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10689     __ setcc(Assembler::equal, $res$$Register);
10690   %}
10691   ins_pipe( pipe_cmpxchg );
10692 %}
10693 
10694 instruct compareAndSwapI(rRegI res,
10695                          memory mem_ptr,
10696                          rax_RegI oldval, rRegI newval,
10697                          rFlagsReg cr)
10698 %{
10699   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10700   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10701   effect(KILL cr, KILL oldval);
10702 
10703   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10704             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10705             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10706   ins_encode %{
10707     __ lock();
10708     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10709     __ setcc(Assembler::equal, $res$$Register);
10710   %}
10711   ins_pipe( pipe_cmpxchg );
10712 %}
10713 
10714 instruct compareAndSwapB(rRegI res,
10715                          memory mem_ptr,
10716                          rax_RegI oldval, rRegI newval,
10717                          rFlagsReg cr)
10718 %{
10719   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10720   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10721   effect(KILL cr, KILL oldval);
10722 
10723   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10724             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10725             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10726   ins_encode %{
10727     __ lock();
10728     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10729     __ setcc(Assembler::equal, $res$$Register);
10730   %}
10731   ins_pipe( pipe_cmpxchg );
10732 %}
10733 
10734 instruct compareAndSwapS(rRegI res,
10735                          memory mem_ptr,
10736                          rax_RegI oldval, rRegI newval,
10737                          rFlagsReg cr)
10738 %{
10739   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10740   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10741   effect(KILL cr, KILL oldval);
10742 
10743   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10744             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10745             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10746   ins_encode %{
10747     __ lock();
10748     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10749     __ setcc(Assembler::equal, $res$$Register);
10750   %}
10751   ins_pipe( pipe_cmpxchg );
10752 %}
10753 
10754 instruct compareAndSwapN(rRegI res,
10755                           memory mem_ptr,
10756                           rax_RegN oldval, rRegN newval,
10757                           rFlagsReg cr) %{
10758   predicate(n->as_LoadStore()->barrier_data() == 0);
10759   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10760   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10761   effect(KILL cr, KILL oldval);
10762 
10763   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10764             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10765             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10766   ins_encode %{
10767     __ lock();
10768     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10769     __ setcc(Assembler::equal, $res$$Register);
10770   %}
10771   ins_pipe( pipe_cmpxchg );
10772 %}
10773 
10774 instruct compareAndExchangeB(
10775                          memory mem_ptr,
10776                          rax_RegI oldval, rRegI newval,
10777                          rFlagsReg cr)
10778 %{
10779   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10780   effect(KILL cr);
10781 
10782   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10783             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10784   ins_encode %{
10785     __ lock();
10786     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10787   %}
10788   ins_pipe( pipe_cmpxchg );
10789 %}
10790 
10791 instruct compareAndExchangeS(
10792                          memory mem_ptr,
10793                          rax_RegI oldval, rRegI newval,
10794                          rFlagsReg cr)
10795 %{
10796   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10797   effect(KILL cr);
10798 
10799   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10800             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10801   ins_encode %{
10802     __ lock();
10803     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10804   %}
10805   ins_pipe( pipe_cmpxchg );
10806 %}
10807 
10808 instruct compareAndExchangeI(
10809                          memory mem_ptr,
10810                          rax_RegI oldval, rRegI newval,
10811                          rFlagsReg cr)
10812 %{
10813   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10814   effect(KILL cr);
10815 
10816   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10817             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10818   ins_encode %{
10819     __ lock();
10820     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10821   %}
10822   ins_pipe( pipe_cmpxchg );
10823 %}
10824 
10825 instruct compareAndExchangeL(
10826                          memory mem_ptr,
10827                          rax_RegL oldval, rRegL newval,
10828                          rFlagsReg cr)
10829 %{
10830   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10831   effect(KILL cr);
10832 
10833   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10834             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10835   ins_encode %{
10836     __ lock();
10837     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10838   %}
10839   ins_pipe( pipe_cmpxchg );
10840 %}
10841 
10842 instruct compareAndExchangeN(
10843                           memory mem_ptr,
10844                           rax_RegN oldval, rRegN newval,
10845                           rFlagsReg cr) %{
10846   predicate(n->as_LoadStore()->barrier_data() == 0);
10847   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10848   effect(KILL cr);
10849 
10850   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10851             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10852   ins_encode %{
10853     __ lock();
10854     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10855   %}
10856   ins_pipe( pipe_cmpxchg );
10857 %}
10858 
10859 instruct compareAndExchangeP(
10860                          memory mem_ptr,
10861                          rax_RegP oldval, rRegP newval,
10862                          rFlagsReg cr)
10863 %{
10864   predicate(n->as_LoadStore()->barrier_data() == 0);
10865   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10866   effect(KILL cr);
10867 
10868   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10869             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10870   ins_encode %{
10871     __ lock();
10872     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10873   %}
10874   ins_pipe( pipe_cmpxchg );
10875 %}
10876 
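// GetAndAdd* lowering: when the fetched value is unused (result_not_used()),
// a plain locked add is emitted, which is cheaper than xadd.  Otherwise
// lock xadd atomically adds newval to memory and returns the prior memory
// value in the register operand.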
10877 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10878   predicate(n->as_LoadStore()->result_not_used());
10879   match(Set dummy (GetAndAddB mem add));
10880   effect(KILL cr);
10881   format %{ "addb_lock   $mem, $add" %}
10882   ins_encode %{
10883     __ lock();
10884     __ addb($mem$$Address, $add$$Register);
10885   %}
10886   ins_pipe(pipe_cmpxchg);
10887 %}
10888 
10889 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10890   predicate(n->as_LoadStore()->result_not_used());
10891   match(Set dummy (GetAndAddB mem add));
10892   effect(KILL cr);
10893   format %{ "addb_lock   $mem, $add" %}
10894   ins_encode %{
10895     __ lock();
10896     __ addb($mem$$Address, $add$$constant);
10897   %}
10898   ins_pipe(pipe_cmpxchg);
10899 %}
10900 
10901 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10902   predicate(!n->as_LoadStore()->result_not_used());
10903   match(Set newval (GetAndAddB mem newval));
10904   effect(KILL cr);
10905   format %{ "xaddb_lock  $mem, $newval" %}
10906   ins_encode %{
10907     __ lock();
10908     __ xaddb($mem$$Address, $newval$$Register);
10909   %}
10910   ins_pipe(pipe_cmpxchg);
10911 %}
10912 
10913 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10914   predicate(n->as_LoadStore()->result_not_used());
10915   match(Set dummy (GetAndAddS mem add));
10916   effect(KILL cr);
10917   format %{ "addw_lock   $mem, $add" %}
10918   ins_encode %{
10919     __ lock();
10920     __ addw($mem$$Address, $add$$Register);
10921   %}
10922   ins_pipe(pipe_cmpxchg);
10923 %}
10924 
10925 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10926   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10927   match(Set dummy (GetAndAddS mem add));
10928   effect(KILL cr);
10929   format %{ "addw_lock   $mem, $add" %}
10930   ins_encode %{
10931     __ lock();
10932     __ addw($mem$$Address, $add$$constant);
10933   %}
10934   ins_pipe(pipe_cmpxchg);
10935 %}
10936 
10937 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10938   predicate(!n->as_LoadStore()->result_not_used());
10939   match(Set newval (GetAndAddS mem newval));
10940   effect(KILL cr);
10941   format %{ "xaddw_lock  $mem, $newval" %}
10942   ins_encode %{
10943     __ lock();
10944     __ xaddw($mem$$Address, $newval$$Register);
10945   %}
10946   ins_pipe(pipe_cmpxchg);
10947 %}
10948 
10949 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10950   predicate(n->as_LoadStore()->result_not_used());
10951   match(Set dummy (GetAndAddI mem add));
10952   effect(KILL cr);
10953   format %{ "addl_lock   $mem, $add" %}
10954   ins_encode %{
10955     __ lock();
10956     __ addl($mem$$Address, $add$$Register);
10957   %}
10958   ins_pipe(pipe_cmpxchg);
10959 %}
10960 
10961 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10962   predicate(n->as_LoadStore()->result_not_used());
10963   match(Set dummy (GetAndAddI mem add));
10964   effect(KILL cr);
10965   format %{ "addl_lock   $mem, $add" %}
10966   ins_encode %{
10967     __ lock();
10968     __ addl($mem$$Address, $add$$constant);
10969   %}
10970   ins_pipe(pipe_cmpxchg);
10971 %}
10972 
10973 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10974   predicate(!n->as_LoadStore()->result_not_used());
10975   match(Set newval (GetAndAddI mem newval));
10976   effect(KILL cr);
10977   format %{ "xaddl_lock  $mem, $newval" %}
10978   ins_encode %{
10979     __ lock();
10980     __ xaddl($mem$$Address, $newval$$Register);
10981   %}
10982   ins_pipe(pipe_cmpxchg);
10983 %}
10984 
10985 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10986   predicate(n->as_LoadStore()->result_not_used());
10987   match(Set dummy (GetAndAddL mem add));
10988   effect(KILL cr);
10989   format %{ "addq_lock   $mem, $add" %}
10990   ins_encode %{
10991     __ lock();
10992     __ addq($mem$$Address, $add$$Register);
10993   %}
10994   ins_pipe(pipe_cmpxchg);
10995 %}
10996 
10997 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10998   predicate(n->as_LoadStore()->result_not_used());
10999   match(Set dummy (GetAndAddL mem add));
11000   effect(KILL cr);
11001   format %{ "addq_lock   $mem, $add" %}
11002   ins_encode %{
11003     __ lock();
11004     __ addq($mem$$Address, $add$$constant);
11005   %}
11006   ins_pipe(pipe_cmpxchg);
11007 %}
11008 
11009 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11010   predicate(!n->as_LoadStore()->result_not_used());
11011   match(Set newval (GetAndAddL mem newval));
11012   effect(KILL cr);
11013   format %{ "xaddq_lock  $mem, $newval" %}
11014   ins_encode %{
11015     __ lock();
11016     __ xaddq($mem$$Address, $newval$$Register);
11017   %}
11018   ins_pipe(pipe_cmpxchg);
11019 %}
11020 
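// GetAndSet* maps to xchg.  With a memory operand, xchg is implicitly locked
// on x86 and does not modify the flags, so these patterns need neither an
// explicit lock() prefix nor a KILL of rFlagsReg.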
11021 instruct xchgB( memory mem, rRegI newval) %{
11022   match(Set newval (GetAndSetB mem newval));
11023   format %{ "XCHGB  $newval,[$mem]" %}
11024   ins_encode %{
11025     __ xchgb($newval$$Register, $mem$$Address);
11026   %}
11027   ins_pipe( pipe_cmpxchg );
11028 %}
11029 
11030 instruct xchgS( memory mem, rRegI newval) %{
11031   match(Set newval (GetAndSetS mem newval));
11032   format %{ "XCHGW  $newval,[$mem]" %}
11033   ins_encode %{
11034     __ xchgw($newval$$Register, $mem$$Address);
11035   %}
11036   ins_pipe( pipe_cmpxchg );
11037 %}
11038 
11039 instruct xchgI( memory mem, rRegI newval) %{
11040   match(Set newval (GetAndSetI mem newval));
11041   format %{ "XCHGL  $newval,[$mem]" %}
11042   ins_encode %{
11043     __ xchgl($newval$$Register, $mem$$Address);
11044   %}
11045   ins_pipe( pipe_cmpxchg );
11046 %}
11047 
11048 instruct xchgL( memory mem, rRegL newval) %{
11049   match(Set newval (GetAndSetL mem newval));
11050   format %{ "XCHGQ  $newval,[$mem]" %}
11051   ins_encode %{
11052     __ xchgq($newval$$Register, $mem$$Address);
11053   %}
11054   ins_pipe( pipe_cmpxchg );
11055 %}
11056 
11057 instruct xchgP( memory mem, rRegP newval) %{
11058   match(Set newval (GetAndSetP mem newval));
11059   predicate(n->as_LoadStore()->barrier_data() == 0);
11060   format %{ "XCHGQ  $newval,[$mem]" %}
11061   ins_encode %{
11062     __ xchgq($newval$$Register, $mem$$Address);
11063   %}
11064   ins_pipe( pipe_cmpxchg );
11065 %}
11066 
11067 instruct xchgN( memory mem, rRegN newval) %{
11068   predicate(n->as_LoadStore()->barrier_data() == 0);
11069   match(Set newval (GetAndSetN mem newval));
11070   format %{ "XCHGL  $newval,[$mem]" %}
11071   ins_encode %{
11072     __ xchgl($newval$$Register, $mem$$Address);
11073   %}
11074   ins_pipe( pipe_cmpxchg );
11075 %}
11076 
11077 //----------Abs Instructions-------------------------------------------
11078 
11079 // Integer Absolute Instructions
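// Branch-free absolute value: materialize -src with xor + sub, then use a
// cmov to pick src back when -src came out negative (i.e. src was positive).
// In effect dst = (src < 0) ? -src : src, without a branch.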
11080 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11081 %{
11082   match(Set dst (AbsI src));
11083   effect(TEMP dst, KILL cr);
11084   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11085             "subl    $dst, $src\n\t"
11086             "cmovll  $dst, $src" %}
11087   ins_encode %{
11088     __ xorl($dst$$Register, $dst$$Register);
11089     __ subl($dst$$Register, $src$$Register);
11090     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11091   %}
11092 
11093   ins_pipe(ialu_reg_reg);
11094 %}
11095 
11096 // Long Absolute Instructions
11097 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11098 %{
11099   match(Set dst (AbsL src));
11100   effect(TEMP dst, KILL cr);
11101   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11102             "subq    $dst, $src\n\t"
11103             "cmovlq  $dst, $src" %}
11104   ins_encode %{
11105     __ xorl($dst$$Register, $dst$$Register);
11106     __ subq($dst$$Register, $src$$Register);
11107     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11108   %}
11109 
11110   ins_pipe(ialu_reg_reg);
11111 %}
11112 
11113 //----------Subtraction Instructions-------------------------------------------
11114 
11115 // Integer Subtraction Instructions
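// Note the pairing throughout this section: the legacy two-operand forms
// destroy their left input, so without APX the matcher requires dst and the
// left operand to be the same register (Set dst (SubI dst src)).  The UseAPX
// NDD forms lift that constraint by taking an explicit destination.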
11116 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11117 %{
11118   predicate(!UseAPX);
11119   match(Set dst (SubI dst src));
11120   effect(KILL cr);
11121   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11122 
11123   format %{ "subl    $dst, $src\t# int" %}
11124   ins_encode %{
11125     __ subl($dst$$Register, $src$$Register);
11126   %}
11127   ins_pipe(ialu_reg_reg);
11128 %}
11129 
11130 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11131 %{
11132   predicate(UseAPX);
11133   match(Set dst (SubI src1 src2));
11134   effect(KILL cr);
11135   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11136 
11137   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11138   ins_encode %{
11139     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11140   %}
11141   ins_pipe(ialu_reg_reg);
11142 %}
11143 
11144 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11145 %{
11146   predicate(UseAPX);
11147   match(Set dst (SubI src1 src2));
11148   effect(KILL cr);
11149   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11150 
11151   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11152   ins_encode %{
11153     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11154   %}
11155   ins_pipe(ialu_reg_reg);
11156 %}
11157 
11158 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11159 %{
11160   predicate(UseAPX);
11161   match(Set dst (SubI (LoadI src1) src2));
11162   effect(KILL cr);
11163   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11164 
11165   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11166   ins_encode %{
11167     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11168   %}
11169   ins_pipe(ialu_reg_reg);
11170 %}
11171 
11172 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11173 %{
11174   predicate(!UseAPX);
11175   match(Set dst (SubI dst (LoadI src)));
11176   effect(KILL cr);
11177   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11178 
11179   ins_cost(150);
11180   format %{ "subl    $dst, $src\t# int" %}
11181   ins_encode %{
11182     __ subl($dst$$Register, $src$$Address);
11183   %}
11184   ins_pipe(ialu_reg_mem);
11185 %}
11186 
11187 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11188 %{
11189   predicate(UseAPX);
11190   match(Set dst (SubI src1 (LoadI src2)));
11191   effect(KILL cr);
11192   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11193 
11194   ins_cost(150);
11195   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11196   ins_encode %{
11197     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11198   %}
11199   ins_pipe(ialu_reg_mem);
11200 %}
11201 
11202 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11203 %{
11204   predicate(UseAPX);
11205   match(Set dst (SubI (LoadI src1) src2));
11206   effect(KILL cr);
11207   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11208 
11209   ins_cost(150);
11210   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11211   ins_encode %{
11212     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11213   %}
11214   ins_pipe(ialu_reg_mem);
11215 %}
11216 
11217 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11218 %{
11219   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11220   effect(KILL cr);
11221   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11222 
11223   ins_cost(150);
11224   format %{ "subl    $dst, $src\t# int" %}
11225   ins_encode %{
11226     __ subl($dst$$Address, $src$$Register);
11227   %}
11228   ins_pipe(ialu_mem_reg);
11229 %}
11230 
11231 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11232 %{
11233   predicate(!UseAPX);
11234   match(Set dst (SubL dst src));
11235   effect(KILL cr);
11236   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11237 
11238   format %{ "subq    $dst, $src\t# long" %}
11239   ins_encode %{
11240     __ subq($dst$$Register, $src$$Register);
11241   %}
11242   ins_pipe(ialu_reg_reg);
11243 %}
11244 
11245 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11246 %{
11247   predicate(UseAPX);
11248   match(Set dst (SubL src1 src2));
11249   effect(KILL cr);
11250   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11251 
11252   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11253   ins_encode %{
11254     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11255   %}
11256   ins_pipe(ialu_reg_reg);
11257 %}
11258 
11259 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11260 %{
11261   predicate(UseAPX);
11262   match(Set dst (SubL src1 src2));
11263   effect(KILL cr);
11264   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11265 
11266   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11267   ins_encode %{
11268     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11269   %}
11270   ins_pipe(ialu_reg_reg);
11271 %}
11272 
11273 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11274 %{
11275   predicate(UseAPX);
11276   match(Set dst (SubL (LoadL src1) src2));
11277   effect(KILL cr);
11278   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11279 
11280   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11281   ins_encode %{
11282     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11283   %}
11284   ins_pipe(ialu_reg_reg);
11285 %}
11286 
11287 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11288 %{
11289   predicate(!UseAPX);
11290   match(Set dst (SubL dst (LoadL src)));
11291   effect(KILL cr);
11292   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11293 
11294   ins_cost(150);
11295   format %{ "subq    $dst, $src\t# long" %}
11296   ins_encode %{
11297     __ subq($dst$$Register, $src$$Address);
11298   %}
11299   ins_pipe(ialu_reg_mem);
11300 %}
11301 
11302 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11303 %{
11304   predicate(UseAPX);
11305   match(Set dst (SubL src1 (LoadL src2)));
11306   effect(KILL cr);
11307   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11308 
11309   ins_cost(150);
11310   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11311   ins_encode %{
11312     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11313   %}
11314   ins_pipe(ialu_reg_mem);
11315 %}
11316 
11317 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11318 %{
11319   predicate(UseAPX);
11320   match(Set dst (SubL (LoadL src1) src2));
11321   effect(KILL cr);
11322   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11323 
11324   ins_cost(150);
11325   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11326   ins_encode %{
11327     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11328   %}
11329   ins_pipe(ialu_reg_mem);
11330 %}
11331 
11332 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11333 %{
11334   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11335   effect(KILL cr);
11336   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11337 
11338   ins_cost(150);
11339   format %{ "subq    $dst, $src\t# long" %}
11340   ins_encode %{
11341     __ subq($dst$$Address, $src$$Register);
11342   %}
11343   ins_pipe(ialu_mem_reg);
11344 %}
11345 
11346 // Subtract from a pointer
11347 // XXX hmpf???
11348 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11349 %{
11350   match(Set dst (AddP dst (SubI zero src)));
11351   effect(KILL cr);
11352 
11353   format %{ "subq    $dst, $src\t# ptr - int" %}
11354   ins_encode %{
11355     __ subq($dst$$Register, $src$$Register);
11356   %}
11357   ins_pipe(ialu_reg_reg);
11358 %}
11359 
11360 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11361 %{
11362   predicate(!UseAPX);
11363   match(Set dst (SubI zero dst));
11364   effect(KILL cr);
11365   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11366 
11367   format %{ "negl    $dst\t# int" %}
11368   ins_encode %{
11369     __ negl($dst$$Register);
11370   %}
11371   ins_pipe(ialu_reg);
11372 %}
11373 
11374 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11375 %{
11376   predicate(UseAPX);
11377   match(Set dst (SubI zero src));
11378   effect(KILL cr);
11379   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11380 
11381   format %{ "enegl    $dst, $src\t# int ndd" %}
11382   ins_encode %{
11383     __ enegl($dst$$Register, $src$$Register, false);
11384   %}
11385   ins_pipe(ialu_reg);
11386 %}
11387 
11388 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11389 %{
11390   predicate(!UseAPX);
11391   match(Set dst (NegI dst));
11392   effect(KILL cr);
11393   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11394 
11395   format %{ "negl    $dst\t# int" %}
11396   ins_encode %{
11397     __ negl($dst$$Register);
11398   %}
11399   ins_pipe(ialu_reg);
11400 %}
11401 
11402 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11403 %{
11404   predicate(UseAPX);
11405   match(Set dst (NegI src));
11406   effect(KILL cr);
11407   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11408 
11409   format %{ "enegl    $dst, $src\t# int ndd" %}
11410   ins_encode %{
11411     __ enegl($dst$$Register, $src$$Register, false);
11412   %}
11413   ins_pipe(ialu_reg);
11414 %}
11415 
11416 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11417 %{
11418   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11419   effect(KILL cr);
11420   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11421 
11422   format %{ "negl    $dst\t# int" %}
11423   ins_encode %{
11424     __ negl($dst$$Address);
11425   %}
11426   ins_pipe(ialu_reg);
11427 %}
11428 
11429 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11430 %{
11431   predicate(!UseAPX);
11432   match(Set dst (SubL zero dst));
11433   effect(KILL cr);
11434   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11435 
11436   format %{ "negq    $dst\t# long" %}
11437   ins_encode %{
11438     __ negq($dst$$Register);
11439   %}
11440   ins_pipe(ialu_reg);
11441 %}
11442 
11443 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11444 %{
11445   predicate(UseAPX);
11446   match(Set dst (SubL zero src));
11447   effect(KILL cr);
11448   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11449 
11450   format %{ "enegq    $dst, $src\t# long ndd" %}
11451   ins_encode %{
11452     __ enegq($dst$$Register, $src$$Register, false);
11453   %}
11454   ins_pipe(ialu_reg);
11455 %}
11456 
11457 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11458 %{
11459   predicate(!UseAPX);
11460   match(Set dst (NegL dst));
11461   effect(KILL cr);
11462   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11463 
11464   format %{ "negq    $dst\t# long" %}
11465   ins_encode %{
11466     __ negq($dst$$Register);
11467   %}
11468   ins_pipe(ialu_reg);
11469 %}
11470 
11471 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11472 %{
11473   predicate(UseAPX);
11474   match(Set dst (NegL src));
11475   effect(KILL cr);
11476   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11477 
11478   format %{ "enegq    $dst, $src\t# long ndd" %}
11479   ins_encode %{
11480     __ enegq($dst$$Register, $src$$Register, false);
11481   %}
11482   ins_pipe(ialu_reg);
11483 %}
11484 
11485 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11486 %{
11487   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11488   effect(KILL cr);
11489   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11490 
11491   format %{ "negq    $dst\t# long" %}
11492   ins_encode %{
11493     __ negq($dst$$Address);
11494   %}
11495   ins_pipe(ialu_reg);
11496 %}
11497 
11498 //----------Multiplication/Division Instructions-------------------------------
11499 // Integer Multiplication Instructions
11500 // Multiply Register
11501 
11502 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11503 %{
11504   predicate(!UseAPX);
11505   match(Set dst (MulI dst src));
11506   effect(KILL cr);
11507 
11508   ins_cost(300);
11509   format %{ "imull   $dst, $src\t# int" %}
11510   ins_encode %{
11511     __ imull($dst$$Register, $src$$Register);
11512   %}
11513   ins_pipe(ialu_reg_reg_alu0);
11514 %}
11515 
11516 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11517 %{
11518   predicate(UseAPX);
11519   match(Set dst (MulI src1 src2));
11520   effect(KILL cr);
11521   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11522 
11523   ins_cost(300);
11524   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11525   ins_encode %{
11526     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11527   %}
11528   ins_pipe(ialu_reg_reg_alu0);
11529 %}
11530 
11531 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11532 %{
11533   match(Set dst (MulI src imm));
11534   effect(KILL cr);
11535 
11536   ins_cost(300);
11537   format %{ "imull   $dst, $src, $imm\t# int" %}
11538   ins_encode %{
11539     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11540   %}
11541   ins_pipe(ialu_reg_reg_alu0);
11542 %}
11543 
11544 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11545 %{
11546   predicate(!UseAPX);
11547   match(Set dst (MulI dst (LoadI src)));
11548   effect(KILL cr);
11549 
11550   ins_cost(350);
11551   format %{ "imull   $dst, $src\t# int" %}
11552   ins_encode %{
11553     __ imull($dst$$Register, $src$$Address);
11554   %}
11555   ins_pipe(ialu_reg_mem_alu0);
11556 %}
11557 
11558 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11559 %{
11560   predicate(UseAPX);
11561   match(Set dst (MulI src1 (LoadI src2)));
11562   effect(KILL cr);
11563   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11564 
11565   ins_cost(350);
11566   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11567   ins_encode %{
11568     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11569   %}
11570   ins_pipe(ialu_reg_mem_alu0);
11571 %}
11572 
11573 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11574 %{
11575   match(Set dst (MulI (LoadI src) imm));
11576   effect(KILL cr);
11577 
11578   ins_cost(300);
11579   format %{ "imull   $dst, $src, $imm\t# int" %}
11580   ins_encode %{
11581     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11582   %}
11583   ins_pipe(ialu_reg_mem_alu0);
11584 %}
11585 
11586 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11587 %{
11588   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11589   effect(KILL cr, KILL src2);
11590 
11591   expand %{ mulI_rReg(dst, src1, cr);
11592             mulI_rReg(src2, src3, cr);
11593             addI_rReg(dst, src2, cr); %}
11594 %}
11595 
11596 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11597 %{
11598   predicate(!UseAPX);
11599   match(Set dst (MulL dst src));
11600   effect(KILL cr);
11601 
11602   ins_cost(300);
11603   format %{ "imulq   $dst, $src\t# long" %}
11604   ins_encode %{
11605     __ imulq($dst$$Register, $src$$Register);
11606   %}
11607   ins_pipe(ialu_reg_reg_alu0);
11608 %}
11609 
11610 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11611 %{
11612   predicate(UseAPX);
11613   match(Set dst (MulL src1 src2));
11614   effect(KILL cr);
11615   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11616 
11617   ins_cost(300);
11618   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11619   ins_encode %{
11620     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11621   %}
11622   ins_pipe(ialu_reg_reg_alu0);
11623 %}
11624 
11625 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11626 %{
11627   match(Set dst (MulL src imm));
11628   effect(KILL cr);
11629 
11630   ins_cost(300);
11631   format %{ "imulq   $dst, $src, $imm\t# long" %}
11632   ins_encode %{
11633     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11634   %}
11635   ins_pipe(ialu_reg_reg_alu0);
11636 %}
11637 
11638 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11639 %{
11640   predicate(!UseAPX);
11641   match(Set dst (MulL dst (LoadL src)));
11642   effect(KILL cr);
11643 
11644   ins_cost(350);
11645   format %{ "imulq   $dst, $src\t# long" %}
11646   ins_encode %{
11647     __ imulq($dst$$Register, $src$$Address);
11648   %}
11649   ins_pipe(ialu_reg_mem_alu0);
11650 %}
11651 
11652 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11653 %{
11654   predicate(UseAPX);
11655   match(Set dst (MulL src1 (LoadL src2)));
11656   effect(KILL cr);
11657   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11658 
11659   ins_cost(350);
11660   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11661   ins_encode %{
11662     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11663   %}
11664   ins_pipe(ialu_reg_mem_alu0);
11665 %}
11666 
11667 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11668 %{
11669   match(Set dst (MulL (LoadL src) imm));
11670   effect(KILL cr);
11671 
11672   ins_cost(300);
11673   format %{ "imulq   $dst, $src, $imm\t# long" %}
11674   ins_encode %{
11675     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11676   %}
11677   ins_pipe(ialu_reg_mem_alu0);
11678 %}
11679 
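// MulHiL/UMulHiL use the one-operand imulq/mulq, which multiplies rax by the
// source and leaves the 128-bit product in rdx:rax.  Only the high half in
// rdx is the result here, which is why rax is USE_KILLed.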
11680 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11681 %{
11682   match(Set dst (MulHiL src rax));
11683   effect(USE_KILL rax, KILL cr);
11684 
11685   ins_cost(300);
11686   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11687   ins_encode %{
11688     __ imulq($src$$Register);
11689   %}
11690   ins_pipe(ialu_reg_reg_alu0);
11691 %}
11692 
11693 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11694 %{
11695   match(Set dst (UMulHiL src rax));
11696   effect(USE_KILL rax, KILL cr);
11697 
11698   ins_cost(300);
11699   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11700   ins_encode %{
11701     __ mulq($src$$Register);
11702   %}
11703   ins_pipe(ialu_reg_reg_alu0);
11704 %}
11705 
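// Division/remainder guard: x86 idiv raises #DE on min_jint / -1 (and the
// 64-bit analogue), whereas Java defines MIN_VALUE / -1 == MIN_VALUE and
// MIN_VALUE % -1 == 0.  The cdql_enc/cdqq_enc stubs therefore compare the
// dividend against 0x80000000(00000000) and the divisor against -1, and
// skip the idiv with the defined results (rax unchanged, rdx zeroed) in
// that corner case.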
11706 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11707                    rFlagsReg cr)
11708 %{
11709   match(Set rax (DivI rax div));
11710   effect(KILL rdx, KILL cr);
11711 
11712   ins_cost(30*100+10*100); // XXX
11713   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11714             "jne,s   normal\n\t"
11715             "xorl    rdx, rdx\n\t"
11716             "cmpl    $div, -1\n\t"
11717             "je,s    done\n"
11718     "normal: cdql\n\t"
11719             "idivl   $div\n"
11720     "done:"        %}
11721   ins_encode(cdql_enc(div));
11722   ins_pipe(ialu_reg_reg_alu0);
11723 %}
11724 
11725 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11726                    rFlagsReg cr)
11727 %{
11728   match(Set rax (DivL rax div));
11729   effect(KILL rdx, KILL cr);
11730 
11731   ins_cost(30*100+10*100); // XXX
11732   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11733             "cmpq    rax, rdx\n\t"
11734             "jne,s   normal\n\t"
11735             "xorl    rdx, rdx\n\t"
11736             "cmpq    $div, -1\n\t"
11737             "je,s    done\n"
11738     "normal: cdqq\n\t"
11739             "idivq   $div\n"
11740     "done:"        %}
11741   ins_encode(cdqq_enc(div));
11742   ins_pipe(ialu_reg_reg_alu0);
11743 %}
11744 
11745 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11746 %{
11747   match(Set rax (UDivI rax div));
11748   effect(KILL rdx, KILL cr);
11749 
11750   ins_cost(300);
11751   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11752   ins_encode %{
11753     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11754   %}
11755   ins_pipe(ialu_reg_reg_alu0);
11756 %}
11757 
11758 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11759 %{
11760   match(Set rax (UDivL rax div));
11761   effect(KILL rdx, KILL cr);
11762 
11763   ins_cost(300);
11764   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11765   ins_encode %{
11766      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11767   %}
11768   ins_pipe(ialu_reg_reg_alu0);
11769 %}
11770 
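// DivMod produces both results of a single idiv: the quotient in rax and the
// remainder in rdx, so a division and a remainder of the same operands cost
// only one idiv.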
11771 // Integer DIVMOD with Register, both quotient and mod results
11772 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11773                              rFlagsReg cr)
11774 %{
11775   match(DivModI rax div);
11776   effect(KILL cr);
11777 
11778   ins_cost(30*100+10*100); // XXX
11779   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11780             "jne,s   normal\n\t"
11781             "xorl    rdx, rdx\n\t"
11782             "cmpl    $div, -1\n\t"
11783             "je,s    done\n"
11784     "normal: cdql\n\t"
11785             "idivl   $div\n"
11786     "done:"        %}
11787   ins_encode(cdql_enc(div));
11788   ins_pipe(pipe_slow);
11789 %}
11790 
11791 // Long DIVMOD with Register, both quotient and mod results
11792 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11793                              rFlagsReg cr)
11794 %{
11795   match(DivModL rax div);
11796   effect(KILL cr);
11797 
11798   ins_cost(30*100+10*100); // XXX
11799   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11800             "cmpq    rax, rdx\n\t"
11801             "jne,s   normal\n\t"
11802             "xorl    rdx, rdx\n\t"
11803             "cmpq    $div, -1\n\t"
11804             "je,s    done\n"
11805     "normal: cdqq\n\t"
11806             "idivq   $div\n"
11807     "done:"        %}
11808   ins_encode(cdqq_enc(div));
11809   ins_pipe(pipe_slow);
11810 %}
11811 
11812 // Unsigned integer DIVMOD with Register, both quotient and mod results
11813 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11814                               no_rax_rdx_RegI div, rFlagsReg cr)
11815 %{
11816   match(UDivModI rax div);
11817   effect(TEMP tmp, KILL cr);
11818 
11819   ins_cost(300);
11820   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11821             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11822           %}
11823   ins_encode %{
11824     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11825   %}
11826   ins_pipe(pipe_slow);
11827 %}
11828 
11829 // Unsigned long DIVMOD with Register, both quotient and mod results
11830 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11831                               no_rax_rdx_RegL div, rFlagsReg cr)
11832 %{
11833   match(UDivModL rax div);
11834   effect(TEMP tmp, KILL cr);
11835 
11836   ins_cost(300);
11837   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11838             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11839           %}
11840   ins_encode %{
11841     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11842   %}
11843   ins_pipe(pipe_slow);
11844 %}
11845 
11846 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11847                    rFlagsReg cr)
11848 %{
11849   match(Set rdx (ModI rax div));
11850   effect(KILL rax, KILL cr);
11851 
11852   ins_cost(300); // XXX
11853   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11854             "jne,s   normal\n\t"
11855             "xorl    rdx, rdx\n\t"
11856             "cmpl    $div, -1\n\t"
11857             "je,s    done\n"
11858     "normal: cdql\n\t"
11859             "idivl   $div\n"
11860     "done:"        %}
11861   ins_encode(cdql_enc(div));
11862   ins_pipe(ialu_reg_reg_alu0);
11863 %}
11864 
11865 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11866                    rFlagsReg cr)
11867 %{
11868   match(Set rdx (ModL rax div));
11869   effect(KILL rax, KILL cr);
11870 
11871   ins_cost(300); // XXX
11872   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11873             "cmpq    rax, rdx\n\t"
11874             "jne,s   normal\n\t"
11875             "xorl    rdx, rdx\n\t"
11876             "cmpq    $div, -1\n\t"
11877             "je,s    done\n"
11878     "normal: cdqq\n\t"
11879             "idivq   $div\n"
11880     "done:"        %}
11881   ins_encode(cdqq_enc(div));
11882   ins_pipe(ialu_reg_reg_alu0);
11883 %}
11884 
11885 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11886 %{
11887   match(Set rdx (UModI rax div));
11888   effect(KILL rax, KILL cr);
11889 
11890   ins_cost(300);
11891   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11892   ins_encode %{
11893     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11894   %}
11895   ins_pipe(ialu_reg_reg_alu0);
11896 %}
11897 
11898 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11899 %{
11900   match(Set rdx (UModL rax div));
11901   effect(KILL rax, KILL cr);
11902 
11903   ins_cost(300);
11904   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11905   ins_encode %{
11906     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11907   %}
11908   ins_pipe(ialu_reg_reg_alu0);
11909 %}
11910 
11911 // Integer Shift Instructions
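// Variable shifts come in two flavors: the legacy sal/sar/shr forms require
// the count in CL and clobber the flags, while the BMI2 shlx/sarx/shrx forms
// (predicated on supports_bmi2()) take the count in any register and leave
// the flags untouched, hence no rFlagsReg effect on those patterns.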
11912 // Shift Left by one, two, three
11913 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11914 %{
11915   predicate(!UseAPX);
11916   match(Set dst (LShiftI dst shift));
11917   effect(KILL cr);
11918 
11919   format %{ "sall    $dst, $shift" %}
11920   ins_encode %{
11921     __ sall($dst$$Register, $shift$$constant);
11922   %}
11923   ins_pipe(ialu_reg);
11924 %}
11925 
11926 // Shift Left by one, two, three
11927 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11928 %{
11929   predicate(UseAPX);
11930   match(Set dst (LShiftI src shift));
11931   effect(KILL cr);
11932   flag(PD::Flag_ndd_demotable_opr1);
11933 
11934   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11935   ins_encode %{
11936     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11937   %}
11938   ins_pipe(ialu_reg);
11939 %}
11940 
11941 // Shift Left by 8-bit immediate
11942 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11943 %{
11944   predicate(!UseAPX);
11945   match(Set dst (LShiftI dst shift));
11946   effect(KILL cr);
11947 
11948   format %{ "sall    $dst, $shift" %}
11949   ins_encode %{
11950     __ sall($dst$$Register, $shift$$constant);
11951   %}
11952   ins_pipe(ialu_reg);
11953 %}
11954 
11955 // Shift Left by 8-bit immediate
11956 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11957 %{
11958   predicate(UseAPX);
11959   match(Set dst (LShiftI src shift));
11960   effect(KILL cr);
11961   flag(PD::Flag_ndd_demotable_opr1);
11962 
11963   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11964   ins_encode %{
11965     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11966   %}
11967   ins_pipe(ialu_reg);
11968 %}
11969 
11970 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11971 %{
11972   predicate(UseAPX);
11973   match(Set dst (LShiftI (LoadI src) shift));
11974   effect(KILL cr);
11975 
11976   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11977   ins_encode %{
11978     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11979   %}
11980   ins_pipe(ialu_reg);
11981 %}
11982 
11983 // Shift Left by 8-bit immediate
11984 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11985 %{
11986   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11987   effect(KILL cr);
11988 
11989   format %{ "sall    $dst, $shift" %}
11990   ins_encode %{
11991     __ sall($dst$$Address, $shift$$constant);
11992   %}
11993   ins_pipe(ialu_mem_imm);
11994 %}
11995 
11996 // Shift Left by variable
11997 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11998 %{
11999   predicate(!VM_Version::supports_bmi2());
12000   match(Set dst (LShiftI dst shift));
12001   effect(KILL cr);
12002 
12003   format %{ "sall    $dst, $shift" %}
12004   ins_encode %{
12005     __ sall($dst$$Register);
12006   %}
12007   ins_pipe(ialu_reg_reg);
12008 %}
12009 
12010 // Shift Left by variable
12011 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12012 %{
12013   predicate(!VM_Version::supports_bmi2());
12014   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12015   effect(KILL cr);
12016 
12017   format %{ "sall    $dst, $shift" %}
12018   ins_encode %{
12019     __ sall($dst$$Address);
12020   %}
12021   ins_pipe(ialu_mem_reg);
12022 %}
12023 
12024 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12025 %{
12026   predicate(VM_Version::supports_bmi2());
12027   match(Set dst (LShiftI src shift));
12028 
12029   format %{ "shlxl   $dst, $src, $shift" %}
12030   ins_encode %{
12031     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12032   %}
12033   ins_pipe(ialu_reg_reg);
12034 %}
12035 
12036 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12037 %{
12038   predicate(VM_Version::supports_bmi2());
12039   match(Set dst (LShiftI (LoadI src) shift));
12040   ins_cost(175);
12041   format %{ "shlxl   $dst, $src, $shift" %}
12042   ins_encode %{
12043     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12044   %}
12045   ins_pipe(ialu_reg_mem);
12046 %}
12047 
12048 // Arithmetic Shift Right by 8-bit immediate
12049 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12050 %{
12051   predicate(!UseAPX);
12052   match(Set dst (RShiftI dst shift));
12053   effect(KILL cr);
12054 
12055   format %{ "sarl    $dst, $shift" %}
12056   ins_encode %{
12057     __ sarl($dst$$Register, $shift$$constant);
12058   %}
12059   ins_pipe(ialu_mem_imm);
12060 %}
12061 
12062 // Arithmetic Shift Right by 8-bit immediate
12063 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12064 %{
12065   predicate(UseAPX);
12066   match(Set dst (RShiftI src shift));
12067   effect(KILL cr);
12068   flag(PD::Flag_ndd_demotable_opr1);
12069 
12070   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12071   ins_encode %{
12072     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12073   %}
12074   ins_pipe(ialu_mem_imm);
12075 %}
12076 
12077 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12078 %{
12079   predicate(UseAPX);
12080   match(Set dst (RShiftI (LoadI src) shift));
12081   effect(KILL cr);
12082 
12083   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12084   ins_encode %{
12085     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12086   %}
12087   ins_pipe(ialu_mem_imm);
12088 %}
12089 
12090 // Arithmetic Shift Right by 8-bit immediate
12091 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12092 %{
12093   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12094   effect(KILL cr);
12095 
12096   format %{ "sarl    $dst, $shift" %}
12097   ins_encode %{
12098     __ sarl($dst$$Address, $shift$$constant);
12099   %}
12100   ins_pipe(ialu_mem_imm);
12101 %}
12102 
12103 // Arithmetic Shift Right by variable
12104 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12105 %{
12106   predicate(!VM_Version::supports_bmi2());
12107   match(Set dst (RShiftI dst shift));
12108   effect(KILL cr);
12109 
12110   format %{ "sarl    $dst, $shift" %}
12111   ins_encode %{
12112     __ sarl($dst$$Register);
12113   %}
12114   ins_pipe(ialu_reg_reg);
12115 %}
12116 
12117 // Arithmetic Shift Right by variable
12118 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12119 %{
12120   predicate(!VM_Version::supports_bmi2());
12121   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12122   effect(KILL cr);
12123 
12124   format %{ "sarl    $dst, $shift" %}
12125   ins_encode %{
12126     __ sarl($dst$$Address);
12127   %}
12128   ins_pipe(ialu_mem_reg);
12129 %}
12130 
12131 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12132 %{
12133   predicate(VM_Version::supports_bmi2());
12134   match(Set dst (RShiftI src shift));
12135 
12136   format %{ "sarxl   $dst, $src, $shift" %}
12137   ins_encode %{
12138     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12139   %}
12140   ins_pipe(ialu_reg_reg);
12141 %}
12142 
12143 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12144 %{
12145   predicate(VM_Version::supports_bmi2());
12146   match(Set dst (RShiftI (LoadI src) shift));
12147   ins_cost(175);
12148   format %{ "sarxl   $dst, $src, $shift" %}
12149   ins_encode %{
12150     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12151   %}
12152   ins_pipe(ialu_reg_mem);
12153 %}
12154 
12155 // Logical Shift Right by 8-bit immediate
12156 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12157 %{
12158   predicate(!UseAPX);
12159   match(Set dst (URShiftI dst shift));
12160   effect(KILL cr);
12161 
12162   format %{ "shrl    $dst, $shift" %}
12163   ins_encode %{
12164     __ shrl($dst$$Register, $shift$$constant);
12165   %}
12166   ins_pipe(ialu_reg);
12167 %}
12168 
12169 // Logical Shift Right by 8-bit immediate
12170 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12171 %{
12172   predicate(UseAPX);
12173   match(Set dst (URShiftI src shift));
12174   effect(KILL cr);
12175   flag(PD::Flag_ndd_demotable_opr1);
12176 
12177   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12178   ins_encode %{
12179     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12180   %}
12181   ins_pipe(ialu_reg);
12182 %}
12183 
12184 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12185 %{
12186   predicate(UseAPX);
12187   match(Set dst (URShiftI (LoadI src) shift));
12188   effect(KILL cr);
12189 
12190   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12191   ins_encode %{
12192     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12193   %}
12194   ins_pipe(ialu_reg);
12195 %}
12196 
12197 // Logical Shift Right by 8-bit immediate
12198 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12199 %{
12200   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12201   effect(KILL cr);
12202 
12203   format %{ "shrl    $dst, $shift" %}
12204   ins_encode %{
12205     __ shrl($dst$$Address, $shift$$constant);
12206   %}
12207   ins_pipe(ialu_mem_imm);
12208 %}
12209 
12210 // Logical Shift Right by variable
12211 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12212 %{
12213   predicate(!VM_Version::supports_bmi2());
12214   match(Set dst (URShiftI dst shift));
12215   effect(KILL cr);
12216 
12217   format %{ "shrl    $dst, $shift" %}
12218   ins_encode %{
12219     __ shrl($dst$$Register);
12220   %}
12221   ins_pipe(ialu_reg_reg);
12222 %}
12223 
12224 // Logical Shift Right by variable
12225 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12226 %{
12227   predicate(!VM_Version::supports_bmi2());
12228   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12229   effect(KILL cr);
12230 
12231   format %{ "shrl    $dst, $shift" %}
12232   ins_encode %{
12233     __ shrl($dst$$Address);
12234   %}
12235   ins_pipe(ialu_mem_reg);
12236 %}
12237 
12238 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12239 %{
12240   predicate(VM_Version::supports_bmi2());
12241   match(Set dst (URShiftI src shift));
12242 
12243   format %{ "shrxl   $dst, $src, $shift" %}
12244   ins_encode %{
12245     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12246   %}
12247   ins_pipe(ialu_reg_reg);
12248 %}
12249 
12250 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12251 %{
12252   predicate(VM_Version::supports_bmi2());
12253   match(Set dst (URShiftI (LoadI src) shift));
12254   ins_cost(175);
12255   format %{ "shrxl   $dst, $src, $shift" %}
12256   ins_encode %{
12257     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12258   %}
12259   ins_pipe(ialu_reg_mem);
12260 %}
12261 
12262 // Long Shift Instructions
12263 // Shift Left by one, two, three
12264 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12265 %{
12266   predicate(!UseAPX);
12267   match(Set dst (LShiftL dst shift));
12268   effect(KILL cr);
12269 
12270   format %{ "salq    $dst, $shift" %}
12271   ins_encode %{
12272     __ salq($dst$$Register, $shift$$constant);
12273   %}
12274   ins_pipe(ialu_reg);
12275 %}
12276 
12277 // Shift Left by one, two, three
12278 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12279 %{
12280   predicate(UseAPX);
12281   match(Set dst (LShiftL src shift));
12282   effect(KILL cr);
12283   flag(PD::Flag_ndd_demotable_opr1);
12284 
12285   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12286   ins_encode %{
12287     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12288   %}
12289   ins_pipe(ialu_reg);
12290 %}
12291 
12292 // Shift Left by 8-bit immediate
12293 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12294 %{
12295   predicate(!UseAPX);
12296   match(Set dst (LShiftL dst shift));
12297   effect(KILL cr);
12298 
12299   format %{ "salq    $dst, $shift" %}
12300   ins_encode %{
12301     __ salq($dst$$Register, $shift$$constant);
12302   %}
12303   ins_pipe(ialu_reg);
12304 %}
12305 
12306 // Shift Left by 8-bit immediate
12307 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12308 %{
12309   predicate(UseAPX);
12310   match(Set dst (LShiftL src shift));
12311   effect(KILL cr);
12312   flag(PD::Flag_ndd_demotable_opr1);
12313 
12314   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12315   ins_encode %{
12316     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12317   %}
12318   ins_pipe(ialu_reg);
12319 %}
12320 
12321 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12322 %{
12323   predicate(UseAPX);
12324   match(Set dst (LShiftL (LoadL src) shift));
12325   effect(KILL cr);
12326 
12327   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12328   ins_encode %{
12329     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12330   %}
12331   ins_pipe(ialu_reg);
12332 %}
12333 
12334 // Shift Left by 8-bit immediate
12335 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12336 %{
12337   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12338   effect(KILL cr);
12339 
12340   format %{ "salq    $dst, $shift" %}
12341   ins_encode %{
12342     __ salq($dst$$Address, $shift$$constant);
12343   %}
12344   ins_pipe(ialu_mem_imm);
12345 %}
12346 
12347 // Shift Left by variable
12348 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12349 %{
12350   predicate(!VM_Version::supports_bmi2());
12351   match(Set dst (LShiftL dst shift));
12352   effect(KILL cr);
12353 
12354   format %{ "salq    $dst, $shift" %}
12355   ins_encode %{
12356     __ salq($dst$$Register);
12357   %}
12358   ins_pipe(ialu_reg_reg);
12359 %}
12360 
12361 // Shift Left by variable
12362 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12363 %{
12364   predicate(!VM_Version::supports_bmi2());
12365   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12366   effect(KILL cr);
12367 
12368   format %{ "salq    $dst, $shift" %}
12369   ins_encode %{
12370     __ salq($dst$$Address);
12371   %}
12372   ins_pipe(ialu_mem_reg);
12373 %}
12374 
12375 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12376 %{
12377   predicate(VM_Version::supports_bmi2());
12378   match(Set dst (LShiftL src shift));
12379 
12380   format %{ "shlxq   $dst, $src, $shift" %}
12381   ins_encode %{
12382     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12383   %}
12384   ins_pipe(ialu_reg_reg);
12385 %}
12386 
12387 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12388 %{
12389   predicate(VM_Version::supports_bmi2());
12390   match(Set dst (LShiftL (LoadL src) shift));
12391   ins_cost(175);
12392   format %{ "shlxq   $dst, $src, $shift" %}
12393   ins_encode %{
12394     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12395   %}
12396   ins_pipe(ialu_reg_mem);
12397 %}
12398 
// Arithmetic Shift Right by immediate (count is masked to 6 bits)
12400 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12401 %{
12402   predicate(!UseAPX);
12403   match(Set dst (RShiftL dst shift));
12404   effect(KILL cr);
12405 
12406   format %{ "sarq    $dst, $shift" %}
12407   ins_encode %{
12408     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12409   %}
12410   ins_pipe(ialu_mem_imm);
12411 %}
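
// Note: the explicit "& 0x3F" mirrors 64-bit shift semantics, where only the
// low six bits of the count are used; this also matches Java long-shift
// semantics, e.g. (illustrative, not generated code): (x >> 70) == (x >> 6).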
12412 
// Arithmetic Shift Right by immediate
12414 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12415 %{
12416   predicate(UseAPX);
12417   match(Set dst (RShiftL src shift));
12418   effect(KILL cr);
12419   flag(PD::Flag_ndd_demotable_opr1);
12420 
12421   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12422   ins_encode %{
12423     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12424   %}
12425   ins_pipe(ialu_mem_imm);
12426 %}
12427 
12428 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12429 %{
12430   predicate(UseAPX);
12431   match(Set dst (RShiftL (LoadL src) shift));
12432   effect(KILL cr);
12433 
12434   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12435   ins_encode %{
12436     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12437   %}
12438   ins_pipe(ialu_mem_imm);
12439 %}
12440 
// Arithmetic Shift Right by immediate
12442 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12443 %{
12444   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12445   effect(KILL cr);
12446 
12447   format %{ "sarq    $dst, $shift" %}
12448   ins_encode %{
12449     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12450   %}
12451   ins_pipe(ialu_mem_imm);
12452 %}
12453 
12454 // Arithmetic Shift Right by variable
12455 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12456 %{
12457   predicate(!VM_Version::supports_bmi2());
12458   match(Set dst (RShiftL dst shift));
12459   effect(KILL cr);
12460 
12461   format %{ "sarq    $dst, $shift" %}
12462   ins_encode %{
12463     __ sarq($dst$$Register);
12464   %}
12465   ins_pipe(ialu_reg_reg);
12466 %}
12467 
12468 // Arithmetic Shift Right by variable
12469 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12470 %{
12471   predicate(!VM_Version::supports_bmi2());
12472   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12473   effect(KILL cr);
12474 
12475   format %{ "sarq    $dst, $shift" %}
12476   ins_encode %{
12477     __ sarq($dst$$Address);
12478   %}
12479   ins_pipe(ialu_mem_reg);
12480 %}
12481 
12482 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12483 %{
12484   predicate(VM_Version::supports_bmi2());
12485   match(Set dst (RShiftL src shift));
12486 
12487   format %{ "sarxq   $dst, $src, $shift" %}
12488   ins_encode %{
12489     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12490   %}
12491   ins_pipe(ialu_reg_reg);
12492 %}
12493 
12494 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12495 %{
12496   predicate(VM_Version::supports_bmi2());
12497   match(Set dst (RShiftL (LoadL src) shift));
12498   ins_cost(175);
12499   format %{ "sarxq   $dst, $src, $shift" %}
12500   ins_encode %{
12501     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12502   %}
12503   ins_pipe(ialu_reg_mem);
12504 %}
12505 
12506 // Logical Shift Right by 8-bit immediate
12507 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12508 %{
12509   predicate(!UseAPX);
12510   match(Set dst (URShiftL dst shift));
12511   effect(KILL cr);
12512 
12513   format %{ "shrq    $dst, $shift" %}
12514   ins_encode %{
12515     __ shrq($dst$$Register, $shift$$constant);
12516   %}
12517   ins_pipe(ialu_reg);
12518 %}
12519 
12520 // Logical Shift Right by 8-bit immediate
12521 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12522 %{
12523   predicate(UseAPX);
12524   match(Set dst (URShiftL src shift));
12525   effect(KILL cr);
12526   flag(PD::Flag_ndd_demotable_opr1);
12527 
12528   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12529   ins_encode %{
12530     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12531   %}
12532   ins_pipe(ialu_reg);
12533 %}
12534 
12535 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12536 %{
12537   predicate(UseAPX);
12538   match(Set dst (URShiftL (LoadL src) shift));
12539   effect(KILL cr);
12540 
12541   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12542   ins_encode %{
12543     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12544   %}
12545   ins_pipe(ialu_reg);
12546 %}
12547 
12548 // Logical Shift Right by 8-bit immediate
12549 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12550 %{
12551   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12552   effect(KILL cr);
12553 
12554   format %{ "shrq    $dst, $shift" %}
12555   ins_encode %{
12556     __ shrq($dst$$Address, $shift$$constant);
12557   %}
12558   ins_pipe(ialu_mem_imm);
12559 %}
12560 
12561 // Logical Shift Right by variable
12562 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12563 %{
12564   predicate(!VM_Version::supports_bmi2());
12565   match(Set dst (URShiftL dst shift));
12566   effect(KILL cr);
12567 
12568   format %{ "shrq    $dst, $shift" %}
12569   ins_encode %{
12570     __ shrq($dst$$Register);
12571   %}
12572   ins_pipe(ialu_reg_reg);
12573 %}
12574 
12575 // Logical Shift Right by variable
12576 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12577 %{
12578   predicate(!VM_Version::supports_bmi2());
12579   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12580   effect(KILL cr);
12581 
12582   format %{ "shrq    $dst, $shift" %}
12583   ins_encode %{
12584     __ shrq($dst$$Address);
12585   %}
12586   ins_pipe(ialu_mem_reg);
12587 %}
12588 
12589 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12590 %{
12591   predicate(VM_Version::supports_bmi2());
12592   match(Set dst (URShiftL src shift));
12593 
12594   format %{ "shrxq   $dst, $src, $shift" %}
12595   ins_encode %{
12596     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12597   %}
12598   ins_pipe(ialu_reg_reg);
12599 %}
12600 
12601 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12602 %{
12603   predicate(VM_Version::supports_bmi2());
12604   match(Set dst (URShiftL (LoadL src) shift));
12605   ins_cost(175);
12606   format %{ "shrxq   $dst, $src, $shift" %}
12607   ins_encode %{
12608     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12609   %}
12610   ins_pipe(ialu_reg_mem);
12611 %}
12612 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
12614 // This idiom is used by the compiler for the i2b bytecode.
12615 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12616 %{
12617   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12618 
12619   format %{ "movsbl  $dst, $src\t# i2b" %}
12620   ins_encode %{
12621     __ movsbl($dst$$Register, $src$$Register);
12622   %}
12623   ins_pipe(ialu_reg_reg);
12624 %}
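
// For illustration (assumed Java-level source, not generated code): javac
// compiles "byte b = (byte) i;" to the i2b bytecode, which C2 models as
// (i << 24) >> 24; the rule above collapses that shift pair into a single
// sign-extending movsbl. The i2s rule below does the same with a shift
// distance of 16 and movswl.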
12625 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12628 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12629 %{
12630   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12631 
12632   format %{ "movswl  $dst, $src\t# i2s" %}
12633   ins_encode %{
12634     __ movswl($dst$$Register, $src$$Register);
12635   %}
12636   ins_pipe(ialu_reg_reg);
12637 %}
12638 
12639 // ROL/ROR instructions
12640 
// Rotate Left by constant.
12642 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12643 %{
12644   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12645   match(Set dst (RotateLeft dst shift));
12646   effect(KILL cr);
12647   format %{ "roll    $dst, $shift" %}
12648   ins_encode %{
12649     __ roll($dst$$Register, $shift$$constant);
12650   %}
12651   ins_pipe(ialu_reg);
12652 %}
12653 
12654 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12655 %{
12656   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12657   match(Set dst (RotateLeft src shift));
12658   format %{ "rolxl   $dst, $src, $shift" %}
12659   ins_encode %{
12660     int shift = 32 - ($shift$$constant & 31);
12661     __ rorxl($dst$$Register, $src$$Register, shift);
12662   %}
12663   ins_pipe(ialu_reg_reg);
12664 %}
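
// BMI2 provides rorx but no rolx, so the rule above synthesizes a
// rotate-left by s as a rotate-right by (32 - s) mod 32; e.g. (illustrative)
// rol(x, 1) == ror(x, 31).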
12665 
12666 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12667 %{
12668   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12669   match(Set dst (RotateLeft (LoadI src) shift));
12670   ins_cost(175);
12671   format %{ "rolxl   $dst, $src, $shift" %}
12672   ins_encode %{
12673     int shift = 32 - ($shift$$constant & 31);
12674     __ rorxl($dst$$Register, $src$$Address, shift);
12675   %}
12676   ins_pipe(ialu_reg_mem);
12677 %}
12678 
12679 // Rotate Left by variable
12680 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12681 %{
12682   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12683   match(Set dst (RotateLeft dst shift));
12684   effect(KILL cr);
12685   format %{ "roll    $dst, $shift" %}
12686   ins_encode %{
12687     __ roll($dst$$Register);
12688   %}
12689   ins_pipe(ialu_reg_reg);
12690 %}
12691 
12692 // Rotate Left by variable
12693 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12694 %{
12695   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12696   match(Set dst (RotateLeft src shift));
12697   effect(KILL cr);
12698   flag(PD::Flag_ndd_demotable_opr1);
12699 
12700   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12701   ins_encode %{
12702     __ eroll($dst$$Register, $src$$Register, false);
12703   %}
12704   ins_pipe(ialu_reg_reg);
12705 %}
12706 
12707 // Rotate Right by constant.
12708 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12709 %{
12710   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12711   match(Set dst (RotateRight dst shift));
12712   effect(KILL cr);
12713   format %{ "rorl    $dst, $shift" %}
12714   ins_encode %{
12715     __ rorl($dst$$Register, $shift$$constant);
12716   %}
12717   ins_pipe(ialu_reg);
12718 %}
12719 
12720 // Rotate Right by constant.
12721 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12722 %{
12723   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12724   match(Set dst (RotateRight src shift));
12725   format %{ "rorxl   $dst, $src, $shift" %}
12726   ins_encode %{
12727     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12728   %}
12729   ins_pipe(ialu_reg_reg);
12730 %}
12731 
12732 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12733 %{
12734   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12735   match(Set dst (RotateRight (LoadI src) shift));
12736   ins_cost(175);
12737   format %{ "rorxl   $dst, $src, $shift" %}
12738   ins_encode %{
12739     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12740   %}
12741   ins_pipe(ialu_reg_mem);
12742 %}
12743 
12744 // Rotate Right by variable
12745 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12746 %{
12747   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12748   match(Set dst (RotateRight dst shift));
12749   effect(KILL cr);
12750   format %{ "rorl    $dst, $shift" %}
12751   ins_encode %{
12752     __ rorl($dst$$Register);
12753   %}
12754   ins_pipe(ialu_reg_reg);
12755 %}
12756 
12757 // Rotate Right by variable
12758 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12759 %{
12760   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12761   match(Set dst (RotateRight src shift));
12762   effect(KILL cr);
12763   flag(PD::Flag_ndd_demotable_opr1);
12764 
12765   format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
12766   ins_encode %{
12767     __ erorl($dst$$Register, $src$$Register, false);
12768   %}
12769   ins_pipe(ialu_reg_reg);
12770 %}
12771 
12772 // Rotate Left by constant.
12773 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12774 %{
12775   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12776   match(Set dst (RotateLeft dst shift));
12777   effect(KILL cr);
12778   format %{ "rolq    $dst, $shift" %}
12779   ins_encode %{
12780     __ rolq($dst$$Register, $shift$$constant);
12781   %}
12782   ins_pipe(ialu_reg);
12783 %}
12784 
12785 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12786 %{
12787   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12788   match(Set dst (RotateLeft src shift));
12789   format %{ "rolxq   $dst, $src, $shift" %}
12790   ins_encode %{
12791     int shift = 64 - ($shift$$constant & 63);
12792     __ rorxq($dst$$Register, $src$$Register, shift);
12793   %}
12794   ins_pipe(ialu_reg_reg);
12795 %}
12796 
12797 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12798 %{
12799   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12800   match(Set dst (RotateLeft (LoadL src) shift));
12801   ins_cost(175);
12802   format %{ "rolxq   $dst, $src, $shift" %}
12803   ins_encode %{
12804     int shift = 64 - ($shift$$constant & 63);
12805     __ rorxq($dst$$Register, $src$$Address, shift);
12806   %}
12807   ins_pipe(ialu_reg_mem);
12808 %}
12809 
12810 // Rotate Left by variable
12811 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12812 %{
12813   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12814   match(Set dst (RotateLeft dst shift));
12815   effect(KILL cr);
12816 
12817   format %{ "rolq    $dst, $shift" %}
12818   ins_encode %{
12819     __ rolq($dst$$Register);
12820   %}
12821   ins_pipe(ialu_reg_reg);
12822 %}
12823 
12824 // Rotate Left by variable
12825 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12826 %{
12827   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12828   match(Set dst (RotateLeft src shift));
12829   effect(KILL cr);
12830   flag(PD::Flag_ndd_demotable_opr1);
12831 
12832   format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
12833   ins_encode %{
12834     __ erolq($dst$$Register, $src$$Register, false);
12835   %}
12836   ins_pipe(ialu_reg_reg);
12837 %}
12838 
12839 // Rotate Right by constant.
12840 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12841 %{
12842   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12843   match(Set dst (RotateRight dst shift));
12844   effect(KILL cr);
12845   format %{ "rorq    $dst, $shift" %}
12846   ins_encode %{
12847     __ rorq($dst$$Register, $shift$$constant);
12848   %}
12849   ins_pipe(ialu_reg);
12850 %}
12851 
// Rotate Right by constant.
12853 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12854 %{
12855   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12856   match(Set dst (RotateRight src shift));
12857   format %{ "rorxq   $dst, $src, $shift" %}
12858   ins_encode %{
12859     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12860   %}
12861   ins_pipe(ialu_reg_reg);
12862 %}
12863 
12864 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12865 %{
12866   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12867   match(Set dst (RotateRight (LoadL src) shift));
12868   ins_cost(175);
12869   format %{ "rorxq   $dst, $src, $shift" %}
12870   ins_encode %{
12871     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12872   %}
12873   ins_pipe(ialu_reg_mem);
12874 %}
12875 
12876 // Rotate Right by variable
12877 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12878 %{
12879   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12880   match(Set dst (RotateRight dst shift));
12881   effect(KILL cr);
12882   format %{ "rorq    $dst, $shift" %}
12883   ins_encode %{
12884     __ rorq($dst$$Register);
12885   %}
12886   ins_pipe(ialu_reg_reg);
12887 %}
12888 
12889 // Rotate Right by variable
12890 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12891 %{
12892   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12893   match(Set dst (RotateRight src shift));
12894   effect(KILL cr);
12895   flag(PD::Flag_ndd_demotable_opr1);
12896 
12897   format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
12898   ins_encode %{
12899     __ erorq($dst$$Register, $src$$Register, false);
12900   %}
12901   ins_pipe(ialu_reg_reg);
12902 %}
12903 
12904 //----------------------------- CompressBits/ExpandBits ------------------------
12905 
12906 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12907   predicate(n->bottom_type()->isa_long());
12908   match(Set dst (CompressBits src mask));
12909   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12910   ins_encode %{
12911     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12912   %}
12913   ins_pipe( pipe_slow );
12914 %}
12915 
12916 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12917   predicate(n->bottom_type()->isa_long());
12918   match(Set dst (ExpandBits src mask));
12919   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12920   ins_encode %{
12921     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12922   %}
12923   ins_pipe( pipe_slow );
12924 %}
12925 
12926 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12927   predicate(n->bottom_type()->isa_long());
12928   match(Set dst (CompressBits src (LoadL mask)));
12929   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12930   ins_encode %{
12931     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12932   %}
12933   ins_pipe( pipe_slow );
12934 %}
12935 
12936 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12937   predicate(n->bottom_type()->isa_long());
12938   match(Set dst (ExpandBits src (LoadL mask)));
12939   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12940   ins_encode %{
12941     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12942   %}
12943   ins_pipe( pipe_slow );
12944 %}
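
// Worked example of the pext/pdep semantics used above (illustrative bit
// patterns, not generated code):
//   pext(src = 0b11010110, mask = 0b00001111) -> 0b0110      (gather masked bits)
//   pdep(src = 0b0110,     mask = 0b00001111) -> 0b00000110  (scatter into mask)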
12945 
12946 
12947 // Logical Instructions
12948 
12949 // Integer Logical Instructions
12950 
12951 // And Instructions
12952 // And Register with Register
12953 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12954 %{
12955   predicate(!UseAPX);
12956   match(Set dst (AndI dst src));
12957   effect(KILL cr);
12958   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12959 
12960   format %{ "andl    $dst, $src\t# int" %}
12961   ins_encode %{
12962     __ andl($dst$$Register, $src$$Register);
12963   %}
12964   ins_pipe(ialu_reg_reg);
12965 %}
12966 
12967 // And Register with Register using New Data Destination (NDD)
12968 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12969 %{
12970   predicate(UseAPX);
12971   match(Set dst (AndI src1 src2));
12972   effect(KILL cr);
12973   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12974 
12975   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12976   ins_encode %{
12977     __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12979   %}
12980   ins_pipe(ialu_reg_reg);
12981 %}
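
// A sketch of the legacy vs. NDD forms (illustrative operands, not
// generated code):
//   legacy:  andl  rax, rbx        ; rax &= rbx, overwrites rax
//   NDD:     eandl rcx, rax, rbx   ; rcx = rax & rbx, sources preserved
// The ndd_demotable flags mark operands that may be folded back into the
// shorter two-operand legacy encoding when dst is allocated to that source.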
12982 
12983 // And Register with Immediate 255
12984 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12985 %{
12986   match(Set dst (AndI src mask));
12987 
12988   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12989   ins_encode %{
12990     __ movzbl($dst$$Register, $src$$Register);
12991   %}
12992   ins_pipe(ialu_reg);
12993 %}
12994 
12995 // And Register with Immediate 255 and promote to long
12996 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12997 %{
12998   match(Set dst (ConvI2L (AndI src mask)));
12999 
13000   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
13001   ins_encode %{
13002     __ movzbl($dst$$Register, $src$$Register);
13003   %}
13004   ins_pipe(ialu_reg);
13005 %}
13006 
13007 // And Register with Immediate 65535
13008 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13009 %{
13010   match(Set dst (AndI src mask));
13011 
13012   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13013   ins_encode %{
13014     __ movzwl($dst$$Register, $src$$Register);
13015   %}
13016   ins_pipe(ialu_reg);
13017 %}
13018 
13019 // And Register with Immediate 65535 and promote to long
13020 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13021 %{
13022   match(Set dst (ConvI2L (AndI src mask)));
13023 
13024   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13025   ins_encode %{
13026     __ movzwl($dst$$Register, $src$$Register);
13027   %}
13028   ins_pipe(ialu_reg);
13029 %}
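
// The four rules above rely on masking with 0xFF/0xFFFF being a zero
// extension, e.g. (illustrative): (x & 0xFF) == zero-extend-byte(x).
// Since movzbl/movzwl also clear the upper 32 bits, the same encoding
// serves both the int and the int-to-long variants, and no flags are
// clobbered.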
13030 
13031 // Can skip int2long conversions after AND with small bitmask
13032 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13033 %{
13034   predicate(VM_Version::supports_bmi2());
13035   ins_cost(125);
13036   effect(TEMP tmp, KILL cr);
13037   match(Set dst (ConvI2L (AndI src mask)));
13038   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
13039   ins_encode %{
13040     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13041     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13042   %}
13043   ins_pipe(ialu_reg_reg);
13044 %}
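
// Worked example (illustrative): for mask == 0x3FF (an immI_Pow2M1 value),
// exact_log2(0x3FF + 1) == 10, and "bzhiq dst, src, 10" zeroes all bits at
// position 10 and above, so dst == src & 0x3FF, already zero-extended to
// 64 bits with no separate ConvI2L needed.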
13045 
13046 // And Register with Immediate
13047 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13048 %{
13049   predicate(!UseAPX);
13050   match(Set dst (AndI dst src));
13051   effect(KILL cr);
13052   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13053 
13054   format %{ "andl    $dst, $src\t# int" %}
13055   ins_encode %{
13056     __ andl($dst$$Register, $src$$constant);
13057   %}
13058   ins_pipe(ialu_reg);
13059 %}
13060 
13061 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13062 %{
13063   predicate(UseAPX);
13064   match(Set dst (AndI src1 src2));
13065   effect(KILL cr);
13066   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13067 
13068   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13069   ins_encode %{
13070     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13071   %}
13072   ins_pipe(ialu_reg);
13073 %}
13074 
13075 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13076 %{
13077   predicate(UseAPX);
13078   match(Set dst (AndI (LoadI src1) src2));
13079   effect(KILL cr);
13080   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13081 
13082   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13083   ins_encode %{
13084     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13085   %}
13086   ins_pipe(ialu_reg);
13087 %}
13088 
13089 // And Register with Memory
13090 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13091 %{
13092   predicate(!UseAPX);
13093   match(Set dst (AndI dst (LoadI src)));
13094   effect(KILL cr);
13095   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13096 
13097   ins_cost(150);
13098   format %{ "andl    $dst, $src\t# int" %}
13099   ins_encode %{
13100     __ andl($dst$$Register, $src$$Address);
13101   %}
13102   ins_pipe(ialu_reg_mem);
13103 %}
13104 
13105 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13106 %{
13107   predicate(UseAPX);
13108   match(Set dst (AndI src1 (LoadI src2)));
13109   effect(KILL cr);
13110   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13111 
13112   ins_cost(150);
13113   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13114   ins_encode %{
13115     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13116   %}
13117   ins_pipe(ialu_reg_mem);
13118 %}
13119 
13120 // And Memory with Register
13121 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13122 %{
13123   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13124   effect(KILL cr);
13125   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13126 
13127   ins_cost(150);
13128   format %{ "andb    $dst, $src\t# byte" %}
13129   ins_encode %{
13130     __ andb($dst$$Address, $src$$Register);
13131   %}
13132   ins_pipe(ialu_mem_reg);
13133 %}
13134 
13135 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13136 %{
13137   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13138   effect(KILL cr);
13139   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13140 
13141   ins_cost(150);
13142   format %{ "andl    $dst, $src\t# int" %}
13143   ins_encode %{
13144     __ andl($dst$$Address, $src$$Register);
13145   %}
13146   ins_pipe(ialu_mem_reg);
13147 %}
13148 
13149 // And Memory with Immediate
13150 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13151 %{
13152   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13153   effect(KILL cr);
13154   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13155 
13156   ins_cost(125);
13157   format %{ "andl    $dst, $src\t# int" %}
13158   ins_encode %{
13159     __ andl($dst$$Address, $src$$constant);
13160   %}
13161   ins_pipe(ialu_mem_imm);
13162 %}
13163 
13164 // BMI1 instructions
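// The patterns below map well-known bit tricks onto single BMI1 ops
// (worked bit examples are illustrative only):
//   andn(a, b) == ~a & b
//   blsi(x)    == x & -x         isolate lowest set bit:    0b10110 -> 0b00010
//   blsmsk(x)  == x ^ (x - 1)    mask up to lowest set bit: 0b10100 -> 0b00111
//   blsr(x)    == x & (x - 1)    clear lowest set bit:      0b10110 -> 0b10100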
13165 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13166   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13167   predicate(UseBMI1Instructions);
13168   effect(KILL cr);
13169   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13170 
13171   ins_cost(125);
13172   format %{ "andnl  $dst, $src1, $src2" %}
13173 
13174   ins_encode %{
13175     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13176   %}
13177   ins_pipe(ialu_reg_mem);
13178 %}
13179 
13180 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13181   match(Set dst (AndI (XorI src1 minus_1) src2));
13182   predicate(UseBMI1Instructions);
13183   effect(KILL cr);
13184   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13185 
13186   format %{ "andnl  $dst, $src1, $src2" %}
13187 
13188   ins_encode %{
13189     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13190   %}
13191   ins_pipe(ialu_reg);
13192 %}
13193 
13194 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13195   match(Set dst (AndI (SubI imm_zero src) src));
13196   predicate(UseBMI1Instructions);
13197   effect(KILL cr);
13198   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13199 
13200   format %{ "blsil  $dst, $src" %}
13201 
13202   ins_encode %{
13203     __ blsil($dst$$Register, $src$$Register);
13204   %}
13205   ins_pipe(ialu_reg);
13206 %}
13207 
13208 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13209   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13210   predicate(UseBMI1Instructions);
13211   effect(KILL cr);
13212   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13213 
13214   ins_cost(125);
13215   format %{ "blsil  $dst, $src" %}
13216 
13217   ins_encode %{
13218     __ blsil($dst$$Register, $src$$Address);
13219   %}
13220   ins_pipe(ialu_reg_mem);
13221 %}
13222 
13223 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13224 %{
13225   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13226   predicate(UseBMI1Instructions);
13227   effect(KILL cr);
13228   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13229 
13230   ins_cost(125);
13231   format %{ "blsmskl $dst, $src" %}
13232 
13233   ins_encode %{
13234     __ blsmskl($dst$$Register, $src$$Address);
13235   %}
13236   ins_pipe(ialu_reg_mem);
13237 %}
13238 
13239 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13240 %{
13241   match(Set dst (XorI (AddI src minus_1) src));
13242   predicate(UseBMI1Instructions);
13243   effect(KILL cr);
13244   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13245 
13246   format %{ "blsmskl $dst, $src" %}
13247 
13248   ins_encode %{
13249     __ blsmskl($dst$$Register, $src$$Register);
13250   %}
13251 
13252   ins_pipe(ialu_reg);
13253 %}
13254 
13255 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13256 %{
13257   match(Set dst (AndI (AddI src minus_1) src) );
13258   predicate(UseBMI1Instructions);
13259   effect(KILL cr);
13260   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13261 
13262   format %{ "blsrl  $dst, $src" %}
13263 
13264   ins_encode %{
13265     __ blsrl($dst$$Register, $src$$Register);
13266   %}
13267 
  ins_pipe(ialu_reg);
13269 %}
13270 
13271 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13272 %{
13273   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13274   predicate(UseBMI1Instructions);
13275   effect(KILL cr);
13276   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13277 
13278   ins_cost(125);
13279   format %{ "blsrl  $dst, $src" %}
13280 
13281   ins_encode %{
13282     __ blsrl($dst$$Register, $src$$Address);
13283   %}
13284 
  ins_pipe(ialu_reg_mem);
13286 %}
13287 
13288 // Or Instructions
13289 // Or Register with Register
13290 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13291 %{
13292   predicate(!UseAPX);
13293   match(Set dst (OrI dst src));
13294   effect(KILL cr);
13295   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13296 
13297   format %{ "orl     $dst, $src\t# int" %}
13298   ins_encode %{
13299     __ orl($dst$$Register, $src$$Register);
13300   %}
13301   ins_pipe(ialu_reg_reg);
13302 %}
13303 
13304 // Or Register with Register using New Data Destination (NDD)
13305 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13306 %{
13307   predicate(UseAPX);
13308   match(Set dst (OrI src1 src2));
13309   effect(KILL cr);
13310   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13311 
13312   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13313   ins_encode %{
13314     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13315   %}
13316   ins_pipe(ialu_reg_reg);
13317 %}
13318 
13319 // Or Register with Immediate
13320 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13321 %{
13322   predicate(!UseAPX);
13323   match(Set dst (OrI dst src));
13324   effect(KILL cr);
13325   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13326 
13327   format %{ "orl     $dst, $src\t# int" %}
13328   ins_encode %{
13329     __ orl($dst$$Register, $src$$constant);
13330   %}
13331   ins_pipe(ialu_reg);
13332 %}
13333 
13334 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13335 %{
13336   predicate(UseAPX);
13337   match(Set dst (OrI src1 src2));
13338   effect(KILL cr);
13339   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13340 
13341   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13342   ins_encode %{
13343     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13344   %}
13345   ins_pipe(ialu_reg);
13346 %}
13347 
13348 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13349 %{
13350   predicate(UseAPX);
13351   match(Set dst (OrI src1 src2));
13352   effect(KILL cr);
13353   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13354 
13355   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13356   ins_encode %{
13357     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13358   %}
13359   ins_pipe(ialu_reg);
13360 %}
13361 
13362 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13363 %{
13364   predicate(UseAPX);
13365   match(Set dst (OrI (LoadI src1) src2));
13366   effect(KILL cr);
13367   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13368 
13369   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13370   ins_encode %{
13371     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13372   %}
13373   ins_pipe(ialu_reg);
13374 %}
13375 
13376 // Or Register with Memory
13377 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13378 %{
13379   predicate(!UseAPX);
13380   match(Set dst (OrI dst (LoadI src)));
13381   effect(KILL cr);
13382   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13383 
13384   ins_cost(150);
13385   format %{ "orl     $dst, $src\t# int" %}
13386   ins_encode %{
13387     __ orl($dst$$Register, $src$$Address);
13388   %}
13389   ins_pipe(ialu_reg_mem);
13390 %}
13391 
13392 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13393 %{
13394   predicate(UseAPX);
13395   match(Set dst (OrI src1 (LoadI src2)));
13396   effect(KILL cr);
13397   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13398 
13399   ins_cost(150);
13400   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13401   ins_encode %{
13402     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13403   %}
13404   ins_pipe(ialu_reg_mem);
13405 %}
13406 
13407 // Or Memory with Register
13408 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13409 %{
13410   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13411   effect(KILL cr);
13412   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13413 
13414   ins_cost(150);
13415   format %{ "orb    $dst, $src\t# byte" %}
13416   ins_encode %{
13417     __ orb($dst$$Address, $src$$Register);
13418   %}
13419   ins_pipe(ialu_mem_reg);
13420 %}
13421 
13422 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13423 %{
13424   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13425   effect(KILL cr);
13426   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13427 
13428   ins_cost(150);
13429   format %{ "orl     $dst, $src\t# int" %}
13430   ins_encode %{
13431     __ orl($dst$$Address, $src$$Register);
13432   %}
13433   ins_pipe(ialu_mem_reg);
13434 %}
13435 
13436 // Or Memory with Immediate
13437 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13438 %{
13439   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13440   effect(KILL cr);
13441   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13442 
13443   ins_cost(125);
13444   format %{ "orl     $dst, $src\t# int" %}
13445   ins_encode %{
13446     __ orl($dst$$Address, $src$$constant);
13447   %}
13448   ins_pipe(ialu_mem_imm);
13449 %}
13450 
13451 // Xor Instructions
13452 // Xor Register with Register
13453 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13454 %{
13455   predicate(!UseAPX);
13456   match(Set dst (XorI dst src));
13457   effect(KILL cr);
13458   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13459 
13460   format %{ "xorl    $dst, $src\t# int" %}
13461   ins_encode %{
13462     __ xorl($dst$$Register, $src$$Register);
13463   %}
13464   ins_pipe(ialu_reg_reg);
13465 %}
13466 
13467 // Xor Register with Register using New Data Destination (NDD)
13468 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13469 %{
13470   predicate(UseAPX);
13471   match(Set dst (XorI src1 src2));
13472   effect(KILL cr);
13473   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13474 
13475   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13476   ins_encode %{
13477     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13478   %}
13479   ins_pipe(ialu_reg_reg);
13480 %}
13481 
13482 // Xor Register with Immediate -1
13483 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13484 %{
13485   predicate(!UseAPX);
13486   match(Set dst (XorI dst imm));
13487 
13488   format %{ "notl    $dst" %}
13489   ins_encode %{
13490      __ notl($dst$$Register);
13491   %}
13492   ins_pipe(ialu_reg);
13493 %}
13494 
13495 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13496 %{
13497   match(Set dst (XorI src imm));
13498   predicate(UseAPX);
13499   flag(PD::Flag_ndd_demotable_opr1);
13500 
13501   format %{ "enotl    $dst, $src" %}
13502   ins_encode %{
13503      __ enotl($dst$$Register, $src$$Register);
13504   %}
13505   ins_pipe(ialu_reg);
13506 %}
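
// Both rules above use the identity x ^ -1 == ~x: a NOT encoding replaces
// the XOR and, unlike xorl, leaves the flags untouched, which is why
// neither rule kills cr.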
13507 
13508 // Xor Register with Immediate
13509 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13510 %{
13511   // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
13512   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13513   match(Set dst (XorI dst src));
13514   effect(KILL cr);
13515   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13516 
13517   format %{ "xorl    $dst, $src\t# int" %}
13518   ins_encode %{
13519     __ xorl($dst$$Register, $src$$constant);
13520   %}
13521   ins_pipe(ialu_reg);
13522 %}
13523 
13524 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13525 %{
13526   // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
13527   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13528   match(Set dst (XorI src1 src2));
13529   effect(KILL cr);
13530   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13531 
13532   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13533   ins_encode %{
13534     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13535   %}
13536   ins_pipe(ialu_reg);
13537 %}
13538 
13539 // Xor Memory with Immediate
13540 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13541 %{
13542   predicate(UseAPX);
13543   match(Set dst (XorI (LoadI src1) src2));
13544   effect(KILL cr);
13545   ins_cost(150);
13546   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13547 
13548   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13549   ins_encode %{
13550     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13551   %}
13552   ins_pipe(ialu_reg);
13553 %}
13554 
13555 // Xor Register with Memory
13556 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13557 %{
13558   predicate(!UseAPX);
13559   match(Set dst (XorI dst (LoadI src)));
13560   effect(KILL cr);
13561   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13562 
13563   ins_cost(150);
13564   format %{ "xorl    $dst, $src\t# int" %}
13565   ins_encode %{
13566     __ xorl($dst$$Register, $src$$Address);
13567   %}
13568   ins_pipe(ialu_reg_mem);
13569 %}
13570 
13571 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13572 %{
13573   predicate(UseAPX);
13574   match(Set dst (XorI src1 (LoadI src2)));
13575   effect(KILL cr);
13576   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13577 
13578   ins_cost(150);
13579   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13580   ins_encode %{
13581     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13582   %}
13583   ins_pipe(ialu_reg_mem);
13584 %}
13585 
13586 // Xor Memory with Register
13587 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13588 %{
13589   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13590   effect(KILL cr);
13591   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13592 
13593   ins_cost(150);
13594   format %{ "xorb    $dst, $src\t# byte" %}
13595   ins_encode %{
13596     __ xorb($dst$$Address, $src$$Register);
13597   %}
13598   ins_pipe(ialu_mem_reg);
13599 %}
13600 
13601 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13602 %{
13603   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13604   effect(KILL cr);
13605   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13606 
13607   ins_cost(150);
13608   format %{ "xorl    $dst, $src\t# int" %}
13609   ins_encode %{
13610     __ xorl($dst$$Address, $src$$Register);
13611   %}
13612   ins_pipe(ialu_mem_reg);
13613 %}
13614 
13615 // Xor Memory with Immediate
13616 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13617 %{
13618   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13619   effect(KILL cr);
13620   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13621 
13622   ins_cost(125);
13623   format %{ "xorl    $dst, $src\t# int" %}
13624   ins_encode %{
13625     __ xorl($dst$$Address, $src$$constant);
13626   %}
13627   ins_pipe(ialu_mem_imm);
13628 %}
13629 
13630 
13631 // Long Logical Instructions
13632 
13633 // And Instructions
13634 // And Register with Register
13635 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13636 %{
13637   predicate(!UseAPX);
13638   match(Set dst (AndL dst src));
13639   effect(KILL cr);
13640   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13641 
13642   format %{ "andq    $dst, $src\t# long" %}
13643   ins_encode %{
13644     __ andq($dst$$Register, $src$$Register);
13645   %}
13646   ins_pipe(ialu_reg_reg);
13647 %}
13648 
13649 // And Register with Register using New Data Destination (NDD)
13650 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13651 %{
13652   predicate(UseAPX);
13653   match(Set dst (AndL src1 src2));
13654   effect(KILL cr);
13655   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13656 
13657   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13658   ins_encode %{
13659     __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13661   %}
13662   ins_pipe(ialu_reg_reg);
13663 %}
13664 
13665 // And Register with Immediate 255
13666 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13667 %{
13668   match(Set dst (AndL src mask));
13669 
13670   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13671   ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
13673     __ movzbl($dst$$Register, $src$$Register);
13674   %}
13675   ins_pipe(ialu_reg);
13676 %}
13677 
13678 // And Register with Immediate 65535
13679 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13680 %{
13681   match(Set dst (AndL src mask));
13682 
13683   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13684   ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
13686     __ movzwl($dst$$Register, $src$$Register);
13687   %}
13688   ins_pipe(ialu_reg);
13689 %}
13690 
13691 // And Register with Immediate
13692 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13693 %{
13694   predicate(!UseAPX);
13695   match(Set dst (AndL dst src));
13696   effect(KILL cr);
13697   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13698 
13699   format %{ "andq    $dst, $src\t# long" %}
13700   ins_encode %{
13701     __ andq($dst$$Register, $src$$constant);
13702   %}
13703   ins_pipe(ialu_reg);
13704 %}
13705 
13706 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13707 %{
13708   predicate(UseAPX);
13709   match(Set dst (AndL src1 src2));
13710   effect(KILL cr);
13711   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13712 
13713   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13714   ins_encode %{
13715     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13716   %}
13717   ins_pipe(ialu_reg);
13718 %}
13719 
13720 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13721 %{
13722   predicate(UseAPX);
13723   match(Set dst (AndL (LoadL src1) src2));
13724   effect(KILL cr);
13725   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13726 
13727   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13728   ins_encode %{
13729     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13730   %}
13731   ins_pipe(ialu_reg);
13732 %}
13733 
13734 // And Register with Memory
13735 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13736 %{
13737   predicate(!UseAPX);
13738   match(Set dst (AndL dst (LoadL src)));
13739   effect(KILL cr);
13740   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13741 
13742   ins_cost(150);
13743   format %{ "andq    $dst, $src\t# long" %}
13744   ins_encode %{
13745     __ andq($dst$$Register, $src$$Address);
13746   %}
13747   ins_pipe(ialu_reg_mem);
13748 %}
13749 
13750 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13751 %{
13752   predicate(UseAPX);
13753   match(Set dst (AndL src1 (LoadL src2)));
13754   effect(KILL cr);
13755   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13756 
13757   ins_cost(150);
13758   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13759   ins_encode %{
13760     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13761   %}
13762   ins_pipe(ialu_reg_mem);
13763 %}
13764 
13765 // And Memory with Register
13766 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13767 %{
13768   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13769   effect(KILL cr);
13770   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13771 
13772   ins_cost(150);
13773   format %{ "andq    $dst, $src\t# long" %}
13774   ins_encode %{
13775     __ andq($dst$$Address, $src$$Register);
13776   %}
13777   ins_pipe(ialu_mem_reg);
13778 %}
13779 
13780 // And Memory with Immediate
13781 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13782 %{
13783   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13784   effect(KILL cr);
13785   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13786 
13787   ins_cost(125);
13788   format %{ "andq    $dst, $src\t# long" %}
13789   ins_encode %{
13790     __ andq($dst$$Address, $src$$constant);
13791   %}
13792   ins_pipe(ialu_mem_imm);
13793 %}
13794 
13795 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13796 %{
  // con must be a genuinely 64-bit immediate whose complement is a power of 2;
  // for masks reachable by an 8/32-bit immediate a plain AND/OR is good enough.
13799   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13800 
13801   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13802   effect(KILL cr);
13803 
13804   ins_cost(125);
13805   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13806   ins_encode %{
13807     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13808   %}
13809   ins_pipe(ialu_mem_imm);
13810 %}
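
// Example (illustrative, not matcher input): clearing a single high bit in
// place, e.g.
//   v &= ~(1L << 33);        // con = ~(1L << 33), so not(con) == 1L << 33
// satisfies the predicate above and is emitted as a single
//   btrq [addr], 33
// rather than materializing the 64-bit mask in a register first.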
13811 
13812 // BMI1 instructions
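// The rules below recognize the canonical ideal-graph shapes of the BMI1
// bit-manipulation idioms and collapse each into a single instruction,
// roughly:
//   ~a & b       -> (AndL (XorL a -1) b)   -> andnq
//   a & -a       -> (AndL (SubL 0 a) a)    -> blsiq    (isolate lowest set bit)
//   a ^ (a - 1)  -> (XorL (AddL a -1) a)   -> blsmskq  (mask up to lowest set bit)
//   a & (a - 1)  -> (AndL (AddL a -1) a)   -> blsrq    (clear lowest set bit)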
13813 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13814   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13815   predicate(UseBMI1Instructions);
13816   effect(KILL cr);
13817   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13818 
13819   ins_cost(125);
13820   format %{ "andnq  $dst, $src1, $src2" %}
13821 
13822   ins_encode %{
13823     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13824   %}
13825   ins_pipe(ialu_reg_mem);
13826 %}
13827 
13828 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13829   match(Set dst (AndL (XorL src1 minus_1) src2));
13830   predicate(UseBMI1Instructions);
13831   effect(KILL cr);
13832   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13833 
13834   format %{ "andnq  $dst, $src1, $src2" %}
13835 
13836   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13838   %}
13839   ins_pipe(ialu_reg_mem);
13840 %}
13841 
13842 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13843   match(Set dst (AndL (SubL imm_zero src) src));
13844   predicate(UseBMI1Instructions);
13845   effect(KILL cr);
13846   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13847 
13848   format %{ "blsiq  $dst, $src" %}
13849 
13850   ins_encode %{
13851     __ blsiq($dst$$Register, $src$$Register);
13852   %}
13853   ins_pipe(ialu_reg);
13854 %}
13855 
13856 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13857   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13858   predicate(UseBMI1Instructions);
13859   effect(KILL cr);
13860   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13861 
13862   ins_cost(125);
13863   format %{ "blsiq  $dst, $src" %}
13864 
13865   ins_encode %{
13866     __ blsiq($dst$$Register, $src$$Address);
13867   %}
13868   ins_pipe(ialu_reg_mem);
13869 %}
13870 
13871 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13872 %{
13873   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13874   predicate(UseBMI1Instructions);
13875   effect(KILL cr);
13876   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13877 
13878   ins_cost(125);
13879   format %{ "blsmskq $dst, $src" %}
13880 
13881   ins_encode %{
13882     __ blsmskq($dst$$Register, $src$$Address);
13883   %}
13884   ins_pipe(ialu_reg_mem);
13885 %}
13886 
13887 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13888 %{
13889   match(Set dst (XorL (AddL src minus_1) src));
13890   predicate(UseBMI1Instructions);
13891   effect(KILL cr);
13892   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13893 
13894   format %{ "blsmskq $dst, $src" %}
13895 
13896   ins_encode %{
13897     __ blsmskq($dst$$Register, $src$$Register);
13898   %}
13899 
13900   ins_pipe(ialu_reg);
13901 %}
13902 
13903 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13904 %{
13905   match(Set dst (AndL (AddL src minus_1) src) );
13906   predicate(UseBMI1Instructions);
13907   effect(KILL cr);
13908   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13909 
13910   format %{ "blsrq  $dst, $src" %}
13911 
13912   ins_encode %{
13913     __ blsrq($dst$$Register, $src$$Register);
13914   %}
13915 
13916   ins_pipe(ialu_reg);
13917 %}
13918 
13919 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13920 %{
13921   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13922   predicate(UseBMI1Instructions);
13923   effect(KILL cr);
13924   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13925 
13926   ins_cost(125);
13927   format %{ "blsrq  $dst, $src" %}
13928 
13929   ins_encode %{
13930     __ blsrq($dst$$Register, $src$$Address);
13931   %}
13932 
  ins_pipe(ialu_reg_mem);
13934 %}
13935 
13936 // Or Instructions
13937 // Or Register with Register
13938 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13939 %{
13940   predicate(!UseAPX);
13941   match(Set dst (OrL dst src));
13942   effect(KILL cr);
13943   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13944 
13945   format %{ "orq     $dst, $src\t# long" %}
13946   ins_encode %{
13947     __ orq($dst$$Register, $src$$Register);
13948   %}
13949   ins_pipe(ialu_reg_reg);
13950 %}
13951 
13952 // Or Register with Register using New Data Destination (NDD)
13953 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13954 %{
13955   predicate(UseAPX);
13956   match(Set dst (OrL src1 src2));
13957   effect(KILL cr);
13958   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13959 
13960   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13961   ins_encode %{
13962     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13964   %}
13965   ins_pipe(ialu_reg_reg);
13966 %}
13967 
13968 // Use any_RegP to match R15 (TLS register) without spilling.
13969 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
13971   effect(KILL cr);
13972   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13973 
13974   format %{ "orq     $dst, $src\t# long" %}
13975   ins_encode %{
13976     __ orq($dst$$Register, $src$$Register);
13977   %}
13978   ins_pipe(ialu_reg_reg);
13979 %}
13980 
13981 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13983   effect(KILL cr);
13984   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13985 
13986   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13987   ins_encode %{
13988     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13989   %}
13990   ins_pipe(ialu_reg_reg);
13991 %}
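
// A note on the operand choice above: CastP2X reinterprets a pointer's bits
// as a long, and the value being folded in may be the current thread pointer,
// which the JIT keeps pinned in R15. rRegP deliberately excludes R15, so
// these two rules use any_RegP to accept it directly rather than forcing a
// spill or copy first.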
13992 
13993 // Or Register with Immediate
13994 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13995 %{
13996   predicate(!UseAPX);
13997   match(Set dst (OrL dst src));
13998   effect(KILL cr);
13999   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14000 
14001   format %{ "orq     $dst, $src\t# long" %}
14002   ins_encode %{
14003     __ orq($dst$$Register, $src$$constant);
14004   %}
14005   ins_pipe(ialu_reg);
14006 %}
14007 
14008 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14009 %{
14010   predicate(UseAPX);
14011   match(Set dst (OrL src1 src2));
14012   effect(KILL cr);
14013   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14014 
14015   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14016   ins_encode %{
14017     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14018   %}
14019   ins_pipe(ialu_reg);
14020 %}
14021 
14022 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14023 %{
14024   predicate(UseAPX);
14025   match(Set dst (OrL src1 src2));
14026   effect(KILL cr);
14027   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14028 
14029   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14030   ins_encode %{
14031     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14032   %}
14033   ins_pipe(ialu_reg);
14034 %}
14035 
// Or Memory with Immediate into Register
14037 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14038 %{
14039   predicate(UseAPX);
14040   match(Set dst (OrL (LoadL src1) src2));
14041   effect(KILL cr);
14042   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14043 
14044   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14045   ins_encode %{
14046     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14047   %}
14048   ins_pipe(ialu_reg);
14049 %}
14050 
14051 // Or Register with Memory
14052 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14053 %{
14054   predicate(!UseAPX);
14055   match(Set dst (OrL dst (LoadL src)));
14056   effect(KILL cr);
14057   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14058 
14059   ins_cost(150);
14060   format %{ "orq     $dst, $src\t# long" %}
14061   ins_encode %{
14062     __ orq($dst$$Register, $src$$Address);
14063   %}
14064   ins_pipe(ialu_reg_mem);
14065 %}
14066 
14067 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14068 %{
14069   predicate(UseAPX);
14070   match(Set dst (OrL src1 (LoadL src2)));
14071   effect(KILL cr);
14072   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14073 
14074   ins_cost(150);
14075   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14076   ins_encode %{
14077     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14078   %}
14079   ins_pipe(ialu_reg_mem);
14080 %}
14081 
14082 // Or Memory with Register
14083 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14084 %{
14085   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14086   effect(KILL cr);
14087   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14088 
14089   ins_cost(150);
14090   format %{ "orq     $dst, $src\t# long" %}
14091   ins_encode %{
14092     __ orq($dst$$Address, $src$$Register);
14093   %}
14094   ins_pipe(ialu_mem_reg);
14095 %}
14096 
14097 // Or Memory with Immediate
14098 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14099 %{
14100   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14101   effect(KILL cr);
14102   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14103 
14104   ins_cost(125);
14105   format %{ "orq     $dst, $src\t# long" %}
14106   ins_encode %{
14107     __ orq($dst$$Address, $src$$constant);
14108   %}
14109   ins_pipe(ialu_mem_imm);
14110 %}
14111 
14112 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14113 %{
  // con must be a genuinely 64-bit power-of-2 immediate; for bits that an
  // 8/32-bit immediate can reach, a plain AND/OR is good enough.
14116   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14117 
14118   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14119   effect(KILL cr);
14120 
14121   ins_cost(125);
14122   format %{ "btsq    $dst, log2($con)\t# long" %}
14123   ins_encode %{
14124     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14125   %}
14126   ins_pipe(ialu_mem_imm);
14127 %}
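
// Example (illustrative, not matcher input): setting a bit beyond the imm32
// range, e.g.
//   v |= (1L << 40);         // con = 1L << 40, log2(con) = 40 > 31
// matches the rule above and is emitted as a single
//   btsq [addr], 40
// while small masks keep using the plain orq-with-immediate form.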
14128 
14129 // Xor Instructions
14130 // Xor Register with Register
14131 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14132 %{
14133   predicate(!UseAPX);
14134   match(Set dst (XorL dst src));
14135   effect(KILL cr);
14136   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14137 
14138   format %{ "xorq    $dst, $src\t# long" %}
14139   ins_encode %{
14140     __ xorq($dst$$Register, $src$$Register);
14141   %}
14142   ins_pipe(ialu_reg_reg);
14143 %}
14144 
14145 // Xor Register with Register using New Data Destination (NDD)
14146 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14147 %{
14148   predicate(UseAPX);
14149   match(Set dst (XorL src1 src2));
14150   effect(KILL cr);
14151   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14152 
14153   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14154   ins_encode %{
14155     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14156   %}
14157   ins_pipe(ialu_reg_reg);
14158 %}
14159 
14160 // Xor Register with Immediate -1
14161 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14162 %{
14163   predicate(!UseAPX);
14164   match(Set dst (XorL dst imm));
14165 
14166   format %{ "notq   $dst" %}
14167   ins_encode %{
14168      __ notq($dst$$Register);
14169   %}
14170   ins_pipe(ialu_reg);
14171 %}
14172 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14174 %{
14175   predicate(UseAPX);
14176   match(Set dst (XorL src imm));
14177   flag(PD::Flag_ndd_demotable_opr1);
14178 
14179   format %{ "enotq   $dst, $src" %}
14180   ins_encode %{
14181     __ enotq($dst$$Register, $src$$Register);
14182   %}
14183   ins_pipe(ialu_reg);
14184 %}
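
// Illustrative shape: a long bitwise complement,
//   long m = ~x;             // appears in the ideal graph as (XorL x -1)
// is selected as a single notq (or APX enotq) by the two rules above. The
// strict -1 checks in the predicates of the general xor-with-immediate rules
// below keep those rules from competing on cost for this case.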
14185 
14186 // Xor Register with Immediate
14187 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14188 %{
  // Strict predicate check so that xorL_rReg_im1 is selected for immL32 src == -1
  // regardless of relative rule cost.
14190   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14191   match(Set dst (XorL dst src));
14192   effect(KILL cr);
14193   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14194 
14195   format %{ "xorq    $dst, $src\t# long" %}
14196   ins_encode %{
14197     __ xorq($dst$$Register, $src$$constant);
14198   %}
14199   ins_pipe(ialu_reg);
14200 %}
14201 
14202 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14203 %{
  // Strict predicate check so that xorL_rReg_im1_ndd is selected for immL32 src2 == -1
  // regardless of relative rule cost.
14205   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14206   match(Set dst (XorL src1 src2));
14207   effect(KILL cr);
14208   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14209 
14210   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14211   ins_encode %{
14212     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14213   %}
14214   ins_pipe(ialu_reg);
14215 %}
14216 
// Xor Memory with Immediate into Register
14218 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14219 %{
14220   predicate(UseAPX);
14221   match(Set dst (XorL (LoadL src1) src2));
14222   effect(KILL cr);
14223   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14224   ins_cost(150);
14225 
14226   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14227   ins_encode %{
14228     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14229   %}
14230   ins_pipe(ialu_reg);
14231 %}
14232 
14233 // Xor Register with Memory
14234 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14235 %{
14236   predicate(!UseAPX);
14237   match(Set dst (XorL dst (LoadL src)));
14238   effect(KILL cr);
14239   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14240 
14241   ins_cost(150);
14242   format %{ "xorq    $dst, $src\t# long" %}
14243   ins_encode %{
14244     __ xorq($dst$$Register, $src$$Address);
14245   %}
14246   ins_pipe(ialu_reg_mem);
14247 %}
14248 
14249 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14250 %{
14251   predicate(UseAPX);
14252   match(Set dst (XorL src1 (LoadL src2)));
14253   effect(KILL cr);
14254   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14255 
14256   ins_cost(150);
14257   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14258   ins_encode %{
14259     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14260   %}
14261   ins_pipe(ialu_reg_mem);
14262 %}
14263 
14264 // Xor Memory with Register
14265 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14266 %{
14267   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14268   effect(KILL cr);
14269   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14270 
14271   ins_cost(150);
14272   format %{ "xorq    $dst, $src\t# long" %}
14273   ins_encode %{
14274     __ xorq($dst$$Address, $src$$Register);
14275   %}
14276   ins_pipe(ialu_mem_reg);
14277 %}
14278 
14279 // Xor Memory with Immediate
14280 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14281 %{
14282   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14283   effect(KILL cr);
14284   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14285 
14286   ins_cost(125);
14287   format %{ "xorq    $dst, $src\t# long" %}
14288   ins_encode %{
14289     __ xorq($dst$$Address, $src$$constant);
14290   %}
14291   ins_pipe(ialu_mem_imm);
14292 %}
14293 
14294 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14295 %{
14296   match(Set dst (CmpLTMask p q));
14297   effect(KILL cr);
14298 
14299   ins_cost(400);
14300   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setcc   $dst \t# emits setlt + movzbl or setzul for APX\n\t"
14302             "negl    $dst" %}
14303   ins_encode %{
14304     __ cmpl($p$$Register, $q$$Register);
14305     __ setcc(Assembler::less, $dst$$Register);
14306     __ negl($dst$$Register);
14307   %}
14308   ins_pipe(pipe_slow);
14309 %}
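
// Illustrative semantics: CmpLTMask yields an all-ones mask when p < q and
// zero otherwise, i.e. roughly
//   int mask = (p < q) ? -1 : 0;
// The setcc helper leaves 0/1 in $dst (setlt + movzbl, or APX setzul), and
// negl turns that into 0/-1.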
14310 
14311 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14312 %{
14313   match(Set dst (CmpLTMask dst zero));
14314   effect(KILL cr);
14315 
14316   ins_cost(100);
14317   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14318   ins_encode %{
14319     __ sarl($dst$$Register, 31);
14320   %}
14321   ins_pipe(ialu_reg);
14322 %}
14323 
14324 /* Better to save a register than avoid a branch */
14325 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14326 %{
14327   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14328   effect(KILL cr);
14329   ins_cost(300);
14330   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14331             "jge     done\n\t"
14332             "addl    $p,$y\n"
14333             "done:   " %}
14334   ins_encode %{
14335     Register Rp = $p$$Register;
14336     Register Rq = $q$$Register;
14337     Register Ry = $y$$Register;
14338     Label done;
14339     __ subl(Rp, Rq);
14340     __ jccb(Assembler::greaterEqual, done);
14341     __ addl(Rp, Ry);
14342     __ bind(done);
14343   %}
14344   ins_pipe(pipe_cmplt);
14345 %}
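
// Illustrative source shape: the matched tree computes
//   p = (p - q) + ((p < q) ? y : 0);
// e.g. a ring-buffer style index update such as
//   i -= step; if (i < 0) i += len;
// which the branchy sub/jge/add sequence above implements with one fewer
// live register than the branchless mask form.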
14346 
14347 /* Better to save a register than avoid a branch */
14348 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14349 %{
14350   match(Set y (AndI (CmpLTMask p q) y));
14351   effect(KILL cr);
14352 
14353   ins_cost(300);
14354 
14355   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14356             "jlt     done\n\t"
14357             "xorl    $y, $y\n"
14358             "done:   " %}
14359   ins_encode %{
14360     Register Rp = $p$$Register;
14361     Register Rq = $q$$Register;
14362     Register Ry = $y$$Register;
14363     Label done;
14364     __ cmpl(Rp, Rq);
14365     __ jccb(Assembler::less, done);
14366     __ xorl(Ry, Ry);
14367     __ bind(done);
14368   %}
14369   ins_pipe(pipe_cmplt);
14370 %}
14371 
14372 
14373 //---------- FP Instructions------------------------------------------------
14374 
14375 // Really expensive, avoid
14376 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14377 %{
14378   match(Set cr (CmpF src1 src2));
14379 
14380   ins_cost(500);
14381   format %{ "ucomiss $src1, $src2\n\t"
14382             "jnp,s   exit\n\t"
14383             "pushfq\t# saw NaN, set CF\n\t"
14384             "andq    [rsp], #0xffffff2b\n\t"
14385             "popfq\n"
14386     "exit:" %}
14387   ins_encode %{
14388     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14389     emit_cmpfp_fixup(masm);
14390   %}
14391   ins_pipe(pipe_slow);
14392 %}
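
// A note on the fixup: on an unordered compare ucomiss/ucomisd set
// ZF = PF = CF = 1. emit_cmpfp_fixup rewrites the saved flags (the andq mask
// clears ZF, SF, AF and PF but keeps CF) so that a NaN operand reads back as
// strictly "below", i.e. unordered is treated as less-than.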
14393 
14394 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14395   match(Set cr (CmpF src1 src2));
14396 
14397   ins_cost(100);
14398   format %{ "ucomiss $src1, $src2" %}
14399   ins_encode %{
14400     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14401   %}
14402   ins_pipe(pipe_slow);
14403 %}
14404 
14405 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14406   match(Set cr (CmpF src1 src2));
14407 
14408   ins_cost(100);
14409   format %{ "vucomxss $src1, $src2" %}
14410   ins_encode %{
14411     __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14412   %}
14413   ins_pipe(pipe_slow);
14414 %}
14415 
14416 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14417   match(Set cr (CmpF src1 (LoadF src2)));
14418 
14419   ins_cost(100);
14420   format %{ "ucomiss $src1, $src2" %}
14421   ins_encode %{
14422     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14423   %}
14424   ins_pipe(pipe_slow);
14425 %}
14426 
14427 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14428   match(Set cr (CmpF src1 (LoadF src2)));
14429 
14430   ins_cost(100);
14431   format %{ "vucomxss $src1, $src2" %}
14432   ins_encode %{
14433     __ vucomxss($src1$$XMMRegister, $src2$$Address);
14434   %}
14435   ins_pipe(pipe_slow);
14436 %}
14437 
14438 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14439   match(Set cr (CmpF src con));
14440 
14441   ins_cost(100);
14442   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14443   ins_encode %{
14444     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14445   %}
14446   ins_pipe(pipe_slow);
14447 %}
14448 
14449 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14450   match(Set cr (CmpF src con));
14451 
14452   ins_cost(100);
14453   format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14454   ins_encode %{
14455     __ vucomxss($src$$XMMRegister, $constantaddress($con));
14456   %}
14457   ins_pipe(pipe_slow);
14458 %}
14459 
14460 // Really expensive, avoid
14461 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14462 %{
14463   match(Set cr (CmpD src1 src2));
14464 
14465   ins_cost(500);
14466   format %{ "ucomisd $src1, $src2\n\t"
14467             "jnp,s   exit\n\t"
14468             "pushfq\t# saw NaN, set CF\n\t"
14469             "andq    [rsp], #0xffffff2b\n\t"
14470             "popfq\n"
14471     "exit:" %}
14472   ins_encode %{
14473     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14474     emit_cmpfp_fixup(masm);
14475   %}
14476   ins_pipe(pipe_slow);
14477 %}
14478 
14479 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14480   match(Set cr (CmpD src1 src2));
14481 
14482   ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
14484   ins_encode %{
14485     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14486   %}
14487   ins_pipe(pipe_slow);
14488 %}
14489 
14490 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14491   match(Set cr (CmpD src1 src2));
14492 
14493   ins_cost(100);
  format %{ "vucomxsd $src1, $src2" %}
14495   ins_encode %{
14496     __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14497   %}
14498   ins_pipe(pipe_slow);
14499 %}
14500 
14501 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14502   match(Set cr (CmpD src1 (LoadD src2)));
14503 
14504   ins_cost(100);
14505   format %{ "ucomisd $src1, $src2" %}
14506   ins_encode %{
14507     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14508   %}
14509   ins_pipe(pipe_slow);
14510 %}
14511 
14512 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14513   match(Set cr (CmpD src1 (LoadD src2)));
14514 
14515   ins_cost(100);
14516   format %{ "vucomxsd $src1, $src2" %}
14517   ins_encode %{
14518     __ vucomxsd($src1$$XMMRegister, $src2$$Address);
14519   %}
14520   ins_pipe(pipe_slow);
14521 %}
14522 
14523 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14524   match(Set cr (CmpD src con));
14525   ins_cost(100);
14526   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14527   ins_encode %{
14528     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14529   %}
14530   ins_pipe(pipe_slow);
14531 %}
14532 
14533 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14534   match(Set cr (CmpD src con));
14535 
14536   ins_cost(100);
14537   format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14538   ins_encode %{
14539     __ vucomxsd($src$$XMMRegister, $constantaddress($con));
14540   %}
14541   ins_pipe(pipe_slow);
14542 %}
14543 
14544 // Compare into -1,0,1
14545 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14546 %{
14547   match(Set dst (CmpF3 src1 src2));
14548   effect(KILL cr);
14549 
14550   ins_cost(275);
14551   format %{ "ucomiss $src1, $src2\n\t"
14552             "movl    $dst, #-1\n\t"
14553             "jp,s    done\n\t"
14554             "jb,s    done\n\t"
14555             "setne   $dst\n\t"
14556             "movzbl  $dst, $dst\n"
14557     "done:" %}
14558   ins_encode %{
14559     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14560     emit_cmpfp3(masm, $dst$$Register);
14561   %}
14562   ins_pipe(pipe_slow);
14563 %}
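
// The emit_cmpfp3 helper shown in the format maps the four ucomiss/ucomisd
// outcomes onto Java's three-way result: unordered (jp) and below (jb) keep
// the preloaded -1, equal leaves 0, and greater yields 1 via setne + movzbl.
// The same helper backs all of the CmpF3/CmpD3 rules that follow.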
14564 
14565 // Compare into -1,0,1
14566 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14567 %{
14568   match(Set dst (CmpF3 src1 (LoadF src2)));
14569   effect(KILL cr);
14570 
14571   ins_cost(275);
14572   format %{ "ucomiss $src1, $src2\n\t"
14573             "movl    $dst, #-1\n\t"
14574             "jp,s    done\n\t"
14575             "jb,s    done\n\t"
14576             "setne   $dst\n\t"
14577             "movzbl  $dst, $dst\n"
14578     "done:" %}
14579   ins_encode %{
14580     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14581     emit_cmpfp3(masm, $dst$$Register);
14582   %}
14583   ins_pipe(pipe_slow);
14584 %}
14585 
14586 // Compare into -1,0,1
14587 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14588   match(Set dst (CmpF3 src con));
14589   effect(KILL cr);
14590 
14591   ins_cost(275);
14592   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14593             "movl    $dst, #-1\n\t"
14594             "jp,s    done\n\t"
14595             "jb,s    done\n\t"
14596             "setne   $dst\n\t"
14597             "movzbl  $dst, $dst\n"
14598     "done:" %}
14599   ins_encode %{
14600     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14601     emit_cmpfp3(masm, $dst$$Register);
14602   %}
14603   ins_pipe(pipe_slow);
14604 %}
14605 
14606 // Compare into -1,0,1
14607 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14608 %{
14609   match(Set dst (CmpD3 src1 src2));
14610   effect(KILL cr);
14611 
14612   ins_cost(275);
14613   format %{ "ucomisd $src1, $src2\n\t"
14614             "movl    $dst, #-1\n\t"
14615             "jp,s    done\n\t"
14616             "jb,s    done\n\t"
14617             "setne   $dst\n\t"
14618             "movzbl  $dst, $dst\n"
14619     "done:" %}
14620   ins_encode %{
14621     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14622     emit_cmpfp3(masm, $dst$$Register);
14623   %}
14624   ins_pipe(pipe_slow);
14625 %}
14626 
14627 // Compare into -1,0,1
14628 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14629 %{
14630   match(Set dst (CmpD3 src1 (LoadD src2)));
14631   effect(KILL cr);
14632 
14633   ins_cost(275);
14634   format %{ "ucomisd $src1, $src2\n\t"
14635             "movl    $dst, #-1\n\t"
14636             "jp,s    done\n\t"
14637             "jb,s    done\n\t"
14638             "setne   $dst\n\t"
14639             "movzbl  $dst, $dst\n"
14640     "done:" %}
14641   ins_encode %{
14642     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14643     emit_cmpfp3(masm, $dst$$Register);
14644   %}
14645   ins_pipe(pipe_slow);
14646 %}
14647 
14648 // Compare into -1,0,1
14649 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14650   match(Set dst (CmpD3 src con));
14651   effect(KILL cr);
14652 
14653   ins_cost(275);
14654   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14655             "movl    $dst, #-1\n\t"
14656             "jp,s    done\n\t"
14657             "jb,s    done\n\t"
14658             "setne   $dst\n\t"
14659             "movzbl  $dst, $dst\n"
14660     "done:" %}
14661   ins_encode %{
14662     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14663     emit_cmpfp3(masm, $dst$$Register);
14664   %}
14665   ins_pipe(pipe_slow);
14666 %}
14667 
14668 //----------Arithmetic Conversion Instructions---------------------------------
14669 
14670 instruct convF2D_reg_reg(regD dst, regF src)
14671 %{
14672   match(Set dst (ConvF2D src));
14673 
14674   format %{ "cvtss2sd $dst, $src" %}
14675   ins_encode %{
14676     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14677   %}
14678   ins_pipe(pipe_slow); // XXX
14679 %}
14680 
14681 instruct convF2D_reg_mem(regD dst, memory src)
14682 %{
14683   predicate(UseAVX == 0);
14684   match(Set dst (ConvF2D (LoadF src)));
14685 
14686   format %{ "cvtss2sd $dst, $src" %}
14687   ins_encode %{
14688     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14689   %}
14690   ins_pipe(pipe_slow); // XXX
14691 %}
14692 
14693 instruct convD2F_reg_reg(regF dst, regD src)
14694 %{
14695   match(Set dst (ConvD2F src));
14696 
14697   format %{ "cvtsd2ss $dst, $src" %}
14698   ins_encode %{
14699     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14700   %}
14701   ins_pipe(pipe_slow); // XXX
14702 %}
14703 
14704 instruct convD2F_reg_mem(regF dst, memory src)
14705 %{
14706   predicate(UseAVX == 0);
14707   match(Set dst (ConvD2F (LoadD src)));
14708 
14709   format %{ "cvtsd2ss $dst, $src" %}
14710   ins_encode %{
14711     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14712   %}
14713   ins_pipe(pipe_slow); // XXX
14714 %}
14715 
14716 // XXX do mem variants
14717 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14718 %{
14719   predicate(!VM_Version::supports_avx10_2());
14720   match(Set dst (ConvF2I src));
14721   effect(KILL cr);
14722   format %{ "convert_f2i $dst, $src" %}
14723   ins_encode %{
14724     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14725   %}
14726   ins_pipe(pipe_slow);
14727 %}
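
// Note: convertF2I is not a bare cvttss2si. Java requires NaN to convert to 0
// and out-of-range values to saturate, so (roughly) the helper emits the
// truncating convert and then checks for the 0x80000000 sentinel, repairing
// the special cases out of line. The AVX10.2 rules below get these semantics
// from a single saturating convert, which is why they do not kill the flags.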
14728 
14729 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14730 %{
14731   predicate(VM_Version::supports_avx10_2());
14732   match(Set dst (ConvF2I src));
14733   format %{ "evcvttss2sisl $dst, $src" %}
14734   ins_encode %{
14735     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14736   %}
14737   ins_pipe(pipe_slow);
14738 %}
14739 
14740 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14741 %{
14742   predicate(VM_Version::supports_avx10_2());
14743   match(Set dst (ConvF2I (LoadF src)));
14744   format %{ "evcvttss2sisl $dst, $src" %}
14745   ins_encode %{
14746     __ evcvttss2sisl($dst$$Register, $src$$Address);
14747   %}
14748   ins_pipe(pipe_slow);
14749 %}
14750 
14751 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14752 %{
14753   predicate(!VM_Version::supports_avx10_2());
14754   match(Set dst (ConvF2L src));
14755   effect(KILL cr);
14756   format %{ "convert_f2l $dst, $src"%}
14757   ins_encode %{
14758     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14759   %}
14760   ins_pipe(pipe_slow);
14761 %}
14762 
14763 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14764 %{
14765   predicate(VM_Version::supports_avx10_2());
14766   match(Set dst (ConvF2L src));
14767   format %{ "evcvttss2sisq $dst, $src" %}
14768   ins_encode %{
14769     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14770   %}
14771   ins_pipe(pipe_slow);
14772 %}
14773 
14774 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14775 %{
14776   predicate(VM_Version::supports_avx10_2());
14777   match(Set dst (ConvF2L (LoadF src)));
14778   format %{ "evcvttss2sisq $dst, $src" %}
14779   ins_encode %{
14780     __ evcvttss2sisq($dst$$Register, $src$$Address);
14781   %}
14782   ins_pipe(pipe_slow);
14783 %}
14784 
14785 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14786 %{
14787   predicate(!VM_Version::supports_avx10_2());
14788   match(Set dst (ConvD2I src));
14789   effect(KILL cr);
14790   format %{ "convert_d2i $dst, $src"%}
14791   ins_encode %{
14792     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14793   %}
14794   ins_pipe(pipe_slow);
14795 %}
14796 
14797 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14798 %{
14799   predicate(VM_Version::supports_avx10_2());
14800   match(Set dst (ConvD2I src));
14801   format %{ "evcvttsd2sisl $dst, $src" %}
14802   ins_encode %{
14803     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14804   %}
14805   ins_pipe(pipe_slow);
14806 %}
14807 
14808 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14809 %{
14810   predicate(VM_Version::supports_avx10_2());
14811   match(Set dst (ConvD2I (LoadD src)));
14812   format %{ "evcvttsd2sisl $dst, $src" %}
14813   ins_encode %{
14814     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14815   %}
14816   ins_pipe(pipe_slow);
14817 %}
14818 
14819 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14820 %{
14821   predicate(!VM_Version::supports_avx10_2());
14822   match(Set dst (ConvD2L src));
14823   effect(KILL cr);
14824   format %{ "convert_d2l $dst, $src"%}
14825   ins_encode %{
14826     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14827   %}
14828   ins_pipe(pipe_slow);
14829 %}
14830 
14831 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14832 %{
14833   predicate(VM_Version::supports_avx10_2());
14834   match(Set dst (ConvD2L src));
14835   format %{ "evcvttsd2sisq $dst, $src" %}
14836   ins_encode %{
14837     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14838   %}
14839   ins_pipe(pipe_slow);
14840 %}
14841 
14842 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14843 %{
14844   predicate(VM_Version::supports_avx10_2());
14845   match(Set dst (ConvD2L (LoadD src)));
14846   format %{ "evcvttsd2sisq $dst, $src" %}
14847   ins_encode %{
14848     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14849   %}
14850   ins_pipe(pipe_slow);
14851 %}
14852 
14853 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14854 %{
14855   match(Set dst (RoundD src));
14856   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14857   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14858   ins_encode %{
14859     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14860   %}
14861   ins_pipe(pipe_slow);
14862 %}
14863 
14864 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14865 %{
14866   match(Set dst (RoundF src));
14867   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14868   format %{ "round_float $dst,$src" %}
14869   ins_encode %{
14870     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14871   %}
14872   ins_pipe(pipe_slow);
14873 %}
14874 
14875 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14876 %{
14877   predicate(!UseXmmI2F);
14878   match(Set dst (ConvI2F src));
14879 
14880   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14881   ins_encode %{
14882     if (UseAVX > 0) {
14883       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14884     }
14885     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14886   %}
14887   ins_pipe(pipe_slow); // XXX
14888 %}
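
// The pxor ahead of cvtsi2ssl above is a dependency-breaking idiom: cvtsi2ss
// merges into the low lanes of $dst, so without the clear it would carry a
// false dependence on whatever last wrote that XMM register. The same idiom
// appears in the other int/long-to-FP register rules below.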
14889 
14890 instruct convI2F_reg_mem(regF dst, memory src)
14891 %{
14892   predicate(UseAVX == 0);
14893   match(Set dst (ConvI2F (LoadI src)));
14894 
14895   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14896   ins_encode %{
14897     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14898   %}
14899   ins_pipe(pipe_slow); // XXX
14900 %}
14901 
14902 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14903 %{
14904   predicate(!UseXmmI2D);
14905   match(Set dst (ConvI2D src));
14906 
14907   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14908   ins_encode %{
14909     if (UseAVX > 0) {
14910       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14911     }
14912     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14913   %}
14914   ins_pipe(pipe_slow); // XXX
14915 %}
14916 
14917 instruct convI2D_reg_mem(regD dst, memory src)
14918 %{
14919   predicate(UseAVX == 0);
14920   match(Set dst (ConvI2D (LoadI src)));
14921 
14922   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14923   ins_encode %{
14924     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14925   %}
14926   ins_pipe(pipe_slow); // XXX
14927 %}
14928 
14929 instruct convXI2F_reg(regF dst, rRegI src)
14930 %{
14931   predicate(UseXmmI2F);
14932   match(Set dst (ConvI2F src));
14933 
14934   format %{ "movdl $dst, $src\n\t"
            "cvtdq2ps $dst, $dst\t# i2f" %}
14936   ins_encode %{
14937     __ movdl($dst$$XMMRegister, $src$$Register);
14938     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14939   %}
14940   ins_pipe(pipe_slow); // XXX
14941 %}
14942 
14943 instruct convXI2D_reg(regD dst, rRegI src)
14944 %{
14945   predicate(UseXmmI2D);
14946   match(Set dst (ConvI2D src));
14947 
14948   format %{ "movdl $dst, $src\n\t"
            "cvtdq2pd $dst, $dst\t# i2d" %}
14950   ins_encode %{
14951     __ movdl($dst$$XMMRegister, $src$$Register);
14952     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14953   %}
14954   ins_pipe(pipe_slow); // XXX
14955 %}
14956 
14957 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14958 %{
14959   match(Set dst (ConvL2F src));
14960 
14961   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14962   ins_encode %{
14963     if (UseAVX > 0) {
14964       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14965     }
14966     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14967   %}
14968   ins_pipe(pipe_slow); // XXX
14969 %}
14970 
14971 instruct convL2F_reg_mem(regF dst, memory src)
14972 %{
14973   predicate(UseAVX == 0);
14974   match(Set dst (ConvL2F (LoadL src)));
14975 
14976   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14977   ins_encode %{
14978     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14979   %}
14980   ins_pipe(pipe_slow); // XXX
14981 %}
14982 
14983 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14984 %{
14985   match(Set dst (ConvL2D src));
14986 
14987   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14988   ins_encode %{
14989     if (UseAVX > 0) {
14990       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14991     }
14992     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14993   %}
14994   ins_pipe(pipe_slow); // XXX
14995 %}
14996 
14997 instruct convL2D_reg_mem(regD dst, memory src)
14998 %{
14999   predicate(UseAVX == 0);
15000   match(Set dst (ConvL2D (LoadL src)));
15001 
15002   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15003   ins_encode %{
15004     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15005   %}
15006   ins_pipe(pipe_slow); // XXX
15007 %}
15008 
15009 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15010 %{
15011   match(Set dst (ConvI2L src));
15012 
15013   ins_cost(125);
15014   format %{ "movslq  $dst, $src\t# i2l" %}
15015   ins_encode %{
15016     __ movslq($dst$$Register, $src$$Register);
15017   %}
15018   ins_pipe(ialu_reg_reg);
15019 %}
15020 
15021 // Zero-extend convert int to long
15022 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15023 %{
15024   match(Set dst (AndL (ConvI2L src) mask));
15025 
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
15027   ins_encode %{
15028     if ($dst$$reg != $src$$reg) {
15029       __ movl($dst$$Register, $src$$Register);
15030     }
15031   %}
15032   ins_pipe(ialu_reg_reg);
15033 %}
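
// On x86-64 a 32-bit mov implicitly zeroes bits 63:32 of its destination, so
// these zero-extending conversions need no explicit mask, e.g.
//   long u = x & 0xFFFFFFFFL;  // just movl, elided entirely when dst == src
// The memory and long-register variants below rely on the same fact.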
15034 
15035 // Zero-extend convert int to long
15036 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15037 %{
15038   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15039 
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
15041   ins_encode %{
15042     __ movl($dst$$Register, $src$$Address);
15043   %}
15044   ins_pipe(ialu_reg_mem);
15045 %}
15046 
15047 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15048 %{
15049   match(Set dst (AndL src mask));
15050 
15051   format %{ "movl    $dst, $src\t# zero-extend long" %}
15052   ins_encode %{
15053     __ movl($dst$$Register, $src$$Register);
15054   %}
15055   ins_pipe(ialu_reg_reg);
15056 %}
15057 
15058 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15059 %{
15060   match(Set dst (ConvL2I src));
15061 
15062   format %{ "movl    $dst, $src\t# l2i" %}
15063   ins_encode %{
15064     __ movl($dst$$Register, $src$$Register);
15065   %}
15066   ins_pipe(ialu_reg_reg);
15067 %}
15068 
15069 
15070 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15071   match(Set dst (MoveF2I src));
15072   effect(DEF dst, USE src);
15073 
15074   ins_cost(125);
15075   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
15076   ins_encode %{
15077     __ movl($dst$$Register, Address(rsp, $src$$disp));
15078   %}
15079   ins_pipe(ialu_reg_mem);
15080 %}
15081 
15082 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15083   match(Set dst (MoveI2F src));
15084   effect(DEF dst, USE src);
15085 
15086   ins_cost(125);
15087   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
15088   ins_encode %{
15089     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15090   %}
15091   ins_pipe(pipe_slow);
15092 %}
15093 
15094 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15095   match(Set dst (MoveD2L src));
15096   effect(DEF dst, USE src);
15097 
15098   ins_cost(125);
15099   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
15100   ins_encode %{
15101     __ movq($dst$$Register, Address(rsp, $src$$disp));
15102   %}
15103   ins_pipe(ialu_reg_mem);
15104 %}
15105 
15106 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15107   predicate(!UseXmmLoadAndClearUpper);
15108   match(Set dst (MoveL2D src));
15109   effect(DEF dst, USE src);
15110 
15111   ins_cost(125);
15112   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
15113   ins_encode %{
15114     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15115   %}
15116   ins_pipe(pipe_slow);
15117 %}
15118 
15119 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15120   predicate(UseXmmLoadAndClearUpper);
15121   match(Set dst (MoveL2D src));
15122   effect(DEF dst, USE src);
15123 
15124   ins_cost(125);
15125   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15126   ins_encode %{
15127     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15128   %}
15129   ins_pipe(pipe_slow);
15130 %}
15131 
15132 
15133 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15134   match(Set dst (MoveF2I src));
15135   effect(DEF dst, USE src);
15136 
15137   ins_cost(95); // XXX
15138   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15139   ins_encode %{
15140     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15141   %}
15142   ins_pipe(pipe_slow);
15143 %}
15144 
15145 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15146   match(Set dst (MoveI2F src));
15147   effect(DEF dst, USE src);
15148 
15149   ins_cost(100);
15150   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15151   ins_encode %{
15152     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15153   %}
15154   ins_pipe( ialu_mem_reg );
15155 %}
15156 
15157 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15158   match(Set dst (MoveD2L src));
15159   effect(DEF dst, USE src);
15160 
15161   ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
15163   ins_encode %{
15164     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15165   %}
15166   ins_pipe(pipe_slow);
15167 %}
15168 
15169 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15170   match(Set dst (MoveL2D src));
15171   effect(DEF dst, USE src);
15172 
15173   ins_cost(100);
15174   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15175   ins_encode %{
15176     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15177   %}
15178   ins_pipe(ialu_mem_reg);
15179 %}
15180 
15181 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15182   match(Set dst (MoveF2I src));
15183   effect(DEF dst, USE src);
15184   ins_cost(85);
15185   format %{ "movd    $dst,$src\t# MoveF2I" %}
15186   ins_encode %{
15187     __ movdl($dst$$Register, $src$$XMMRegister);
15188   %}
15189   ins_pipe( pipe_slow );
15190 %}
15191 
15192 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15193   match(Set dst (MoveD2L src));
15194   effect(DEF dst, USE src);
15195   ins_cost(85);
15196   format %{ "movd    $dst,$src\t# MoveD2L" %}
15197   ins_encode %{
15198     __ movdq($dst$$Register, $src$$XMMRegister);
15199   %}
15200   ins_pipe( pipe_slow );
15201 %}
15202 
15203 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15204   match(Set dst (MoveI2F src));
15205   effect(DEF dst, USE src);
15206   ins_cost(100);
15207   format %{ "movd    $dst,$src\t# MoveI2F" %}
15208   ins_encode %{
15209     __ movdl($dst$$XMMRegister, $src$$Register);
15210   %}
15211   ins_pipe( pipe_slow );
15212 %}
15213 
15214 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15215   match(Set dst (MoveL2D src));
15216   effect(DEF dst, USE src);
15217   ins_cost(100);
15218   format %{ "movd    $dst,$src\t# MoveL2D" %}
15219   ins_encode %{
15220      __ movdq($dst$$XMMRegister, $src$$Register);
15221   %}
15222   ins_pipe( pipe_slow );
15223 %}
15224 
15225 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15227 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15228                   Universe dummy, rFlagsReg cr)
15229 %{
15230   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15231   match(Set dummy (ClearArray cnt base));
15232   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15233 
15234   format %{ $$template
15235     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15236     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15237     $$emit$$"jg      LARGE\n\t"
15238     $$emit$$"dec     rcx\n\t"
15239     $$emit$$"js      DONE\t# Zero length\n\t"
15240     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15241     $$emit$$"dec     rcx\n\t"
15242     $$emit$$"jge     LOOP\n\t"
15243     $$emit$$"jmp     DONE\n\t"
15244     $$emit$$"# LARGE:\n\t"
15245     if (UseFastStosb) {
15246        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15247        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15248     } else if (UseXMMForObjInit) {
15249        $$emit$$"mov     rdi,rax\n\t"
15250        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15251        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15252        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15253        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15254        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15255        $$emit$$"add     0x40,rax\n\t"
15256        $$emit$$"# L_zero_64_bytes:\n\t"
15257        $$emit$$"sub     0x8,rcx\n\t"
15258        $$emit$$"jge     L_loop\n\t"
15259        $$emit$$"add     0x4,rcx\n\t"
15260        $$emit$$"jl      L_tail\n\t"
15261        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15262        $$emit$$"add     0x20,rax\n\t"
15263        $$emit$$"sub     0x4,rcx\n\t"
15264        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15265        $$emit$$"add     0x4,rcx\n\t"
15266        $$emit$$"jle     L_end\n\t"
15267        $$emit$$"dec     rcx\n\t"
15268        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15269        $$emit$$"vmovq   xmm0,(rax)\n\t"
15270        $$emit$$"add     0x8,rax\n\t"
15271        $$emit$$"dec     rcx\n\t"
15272        $$emit$$"jge     L_sloop\n\t"
15273        $$emit$$"# L_end:\n\t"
15274     } else {
15275        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15276     }
15277     $$emit$$"# DONE"
15278   %}
15279   ins_encode %{
15280     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15281                  $tmp$$XMMRegister, false, knoreg);
15282   %}
15283   ins_pipe(pipe_slow);
15284 %}
15285 
15286 // Small non-constant length ClearArray for AVX512 targets.
15287 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15288                        Universe dummy, rFlagsReg cr)
15289 %{
15290   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15291   match(Set dummy (ClearArray cnt base));
15292   ins_cost(125);
15293   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15294 
15295   format %{ $$template
15296     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15297     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15298     $$emit$$"jg      LARGE\n\t"
15299     $$emit$$"dec     rcx\n\t"
15300     $$emit$$"js      DONE\t# Zero length\n\t"
15301     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15302     $$emit$$"dec     rcx\n\t"
15303     $$emit$$"jge     LOOP\n\t"
15304     $$emit$$"jmp     DONE\n\t"
15305     $$emit$$"# LARGE:\n\t"
15306     if (UseFastStosb) {
15307        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15308        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15309     } else if (UseXMMForObjInit) {
15310        $$emit$$"mov     rdi,rax\n\t"
15311        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15312        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15313        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15314        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15315        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15316        $$emit$$"add     0x40,rax\n\t"
15317        $$emit$$"# L_zero_64_bytes:\n\t"
15318        $$emit$$"sub     0x8,rcx\n\t"
15319        $$emit$$"jge     L_loop\n\t"
15320        $$emit$$"add     0x4,rcx\n\t"
15321        $$emit$$"jl      L_tail\n\t"
15322        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15323        $$emit$$"add     0x20,rax\n\t"
15324        $$emit$$"sub     0x4,rcx\n\t"
15325        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15326        $$emit$$"add     0x4,rcx\n\t"
15327        $$emit$$"jle     L_end\n\t"
15328        $$emit$$"dec     rcx\n\t"
15329        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15330        $$emit$$"vmovq   xmm0,(rax)\n\t"
15331        $$emit$$"add     0x8,rax\n\t"
15332        $$emit$$"dec     rcx\n\t"
15333        $$emit$$"jge     L_sloop\n\t"
15334        $$emit$$"# L_end:\n\t"
15335     } else {
15336        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15337     }
15338     $$emit$$"# DONE"
15339   %}
15340   ins_encode %{
15341     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15342                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15343   %}
15344   ins_pipe(pipe_slow);
15345 %}
15346 
15347 // Large non-constant length ClearArray for non-AVX512 targets.
15348 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15349                         Universe dummy, rFlagsReg cr)
15350 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15352   match(Set dummy (ClearArray cnt base));
15353   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15354 
15355   format %{ $$template
15356     if (UseFastStosb) {
15357        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15358        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15359        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15360     } else if (UseXMMForObjInit) {
15361        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15362        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15363        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15364        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15365        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15366        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15367        $$emit$$"add     0x40,rax\n\t"
15368        $$emit$$"# L_zero_64_bytes:\n\t"
15369        $$emit$$"sub     0x8,rcx\n\t"
15370        $$emit$$"jge     L_loop\n\t"
15371        $$emit$$"add     0x4,rcx\n\t"
15372        $$emit$$"jl      L_tail\n\t"
15373        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15374        $$emit$$"add     0x20,rax\n\t"
15375        $$emit$$"sub     0x4,rcx\n\t"
15376        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15377        $$emit$$"add     0x4,rcx\n\t"
15378        $$emit$$"jle     L_end\n\t"
15379        $$emit$$"dec     rcx\n\t"
15380        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15381        $$emit$$"vmovq   xmm0,(rax)\n\t"
15382        $$emit$$"add     0x8,rax\n\t"
15383        $$emit$$"dec     rcx\n\t"
15384        $$emit$$"jge     L_sloop\n\t"
15385        $$emit$$"# L_end:\n\t"
15386     } else {
15387        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15388        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15389     }
15390   %}
15391   ins_encode %{
15392     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15393                  $tmp$$XMMRegister, true, knoreg);
15394   %}
15395   ins_pipe(pipe_slow);
15396 %}
15397 
15398 // Large non-constant length ClearArray for AVX512 targets.
15399 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15400                              Universe dummy, rFlagsReg cr)
15401 %{
15402   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15403   match(Set dummy (ClearArray cnt base));
15404   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15405 
15406   format %{ $$template
15407     if (UseFastStosb) {
15408        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15409        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15410        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15411     } else if (UseXMMForObjInit) {
15412        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15413        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15414        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15415        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15416        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15417        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15418        $$emit$$"add     0x40,rax\n\t"
15419        $$emit$$"# L_zero_64_bytes:\n\t"
15420        $$emit$$"sub     0x8,rcx\n\t"
15421        $$emit$$"jge     L_loop\n\t"
15422        $$emit$$"add     0x4,rcx\n\t"
15423        $$emit$$"jl      L_tail\n\t"
15424        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15425        $$emit$$"add     0x20,rax\n\t"
15426        $$emit$$"sub     0x4,rcx\n\t"
15427        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15428        $$emit$$"add     0x4,rcx\n\t"
15429        $$emit$$"jle     L_end\n\t"
15430        $$emit$$"dec     rcx\n\t"
15431        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15432        $$emit$$"vmovq   xmm0,(rax)\n\t"
15433        $$emit$$"add     0x8,rax\n\t"
15434        $$emit$$"dec     rcx\n\t"
15435        $$emit$$"jge     L_sloop\n\t"
15436        $$emit$$"# L_end:\n\t"
15437     } else {
15438        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15439        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15440     }
15441   %}
15442   ins_encode %{
15443     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15444                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15445   %}
15446   ins_pipe(pipe_slow);
15447 %}
15448 
15449 // Small constant length ClearArray for AVX512 targets.
15450 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15451 %{
15452   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15453   match(Set dummy (ClearArray cnt base));
15454   ins_cost(100);
15455   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15456   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15457   ins_encode %{
15458    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15459   %}
15460   ins_pipe(pipe_slow);
15461 %}
15462 
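// String compare intrinsics. The LL/UU/LU/UL suffixes name the encodings of
// the two inputs (L = Latin-1 byte[], U = UTF-16 char[]).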
15463 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15464                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15465 %{
15466   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15467   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15468   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15469 
15470   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15471   ins_encode %{
15472     __ string_compare($str1$$Register, $str2$$Register,
15473                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15474                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15475   %}
15476   ins_pipe( pipe_slow );
15477 %}
15478 
15479 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15480                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15481 %{
15482   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15483   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15484   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15485 
15486   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15487   ins_encode %{
15488     __ string_compare($str1$$Register, $str2$$Register,
15489                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15490                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15491   %}
15492   ins_pipe( pipe_slow );
15493 %}
15494 
15495 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15496                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15497 %{
15498   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15499   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15500   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15501 
15502   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15503   ins_encode %{
15504     __ string_compare($str1$$Register, $str2$$Register,
15505                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15506                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15507   %}
15508   ins_pipe( pipe_slow );
15509 %}
15510 
15511 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15512                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15513 %{
15514   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15515   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15516   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15517 
15518   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15519   ins_encode %{
15520     __ string_compare($str1$$Register, $str2$$Register,
15521                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15522                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15523   %}
15524   ins_pipe( pipe_slow );
15525 %}
15526 
15527 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15528                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15529 %{
15530   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15531   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15532   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15533 
15534   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15535   ins_encode %{
15536     __ string_compare($str1$$Register, $str2$$Register,
15537                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15538                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15539   %}
15540   ins_pipe( pipe_slow );
15541 %}
15542 
15543 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15544                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15545 %{
15546   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15547   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15548   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15549 
15550   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15551   ins_encode %{
15552     __ string_compare($str1$$Register, $str2$$Register,
15553                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15554                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15555   %}
15556   ins_pipe( pipe_slow );
15557 %}
15558 
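// Note: for the UL case the operands are passed to string_compare() in
// swapped order (str2/cnt2 first), so the comparison can reuse the LU code
// path on the swapped inputs.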
15559 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15560                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15561 %{
15562   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15563   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15564   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15565 
15566   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15567   ins_encode %{
15568     __ string_compare($str2$$Register, $str1$$Register,
15569                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15570                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15571   %}
15572   ins_pipe( pipe_slow );
15573 %}
15574 
15575 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15576                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15577 %{
15578   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15579   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15580   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15581 
15582   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15583   ins_encode %{
15584     __ string_compare($str2$$Register, $str1$$Register,
15585                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15586                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15587   %}
15588   ins_pipe( pipe_slow );
15589 %}
15590 
15591 // fast search of substring with known size.
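// The constant-size thresholds used below (16 elements for LL, 8 for UU/UL)
// correspond to one 128-bit XMM register: 16 Latin-1 bytes or 8 UTF-16
// chars per SSE4.2 (pcmpestri) step.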
15592 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15593                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15594 %{
15595   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15596   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15597   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15598 
15599   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15600   ins_encode %{
15601     int icnt2 = (int)$int_cnt2$$constant;
15602     if (icnt2 >= 16) {
15603       // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
15605       __ string_indexofC8($str1$$Register, $str2$$Register,
15606                           $cnt1$$Register, $cnt2$$Register,
15607                           icnt2, $result$$Register,
15608                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15609     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15611       __ string_indexof($str1$$Register, $str2$$Register,
15612                         $cnt1$$Register, $cnt2$$Register,
15613                         icnt2, $result$$Register,
15614                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15615     }
15616   %}
15617   ins_pipe( pipe_slow );
15618 %}
15619 
15620 // fast search of substring with known size.
15621 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15622                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15623 %{
15624   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15625   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15626   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15627 
15628   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15629   ins_encode %{
15630     int icnt2 = (int)$int_cnt2$$constant;
15631     if (icnt2 >= 8) {
15632       // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15634       __ string_indexofC8($str1$$Register, $str2$$Register,
15635                           $cnt1$$Register, $cnt2$$Register,
15636                           icnt2, $result$$Register,
15637                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15638     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15640       __ string_indexof($str1$$Register, $str2$$Register,
15641                         $cnt1$$Register, $cnt2$$Register,
15642                         icnt2, $result$$Register,
15643                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15644     }
15645   %}
15646   ins_pipe( pipe_slow );
15647 %}
15648 
15649 // fast search of substring with known size.
15650 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15651                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15652 %{
15653   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15654   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15655   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15656 
15657   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15658   ins_encode %{
15659     int icnt2 = (int)$int_cnt2$$constant;
15660     if (icnt2 >= 8) {
15661       // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15663       __ string_indexofC8($str1$$Register, $str2$$Register,
15664                           $cnt1$$Register, $cnt2$$Register,
15665                           icnt2, $result$$Register,
15666                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15667     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15669       __ string_indexof($str1$$Register, $str2$$Register,
15670                         $cnt1$$Register, $cnt2$$Register,
15671                         icnt2, $result$$Register,
15672                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15673     }
15674   %}
15675   ins_pipe( pipe_slow );
15676 %}
15677 
15678 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15679                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15680 %{
15681   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15682   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15683   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15684 
15685   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15686   ins_encode %{
15687     __ string_indexof($str1$$Register, $str2$$Register,
15688                       $cnt1$$Register, $cnt2$$Register,
15689                       (-1), $result$$Register,
15690                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15691   %}
15692   ins_pipe( pipe_slow );
15693 %}
15694 
15695 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15696                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15697 %{
15698   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15699   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15700   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15701 
15702   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15703   ins_encode %{
15704     __ string_indexof($str1$$Register, $str2$$Register,
15705                       $cnt1$$Register, $cnt2$$Register,
15706                       (-1), $result$$Register,
15707                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15708   %}
15709   ins_pipe( pipe_slow );
15710 %}
15711 
15712 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15713                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15714 %{
15715   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15716   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15717   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15718 
15719   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15720   ins_encode %{
15721     __ string_indexof($str1$$Register, $str2$$Register,
15722                       $cnt1$$Register, $cnt2$$Register,
15723                       (-1), $result$$Register,
15724                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15725   %}
15726   ins_pipe( pipe_slow );
15727 %}
15728 
15729 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15730                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15731 %{
15732   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15733   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15734   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15735   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15736   ins_encode %{
15737     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15738                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15739   %}
15740   ins_pipe( pipe_slow );
15741 %}
15742 
15743 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15744                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15745 %{
15746   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15747   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15748   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15749   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15750   ins_encode %{
15751     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15752                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15753   %}
15754   ins_pipe( pipe_slow );
15755 %}
15756 
15757 // fast string equals
15758 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15759                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15760 %{
15761   predicate(!VM_Version::supports_avx512vlbw());
15762   match(Set result (StrEquals (Binary str1 str2) cnt));
15763   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15764 
15765   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15766   ins_encode %{
15767     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15768                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15769                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15770   %}
15771   ins_pipe( pipe_slow );
15772 %}
15773 
15774 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15775                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15776 %{
15777   predicate(VM_Version::supports_avx512vlbw());
15778   match(Set result (StrEquals (Binary str1 str2) cnt));
15779   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15780 
15781   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15782   ins_encode %{
15783     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15784                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15785                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15786   %}
15787   ins_pipe( pipe_slow );
15788 %}
15789 
15790 // fast array equals
15791 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15792                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15793 %{
15794   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15795   match(Set result (AryEq ary1 ary2));
15796   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15797 
15798   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15799   ins_encode %{
15800     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15801                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15802                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15803   %}
15804   ins_pipe( pipe_slow );
15805 %}
15806 
15807 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15808                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15809 %{
15810   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15811   match(Set result (AryEq ary1 ary2));
15812   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15813 
15814   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15815   ins_encode %{
15816     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15817                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15818                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15819   %}
15820   ins_pipe( pipe_slow );
15821 %}
15822 
15823 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15824                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15825 %{
15826   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15827   match(Set result (AryEq ary1 ary2));
15828   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15829 
15830   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15831   ins_encode %{
15832     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15833                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15834                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15835   %}
15836   ins_pipe( pipe_slow );
15837 %}
15838 
15839 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15840                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15841 %{
15842   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15843   match(Set result (AryEq ary1 ary2));
15844   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15845 
15846   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15847   ins_encode %{
15848     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15849                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15850                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15851   %}
15852   ins_pipe( pipe_slow );
15853 %}
15854 
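// Vectorized array hash code. $basic_type is a compile-time constant naming
// the element type (e.g. T_BYTE, T_CHAR, T_INT); the incoming $result value
// carries the initial hash.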
15855 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15856                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15857                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15858                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15859                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15860 %{
15861   predicate(UseAVX >= 2);
15862   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15863   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15864          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15865          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15866          USE basic_type, KILL cr);
15867 
15868   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15869   ins_encode %{
15870     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15871                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15872                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15873                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15874                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15875                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15876                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15877   %}
15878   ins_pipe( pipe_slow );
15879 %}
15880 
15881 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15883 %{
15884   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15885   match(Set result (CountPositives ary1 len));
15886   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15887 
15888   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15889   ins_encode %{
15890     __ count_positives($ary1$$Register, $len$$Register,
15891                        $result$$Register, $tmp3$$Register,
15892                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15893   %}
15894   ins_pipe( pipe_slow );
15895 %}
15896 
15897 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15899 %{
15900   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15901   match(Set result (CountPositives ary1 len));
15902   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15903 
15904   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15905   ins_encode %{
15906     __ count_positives($ary1$$Register, $len$$Register,
15907                        $result$$Register, $tmp3$$Register,
15908                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15909   %}
15910   ins_pipe( pipe_slow );
15911 %}
15912 
15913 // fast char[] to byte[] compression
15914 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15915                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15916   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15917   match(Set result (StrCompressedCopy src (Binary dst len)));
15918   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15919          USE_KILL len, KILL tmp5, KILL cr);
15920 
15921   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15922   ins_encode %{
15923     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15924                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15925                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15926                            knoreg, knoreg);
15927   %}
15928   ins_pipe( pipe_slow );
15929 %}
15930 
15931 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15932                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15933   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15934   match(Set result (StrCompressedCopy src (Binary dst len)));
15935   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15936          USE_KILL len, KILL tmp5, KILL cr);
15937 
15938   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15939   ins_encode %{
15940     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15941                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15942                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15943                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15944   %}
15945   ins_pipe( pipe_slow );
15946 %}

// fast byte[] to char[] inflation
15948 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15949                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15950   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15951   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15952   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15953 
15954   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15955   ins_encode %{
15956     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15957                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15958   %}
15959   ins_pipe( pipe_slow );
15960 %}
15961 
15962 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15963                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15964   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15965   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15966   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15967 
15968   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15969   ins_encode %{
15970     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15971                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15972   %}
15973   ins_pipe( pipe_slow );
15974 %}
15975 
15976 // encode char[] to byte[] in ISO_8859_1
15977 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15978                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15979                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15980   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15981   match(Set result (EncodeISOArray src (Binary dst len)));
15982   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15983 
15984   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15985   ins_encode %{
15986     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15987                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15988                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15989   %}
15990   ins_pipe( pipe_slow );
15991 %}
15992 
15993 // encode char[] to byte[] in ASCII
15994 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15995                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15996                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15997   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15998   match(Set result (EncodeISOArray src (Binary dst len)));
15999   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16000 
16001   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16002   ins_encode %{
16003     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16004                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16005                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16006   %}
16007   ins_pipe( pipe_slow );
16008 %}
16009 
16010 //----------Overflow Math Instructions-----------------------------------------
16011 
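// These match the OverflowAdd/Sub/Mul ideal nodes, typically produced by the
// Math.*Exact intrinsics. The arithmetic is performed solely for its effect
// on the overflow flag (OF), which the dependent branch then consumes, e.g.
// Math.addExact(int, int) becomes roughly:
//   addl    op1, op2
//   jo      <overflow slow path>
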
16012 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16013 %{
16014   match(Set cr (OverflowAddI op1 op2));
16015   effect(DEF cr, USE_KILL op1, USE op2);
16016 
16017   format %{ "addl    $op1, $op2\t# overflow check int" %}
16018 
16019   ins_encode %{
16020     __ addl($op1$$Register, $op2$$Register);
16021   %}
16022   ins_pipe(ialu_reg_reg);
16023 %}
16024 
16025 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16026 %{
16027   match(Set cr (OverflowAddI op1 op2));
16028   effect(DEF cr, USE_KILL op1, USE op2);
16029 
16030   format %{ "addl    $op1, $op2\t# overflow check int" %}
16031 
16032   ins_encode %{
16033     __ addl($op1$$Register, $op2$$constant);
16034   %}
16035   ins_pipe(ialu_reg_reg);
16036 %}
16037 
16038 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16039 %{
16040   match(Set cr (OverflowAddL op1 op2));
16041   effect(DEF cr, USE_KILL op1, USE op2);
16042 
16043   format %{ "addq    $op1, $op2\t# overflow check long" %}
16044   ins_encode %{
16045     __ addq($op1$$Register, $op2$$Register);
16046   %}
16047   ins_pipe(ialu_reg_reg);
16048 %}
16049 
16050 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16051 %{
16052   match(Set cr (OverflowAddL op1 op2));
16053   effect(DEF cr, USE_KILL op1, USE op2);
16054 
16055   format %{ "addq    $op1, $op2\t# overflow check long" %}
16056   ins_encode %{
16057     __ addq($op1$$Register, $op2$$constant);
16058   %}
16059   ins_pipe(ialu_reg_reg);
16060 %}
16061 
16062 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16063 %{
16064   match(Set cr (OverflowSubI op1 op2));
16065 
16066   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16067   ins_encode %{
16068     __ cmpl($op1$$Register, $op2$$Register);
16069   %}
16070   ins_pipe(ialu_reg_reg);
16071 %}
16072 
16073 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16074 %{
16075   match(Set cr (OverflowSubI op1 op2));
16076 
16077   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16078   ins_encode %{
16079     __ cmpl($op1$$Register, $op2$$constant);
16080   %}
16081   ins_pipe(ialu_reg_reg);
16082 %}
16083 
16084 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16085 %{
16086   match(Set cr (OverflowSubL op1 op2));
16087 
16088   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16089   ins_encode %{
16090     __ cmpq($op1$$Register, $op2$$Register);
16091   %}
16092   ins_pipe(ialu_reg_reg);
16093 %}
16094 
16095 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16096 %{
16097   match(Set cr (OverflowSubL op1 op2));
16098 
16099   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16100   ins_encode %{
16101     __ cmpq($op1$$Register, $op2$$constant);
16102   %}
16103   ins_pipe(ialu_reg_reg);
16104 %}
16105 
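// Negation overflow checks: negl/negq set OF only when the operand is the
// most negative value (0x80000000 / 0x8000000000000000), i.e. exactly the
// case where Integer/Long.MIN_VALUE cannot be negated.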
16106 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16107 %{
16108   match(Set cr (OverflowSubI zero op2));
16109   effect(DEF cr, USE_KILL op2);
16110 
16111   format %{ "negl    $op2\t# overflow check int" %}
16112   ins_encode %{
16113     __ negl($op2$$Register);
16114   %}
16115   ins_pipe(ialu_reg_reg);
16116 %}
16117 
16118 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16119 %{
16120   match(Set cr (OverflowSubL zero op2));
16121   effect(DEF cr, USE_KILL op2);
16122 
16123   format %{ "negq    $op2\t# overflow check long" %}
16124   ins_encode %{
16125     __ negq($op2$$Register);
16126   %}
16127   ins_pipe(ialu_reg_reg);
16128 %}
16129 
16130 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16131 %{
16132   match(Set cr (OverflowMulI op1 op2));
16133   effect(DEF cr, USE_KILL op1, USE op2);
16134 
16135   format %{ "imull    $op1, $op2\t# overflow check int" %}
16136   ins_encode %{
16137     __ imull($op1$$Register, $op2$$Register);
16138   %}
16139   ins_pipe(ialu_reg_reg_alu0);
16140 %}
16141 
16142 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16143 %{
16144   match(Set cr (OverflowMulI op1 op2));
16145   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16146 
16147   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16148   ins_encode %{
16149     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16150   %}
16151   ins_pipe(ialu_reg_reg_alu0);
16152 %}
16153 
16154 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16155 %{
16156   match(Set cr (OverflowMulL op1 op2));
16157   effect(DEF cr, USE_KILL op1, USE op2);
16158 
16159   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16160   ins_encode %{
16161     __ imulq($op1$$Register, $op2$$Register);
16162   %}
16163   ins_pipe(ialu_reg_reg_alu0);
16164 %}
16165 
16166 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16167 %{
16168   match(Set cr (OverflowMulL op1 op2));
16169   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16170 
16171   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16172   ins_encode %{
16173     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16174   %}
16175   ins_pipe(ialu_reg_reg_alu0);
16176 %}
16177 
16178 
16179 //----------Control Flow Instructions------------------------------------------
16180 // Signed compare Instructions
16181 
16182 // XXX more variants!!
16183 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16184 %{
16185   match(Set cr (CmpI op1 op2));
16186   effect(DEF cr, USE op1, USE op2);
16187 
16188   format %{ "cmpl    $op1, $op2" %}
16189   ins_encode %{
16190     __ cmpl($op1$$Register, $op2$$Register);
16191   %}
16192   ins_pipe(ialu_cr_reg_reg);
16193 %}
16194 
16195 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16196 %{
16197   match(Set cr (CmpI op1 op2));
16198 
16199   format %{ "cmpl    $op1, $op2" %}
16200   ins_encode %{
16201     __ cmpl($op1$$Register, $op2$$constant);
16202   %}
16203   ins_pipe(ialu_cr_reg_imm);
16204 %}
16205 
16206 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16207 %{
16208   match(Set cr (CmpI op1 (LoadI op2)));
16209 
16210   ins_cost(500); // XXX
16211   format %{ "cmpl    $op1, $op2" %}
16212   ins_encode %{
16213     __ cmpl($op1$$Register, $op2$$Address);
16214   %}
16215   ins_pipe(ialu_cr_reg_mem);
16216 %}
16217 
16218 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16219 %{
16220   match(Set cr (CmpI src zero));
16221 
16222   format %{ "testl   $src, $src" %}
16223   ins_encode %{
16224     __ testl($src$$Register, $src$$Register);
16225   %}
16226   ins_pipe(ialu_cr_reg_imm);
16227 %}
16228 
16229 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16230 %{
16231   match(Set cr (CmpI (AndI src con) zero));
16232 
16233   format %{ "testl   $src, $con" %}
16234   ins_encode %{
16235     __ testl($src$$Register, $con$$constant);
16236   %}
16237   ins_pipe(ialu_cr_reg_imm);
16238 %}
16239 
16240 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16241 %{
16242   match(Set cr (CmpI (AndI src1 src2) zero));
16243 
16244   format %{ "testl   $src1, $src2" %}
16245   ins_encode %{
16246     __ testl($src1$$Register, $src2$$Register);
16247   %}
16248   ins_pipe(ialu_cr_reg_imm);
16249 %}
16250 
16251 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16252 %{
16253   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16254 
16255   format %{ "testl   $src, $mem" %}
16256   ins_encode %{
16257     __ testl($src$$Register, $mem$$Address);
16258   %}
16259   ins_pipe(ialu_cr_reg_mem);
16260 %}
16261 
16262 // Unsigned compare Instructions; really, same as signed except they
16263 // produce an rFlagsRegU instead of rFlagsReg.
16264 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16265 %{
16266   match(Set cr (CmpU op1 op2));
16267 
16268   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16269   ins_encode %{
16270     __ cmpl($op1$$Register, $op2$$Register);
16271   %}
16272   ins_pipe(ialu_cr_reg_reg);
16273 %}
16274 
16275 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16276 %{
16277   match(Set cr (CmpU op1 op2));
16278 
16279   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16280   ins_encode %{
16281     __ cmpl($op1$$Register, $op2$$constant);
16282   %}
16283   ins_pipe(ialu_cr_reg_imm);
16284 %}
16285 
16286 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16287 %{
16288   match(Set cr (CmpU op1 (LoadI op2)));
16289 
16290   ins_cost(500); // XXX
16291   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16292   ins_encode %{
16293     __ cmpl($op1$$Register, $op2$$Address);
16294   %}
16295   ins_pipe(ialu_cr_reg_mem);
16296 %}
16297 
16298 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16299 %{
16300   match(Set cr (CmpU src zero));
16301 
16302   format %{ "testl   $src, $src\t# unsigned" %}
16303   ins_encode %{
16304     __ testl($src$$Register, $src$$Register);
16305   %}
16306   ins_pipe(ialu_cr_reg_imm);
16307 %}
16308 
16309 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16310 %{
16311   match(Set cr (CmpP op1 op2));
16312 
16313   format %{ "cmpq    $op1, $op2\t# ptr" %}
16314   ins_encode %{
16315     __ cmpq($op1$$Register, $op2$$Register);
16316   %}
16317   ins_pipe(ialu_cr_reg_reg);
16318 %}
16319 
16320 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16321 %{
16322   match(Set cr (CmpP op1 (LoadP op2)));
16323   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16324 
16325   ins_cost(500); // XXX
16326   format %{ "cmpq    $op1, $op2\t# ptr" %}
16327   ins_encode %{
16328     __ cmpq($op1$$Register, $op2$$Address);
16329   %}
16330   ins_pipe(ialu_cr_reg_mem);
16331 %}
16332 
16333 // XXX this is generalized by compP_rReg_mem???
16334 // Compare raw pointer (used in out-of-heap check).
16335 // Only works because non-oop pointers must be raw pointers
16336 // and raw pointers have no anti-dependencies.
16337 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16338 %{
16339   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16340             n->in(2)->as_Load()->barrier_data() == 0);
16341   match(Set cr (CmpP op1 (LoadP op2)));
16342 
16343   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16344   ins_encode %{
16345     __ cmpq($op1$$Register, $op2$$Address);
16346   %}
16347   ins_pipe(ialu_cr_reg_mem);
16348 %}
16349 
16350 // This will generate a signed flags result. This should be OK since
16351 // any compare to a zero should be eq/neq.
16352 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16353 %{
16354   match(Set cr (CmpP src zero));
16355 
16356   format %{ "testq   $src, $src\t# ptr" %}
16357   ins_encode %{
16358     __ testq($src$$Register, $src$$Register);
16359   %}
16360   ins_pipe(ialu_cr_reg_imm);
16361 %}
16362 
16363 // This will generate a signed flags result. This should be OK since
16364 // any compare to a zero should be eq/neq.
16365 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16366 %{
16367   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16368             n->in(1)->as_Load()->barrier_data() == 0);
16369   match(Set cr (CmpP (LoadP op) zero));
16370 
16371   ins_cost(500); // XXX
16372   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16373   ins_encode %{
16374     __ testq($op$$Address, 0xFFFFFFFF);
16375   %}
16376   ins_pipe(ialu_cr_reg_imm);
16377 %}
16378 
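// When the compressed-oops base is null, R12 (the heap-base register) is
// known to hold zero, so it can stand in for a zero immediate here.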
16379 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16380 %{
16381   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16382             n->in(1)->as_Load()->barrier_data() == 0);
16383   match(Set cr (CmpP (LoadP mem) zero));
16384 
16385   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16386   ins_encode %{
16387     __ cmpq(r12, $mem$$Address);
16388   %}
16389   ins_pipe(ialu_cr_reg_mem);
16390 %}
16391 
16392 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16393 %{
16394   match(Set cr (CmpN op1 op2));
16395 
16396   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16397   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16398   ins_pipe(ialu_cr_reg_reg);
16399 %}
16400 
16401 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16402 %{
16403   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16404   match(Set cr (CmpN src (LoadN mem)));
16405 
16406   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16407   ins_encode %{
16408     __ cmpl($src$$Register, $mem$$Address);
16409   %}
16410   ins_pipe(ialu_cr_reg_mem);
16411 %}
16412 
16413 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16414   match(Set cr (CmpN op1 op2));
16415 
16416   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16417   ins_encode %{
16418     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16419   %}
16420   ins_pipe(ialu_cr_reg_imm);
16421 %}
16422 
16423 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16424 %{
16425   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16426   match(Set cr (CmpN src (LoadN mem)));
16427 
16428   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16429   ins_encode %{
16430     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16431   %}
16432   ins_pipe(ialu_cr_reg_mem);
16433 %}
16434 
16435 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16436   match(Set cr (CmpN op1 op2));
16437 
16438   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16439   ins_encode %{
16440     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16441   %}
16442   ins_pipe(ialu_cr_reg_imm);
16443 %}
16444 
16445 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16446 %{
16447   predicate(!UseCompactObjectHeaders);
16448   match(Set cr (CmpN src (LoadNKlass mem)));
16449 
16450   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16451   ins_encode %{
16452     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16453   %}
16454   ins_pipe(ialu_cr_reg_mem);
16455 %}
16456 
16457 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16458   match(Set cr (CmpN src zero));
16459 
16460   format %{ "testl   $src, $src\t# compressed ptr" %}
16461   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16462   ins_pipe(ialu_cr_reg_imm);
16463 %}
16464 
16465 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16466 %{
16467   predicate(CompressedOops::base() != nullptr &&
16468             n->in(1)->as_Load()->barrier_data() == 0);
16469   match(Set cr (CmpN (LoadN mem) zero));
16470 
16471   ins_cost(500); // XXX
16472   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16473   ins_encode %{
16474     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16475   %}
16476   ins_pipe(ialu_cr_reg_mem);
16477 %}
16478 
16479 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16480 %{
16481   predicate(CompressedOops::base() == nullptr &&
16482             n->in(1)->as_Load()->barrier_data() == 0);
16483   match(Set cr (CmpN (LoadN mem) zero));
16484 
16485   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16486   ins_encode %{
16487     __ cmpl(r12, $mem$$Address);
16488   %}
16489   ins_pipe(ialu_cr_reg_mem);
16490 %}
16491 
16492 // Yanked all unsigned pointer compare operations.
16493 // Pointer compares are done with CmpP which is already unsigned.
16494 
16495 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16496 %{
16497   match(Set cr (CmpL op1 op2));
16498 
16499   format %{ "cmpq    $op1, $op2" %}
16500   ins_encode %{
16501     __ cmpq($op1$$Register, $op2$$Register);
16502   %}
16503   ins_pipe(ialu_cr_reg_reg);
16504 %}
16505 
16506 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16507 %{
16508   match(Set cr (CmpL op1 op2));
16509 
16510   format %{ "cmpq    $op1, $op2" %}
16511   ins_encode %{
16512     __ cmpq($op1$$Register, $op2$$constant);
16513   %}
16514   ins_pipe(ialu_cr_reg_imm);
16515 %}
16516 
16517 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16518 %{
16519   match(Set cr (CmpL op1 (LoadL op2)));
16520 
16521   format %{ "cmpq    $op1, $op2" %}
16522   ins_encode %{
16523     __ cmpq($op1$$Register, $op2$$Address);
16524   %}
16525   ins_pipe(ialu_cr_reg_mem);
16526 %}
16527 
16528 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16529 %{
16530   match(Set cr (CmpL src zero));
16531 
16532   format %{ "testq   $src, $src" %}
16533   ins_encode %{
16534     __ testq($src$$Register, $src$$Register);
16535   %}
16536   ins_pipe(ialu_cr_reg_imm);
16537 %}
16538 
16539 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16540 %{
16541   match(Set cr (CmpL (AndL src con) zero));
16542 
16543   format %{ "testq   $src, $con\t# long" %}
16544   ins_encode %{
16545     __ testq($src$$Register, $con$$constant);
16546   %}
16547   ins_pipe(ialu_cr_reg_imm);
16548 %}
16549 
16550 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16551 %{
16552   match(Set cr (CmpL (AndL src1 src2) zero));
16553 
16554   format %{ "testq   $src1, $src2\t# long" %}
16555   ins_encode %{
16556     __ testq($src1$$Register, $src2$$Register);
16557   %}
16558   ins_pipe(ialu_cr_reg_imm);
16559 %}
16560 
16561 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16562 %{
16563   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16564 
16565   format %{ "testq   $src, $mem" %}
16566   ins_encode %{
16567     __ testq($src$$Register, $mem$$Address);
16568   %}
16569   ins_pipe(ialu_cr_reg_mem);
16570 %}
16571 
16572 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16573 %{
16574   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16575 
16576   format %{ "testq   $src, $mem" %}
16577   ins_encode %{
16578     __ testq($src$$Register, $mem$$Address);
16579   %}
16580   ins_pipe(ialu_cr_reg_mem);
16581 %}
16582 
16583 // Manifest a CmpU result in an integer register.  Very painful.
16584 // This is the test to avoid.
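// Computes dst = (src1 < src2) ? -1 : ((src1 == src2) ? 0 : 1), unsigned:
// start with -1, branch away if below; otherwise setcc(notZero) leaves 0 on
// equality and 1 otherwise.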
16585 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16586 %{
16587   match(Set dst (CmpU3 src1 src2));
16588   effect(KILL flags);
16589 
16590   ins_cost(275); // XXX
16591   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16592             "movl    $dst, -1\n\t"
16593             "jb,u    done\n\t"
16594             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16595     "done:" %}
16596   ins_encode %{
16597     Label done;
16598     __ cmpl($src1$$Register, $src2$$Register);
16599     __ movl($dst$$Register, -1);
16600     __ jccb(Assembler::below, done);
16601     __ setcc(Assembler::notZero, $dst$$Register);
16602     __ bind(done);
16603   %}
16604   ins_pipe(pipe_slow);
16605 %}
16606 
16607 // Manifest a CmpL result in an integer register.  Very painful.
16608 // This is the test to avoid.
16609 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16610 %{
16611   match(Set dst (CmpL3 src1 src2));
16612   effect(KILL flags);
16613 
16614   ins_cost(275); // XXX
16615   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16616             "movl    $dst, -1\n\t"
16617             "jl,s    done\n\t"
16618             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16619     "done:" %}
16620   ins_encode %{
16621     Label done;
16622     __ cmpq($src1$$Register, $src2$$Register);
16623     __ movl($dst$$Register, -1);
16624     __ jccb(Assembler::less, done);
16625     __ setcc(Assembler::notZero, $dst$$Register);
16626     __ bind(done);
16627   %}
16628   ins_pipe(pipe_slow);
16629 %}
16630 
16631 // Manifest a CmpUL result in an integer register.  Very painful.
16632 // This is the test to avoid.
16633 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16634 %{
16635   match(Set dst (CmpUL3 src1 src2));
16636   effect(KILL flags);
16637 
16638   ins_cost(275); // XXX
16639   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16640             "movl    $dst, -1\n\t"
16641             "jb,u    done\n\t"
16642             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16643     "done:" %}
16644   ins_encode %{
16645     Label done;
16646     __ cmpq($src1$$Register, $src2$$Register);
16647     __ movl($dst$$Register, -1);
16648     __ jccb(Assembler::below, done);
16649     __ setcc(Assembler::notZero, $dst$$Register);
16650     __ bind(done);
16651   %}
16652   ins_pipe(pipe_slow);
16653 %}
16654 
16655 // Unsigned long compare Instructions; really, same as signed long except they
16656 // produce an rFlagsRegU instead of rFlagsReg.
16657 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16658 %{
16659   match(Set cr (CmpUL op1 op2));
16660 
16661   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16662   ins_encode %{
16663     __ cmpq($op1$$Register, $op2$$Register);
16664   %}
16665   ins_pipe(ialu_cr_reg_reg);
16666 %}
16667 
16668 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16669 %{
16670   match(Set cr (CmpUL op1 op2));
16671 
16672   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16673   ins_encode %{
16674     __ cmpq($op1$$Register, $op2$$constant);
16675   %}
16676   ins_pipe(ialu_cr_reg_imm);
16677 %}
16678 
16679 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16680 %{
16681   match(Set cr (CmpUL op1 (LoadL op2)));
16682 
16683   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16684   ins_encode %{
16685     __ cmpq($op1$$Register, $op2$$Address);
16686   %}
16687   ins_pipe(ialu_cr_reg_mem);
16688 %}
16689 
16690 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16691 %{
16692   match(Set cr (CmpUL src zero));
16693 
16694   format %{ "testq   $src, $src\t# unsigned" %}
16695   ins_encode %{
16696     __ testq($src$$Register, $src$$Register);
16697   %}
16698   ins_pipe(ialu_cr_reg_imm);
16699 %}
16700 
16701 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16702 %{
16703   match(Set cr (CmpI (LoadB mem) imm));
16704 
16705   ins_cost(125);
16706   format %{ "cmpb    $mem, $imm" %}
16707   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16708   ins_pipe(ialu_cr_reg_mem);
16709 %}
16710 
16711 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16712 %{
16713   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16714 
16715   ins_cost(125);
16716   format %{ "testb   $mem, $imm\t# ubyte" %}
16717   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16718   ins_pipe(ialu_cr_reg_mem);
16719 %}
16720 
16721 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16722 %{
16723   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16724 
16725   ins_cost(125);
16726   format %{ "testb   $mem, $imm\t# byte" %}
16727   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16728   ins_pipe(ialu_cr_reg_mem);
16729 %}
16730 
16731 //----------Max and Min--------------------------------------------------------
16732 // Min Instructions
16733 
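// MinI/MaxI expand into a compare followed by a conditional move, e.g.
// dst = (dst > src) ? src : dst for min. The _ndd variants use the APX
// non-destructive-destination encoding, which writes the result to a third
// register instead of overwriting an input.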
16734 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16735 %{
16736   predicate(!UseAPX);
16737   effect(USE_DEF dst, USE src, USE cr);
16738 
16739   format %{ "cmovlgt $dst, $src\t# min" %}
16740   ins_encode %{
16741     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16742   %}
16743   ins_pipe(pipe_cmov_reg);
16744 %}
16745 
16746 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16747 %{
16748   predicate(UseAPX);
16749   effect(DEF dst, USE src1, USE src2, USE cr);
16750 
16751   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16752   ins_encode %{
16753     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16754   %}
16755   ins_pipe(pipe_cmov_reg);
16756 %}
16757 
16758 instruct minI_rReg(rRegI dst, rRegI src)
16759 %{
16760   predicate(!UseAPX);
16761   match(Set dst (MinI dst src));
16762 
16763   ins_cost(200);
16764   expand %{
16765     rFlagsReg cr;
16766     compI_rReg(cr, dst, src);
16767     cmovI_reg_g(dst, src, cr);
16768   %}
16769 %}
16770 
16771 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16772 %{
16773   predicate(UseAPX);
16774   match(Set dst (MinI src1 src2));
16775   effect(DEF dst, USE src1, USE src2);
16776   flag(PD::Flag_ndd_demotable_opr1);
16777 
16778   ins_cost(200);
16779   expand %{
16780     rFlagsReg cr;
16781     compI_rReg(cr, src1, src2);
16782     cmovI_reg_g_ndd(dst, src1, src2, cr);
16783   %}
16784 %}
16785 
16786 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16787 %{
16788   predicate(!UseAPX);
16789   effect(USE_DEF dst, USE src, USE cr);
16790 
16791   format %{ "cmovllt $dst, $src\t# max" %}
16792   ins_encode %{
16793     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16794   %}
16795   ins_pipe(pipe_cmov_reg);
16796 %}
16797 
16798 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16799 %{
16800   predicate(UseAPX);
16801   effect(DEF dst, USE src1, USE src2, USE cr);
16802 
16803   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16804   ins_encode %{
16805     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16806   %}
16807   ins_pipe(pipe_cmov_reg);
16808 %}
16809 
16810 instruct maxI_rReg(rRegI dst, rRegI src)
16811 %{
16812   predicate(!UseAPX);
16813   match(Set dst (MaxI dst src));
16814 
16815   ins_cost(200);
16816   expand %{
16817     rFlagsReg cr;
16818     compI_rReg(cr, dst, src);
16819     cmovI_reg_l(dst, src, cr);
16820   %}
16821 %}
16822 
16823 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16824 %{
16825   predicate(UseAPX);
16826   match(Set dst (MaxI src1 src2));
16827   effect(DEF dst, USE src1, USE src2);
16828   flag(PD::Flag_ndd_demotable_opr1);
16829 
16830   ins_cost(200);
16831   expand %{
16832     rFlagsReg cr;
16833     compI_rReg(cr, src1, src2);
16834     cmovI_reg_l_ndd(dst, src1, src2, cr);
16835   %}
16836 %}
16837 
16838 // ============================================================================
16839 // Branch Instructions
16840 
16841 // Jump Direct - Label defines a relative address from JMP+1
16842 instruct jmpDir(label labl)
16843 %{
16844   match(Goto);
16845   effect(USE labl);
16846 
16847   ins_cost(300);
16848   format %{ "jmp     $labl" %}
16849   size(5);
16850   ins_encode %{
16851     Label* L = $labl$$label;
16852     __ jmp(*L, false); // Always long jump
16853   %}
16854   ins_pipe(pipe_jmp);
16855 %}
16856 
16857 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16858 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16859 %{
16860   match(If cop cr);
16861   effect(USE labl);
16862 
16863   ins_cost(300);
16864   format %{ "j$cop     $labl" %}
16865   size(6);
16866   ins_encode %{
16867     Label* L = $labl$$label;
16868     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16869   %}
16870   ins_pipe(pipe_jcc);
16871 %}
16872 
16873 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16874 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16875 %{
16876   match(CountedLoopEnd cop cr);
16877   effect(USE labl);
16878 
16879   ins_cost(300);
16880   format %{ "j$cop     $labl\t# loop end" %}
16881   size(6);
16882   ins_encode %{
16883     Label* L = $labl$$label;
16884     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16885   %}
16886   ins_pipe(pipe_jcc);
16887 %}
16888 
16889 // Jump Direct Conditional - using unsigned comparison
16890 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16891   match(If cop cmp);
16892   effect(USE labl);
16893 
16894   ins_cost(300);
16895   format %{ "j$cop,u   $labl" %}
16896   size(6);
16897   ins_encode %{
16898     Label* L = $labl$$label;
16899     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16900   %}
16901   ins_pipe(pipe_jcc);
16902 %}
16903 
16904 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16905   match(If cop cmp);
16906   effect(USE labl);
16907 
16908   ins_cost(200);
16909   format %{ "j$cop,u   $labl" %}
16910   size(6);
16911   ins_encode %{
16912     Label* L = $labl$$label;
16913     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16914   %}
16915   ins_pipe(pipe_jcc);
16916 %}
16917 
16918 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16919   match(If cop cmp);
16920   effect(USE labl);
16921 
16922   ins_cost(200);
16923   format %{ $$template
16924     if ($cop$$cmpcode == Assembler::notEqual) {
16925       $$emit$$"jp,u    $labl\n\t"
16926       $$emit$$"j$cop,u   $labl"
16927     } else {
16928       $$emit$$"jp,u    done\n\t"
16929       $$emit$$"j$cop,u   $labl\n\t"
16930       $$emit$$"done:"
16931     }
16932   %}
16933   ins_encode %{
16934     Label* l = $labl$$label;
16935     if ($cop$$cmpcode == Assembler::notEqual) {
16936       __ jcc(Assembler::parity, *l, false);
16937       __ jcc(Assembler::notEqual, *l, false);
16938     } else if ($cop$$cmpcode == Assembler::equal) {
16939       Label done;
16940       __ jccb(Assembler::parity, done);
16941       __ jcc(Assembler::equal, *l, false);
16942       __ bind(done);
16943     } else {
16944        ShouldNotReachHere();
16945     }
16946   %}
16947   ins_pipe(pipe_jcc);
16948 %}
16949 
16950 // Jump Direct Conditional - using signed and unsigned comparison
16951 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16952   match(If cop cmp);
16953   effect(USE labl);
16954 
16955   ins_cost(200);
16956   format %{ "j$cop,su   $labl" %}
16957   size(6);
16958   ins_encode %{
16959     Label* L = $labl$$label;
16960     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16961   %}
16962   ins_pipe(pipe_jcc);
16963 %}
16964 
16965 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
16971 
16972 instruct partialSubtypeCheck(rdi_RegP result,
16973                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16974                              rFlagsReg cr)
16975 %{
16976   match(Set result (PartialSubtypeCheck sub super));
16977   predicate(!UseSecondarySupersTable);
16978   effect(KILL rcx, KILL cr);
16979 
16980   ins_cost(1100);  // slightly larger than the next version
16981   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16982             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16983             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16984             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16985             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16986             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16987             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
16988     "miss:\t" %}
16989 
16990   ins_encode %{
16991     Label miss;
16992     // NB: Callers may assume that, when $result is a valid register,
16993     // check_klass_subtype_slow_path_linear sets it to a nonzero
16994     // value.
16995     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16996                                             $rcx$$Register, $result$$Register,
16997                                             nullptr, &miss,
16998                                             /*set_cond_codes:*/ true);
16999     __ xorptr($result$$Register, $result$$Register);
17000     __ bind(miss);
17001   %}
17002 
17003   ins_pipe(pipe_slow);
17004 %}
17005 
17006 // ============================================================================
17007 // Two versions of hashtable-based partialSubtypeCheck, both used when
17008 // we need to search for a super class in the secondary supers array.
17009 // The first is used when we don't know _a priori_ the class being
17010 // searched for. The second, far more common, is used when we do know:
17011 // this is used for instanceof, checkcast, and any case where C2 can
17012 // determine it by constant propagation.
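//
// Example (illustrative): for "x instanceof Foo" the superklass Foo is a
// compile-time constant, so the ConstSuper rule below applies; a call such
// as someClass.isInstance(x), where the class is only known at run time,
// falls back to the VarSuper rule.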
17013 
17014 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17015                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17016                                        rFlagsReg cr)
17017 %{
17018   match(Set result (PartialSubtypeCheck sub super));
17019   predicate(UseSecondarySupersTable);
17020   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17021 
17022   ins_cost(1000);
17023   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17024 
17025   ins_encode %{
17026     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
17028   %}
17029 
17030   ins_pipe(pipe_slow);
17031 %}
17032 
17033 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17034                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17035                                        rFlagsReg cr)
17036 %{
17037   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17038   predicate(UseSecondarySupersTable);
17039   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17040 
17041   ins_cost(700);  // smaller than the next version
17042   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17043 
17044   ins_encode %{
17045     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17046     if (InlineSecondarySupersTest) {
17047       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17048                                        $temp3$$Register, $temp4$$Register, $result$$Register,
17049                                        super_klass_slot);
17050     } else {
17051       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17052     }
17053   %}
17054 
17055   ins_pipe(pipe_slow);
17056 %}
17057 
17058 // ============================================================================
17059 // Branch Instructions -- short offset versions
17060 //
17061 // These instructions are used to replace jumps of a long offset (the default
17062 // match) with jumps of a shorter offset.  These instructions are all tagged
17063 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17064 // match rules in general matching.  Instead, the ADLC generates a conversion
17065 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler uses the
// is_short_branch_offset() predicate in the machine-specific code section of
// the file to decide whether the short variant can be used.
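//
// For example, the 5-byte "jmp L" (E9 cd) emitted by jmpDir can be replaced
// in place by the 2-byte "jmp,s L" (EB cb) of jmpDir_short whenever the
// resolved displacement fits in a signed 8-bit offset.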
17069 
17070 // Jump Direct - Label defines a relative address from JMP+1
17071 instruct jmpDir_short(label labl) %{
17072   match(Goto);
17073   effect(USE labl);
17074 
17075   ins_cost(300);
17076   format %{ "jmp,s   $labl" %}
17077   size(2);
17078   ins_encode %{
17079     Label* L = $labl$$label;
17080     __ jmpb(*L);
17081   %}
17082   ins_pipe(pipe_jmp);
17083   ins_short_branch(1);
17084 %}
17085 
17086 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17087 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17088   match(If cop cr);
17089   effect(USE labl);
17090 
17091   ins_cost(300);
17092   format %{ "j$cop,s   $labl" %}
17093   size(2);
17094   ins_encode %{
17095     Label* L = $labl$$label;
17096     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17097   %}
17098   ins_pipe(pipe_jcc);
17099   ins_short_branch(1);
17100 %}
17101 
17102 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17103 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17104   match(CountedLoopEnd cop cr);
17105   effect(USE labl);
17106 
17107   ins_cost(300);
17108   format %{ "j$cop,s   $labl\t# loop end" %}
17109   size(2);
17110   ins_encode %{
17111     Label* L = $labl$$label;
17112     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17113   %}
17114   ins_pipe(pipe_jcc);
17115   ins_short_branch(1);
17116 %}
17117 
17118 // Jump Direct Conditional - using unsigned comparison
17119 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17120   match(If cop cmp);
17121   effect(USE labl);
17122 
17123   ins_cost(300);
17124   format %{ "j$cop,us  $labl" %}
17125   size(2);
17126   ins_encode %{
17127     Label* L = $labl$$label;
17128     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17129   %}
17130   ins_pipe(pipe_jcc);
17131   ins_short_branch(1);
17132 %}
17133 
17134 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17135   match(If cop cmp);
17136   effect(USE labl);
17137 
17138   ins_cost(300);
17139   format %{ "j$cop,us  $labl" %}
17140   size(2);
17141   ins_encode %{
17142     Label* L = $labl$$label;
17143     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17144   %}
17145   ins_pipe(pipe_jcc);
17146   ins_short_branch(1);
17147 %}
17148 
17149 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17150   match(If cop cmp);
17151   effect(USE labl);
17152 
17153   ins_cost(300);
17154   format %{ $$template
17155     if ($cop$$cmpcode == Assembler::notEqual) {
17156       $$emit$$"jp,u,s  $labl\n\t"
17157       $$emit$$"j$cop,u,s  $labl"
17158     } else {
17159       $$emit$$"jp,u,s  done\n\t"
17160       $$emit$$"j$cop,u,s  $labl\n\t"
17161       $$emit$$"done:"
17162     }
17163   %}
17164   size(4);
17165   ins_encode %{
17166     Label* l = $labl$$label;
17167     if ($cop$$cmpcode == Assembler::notEqual) {
17168       __ jccb(Assembler::parity, *l);
17169       __ jccb(Assembler::notEqual, *l);
17170     } else if ($cop$$cmpcode == Assembler::equal) {
17171       Label done;
17172       __ jccb(Assembler::parity, done);
17173       __ jccb(Assembler::equal, *l);
17174       __ bind(done);
17175     } else {
17176        ShouldNotReachHere();
17177     }
17178   %}
17179   ins_pipe(pipe_jcc);
17180   ins_short_branch(1);
17181 %}
17182 
17183 // Jump Direct Conditional - using signed and unsigned comparison
17184 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17185   match(If cop cmp);
17186   effect(USE labl);
17187 
17188   ins_cost(300);
17189   format %{ "j$cop,sus  $labl" %}
17190   size(2);
17191   ins_encode %{
17192     Label* L = $labl$$label;
17193     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17194   %}
17195   ins_pipe(pipe_jcc);
17196   ins_short_branch(1);
17197 %}
17198 
17199 // ============================================================================
17200 // inlined locking and unlocking
17201 
17202 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17203   match(Set cr (FastLock object box));
17204   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17205   ins_cost(300);
17206   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17207   ins_encode %{
17208     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17209   %}
17210   ins_pipe(pipe_slow);
17211 %}
17212 
17213 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17214   match(Set cr (FastUnlock object rax_reg));
17215   effect(TEMP tmp, USE_KILL rax_reg);
17216   ins_cost(300);
17217   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17218   ins_encode %{
17219     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17220   %}
17221   ins_pipe(pipe_slow);
17222 %}
17223 
17224 
17225 // ============================================================================
17226 // Safepoint Instructions
17227 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17228 %{
17229   match(SafePoint poll);
17230   effect(KILL cr, USE poll);
17231 
17232   format %{ "testl   rax, [$poll]\t"
17233             "# Safepoint: poll for GC" %}
17234   ins_cost(125);
17235   ins_encode %{
17236     __ relocate(relocInfo::poll_type);
17237     address pre_pc = __ pc();
17238     __ testl(rax, Address($poll$$Register, 0));
17239     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17240   %}
17241   ins_pipe(ialu_reg_mem);
17242 %}
17243 
17244 instruct mask_all_evexL(kReg dst, rRegL src) %{
17245   match(Set dst (MaskAll src));
17246   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17247   ins_encode %{
17248     int mask_len = Matcher::vector_length(this);
17249     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17250   %}
17251   ins_pipe( pipe_slow );
17252 %}
17253 
17254 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17255   predicate(Matcher::vector_length(n) > 32);
17256   match(Set dst (MaskAll src));
17257   effect(TEMP tmp);
17258   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17259   ins_encode %{
17260     int mask_len = Matcher::vector_length(this);
17261     __ movslq($tmp$$Register, $src$$Register);
17262     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17263   %}
17264   ins_pipe( pipe_slow );
17265 %}
17266 
17267 // ============================================================================
17268 // Procedure Call/Return Instructions
17269 // Call Java Static Instruction
17270 // Note: If this code changes, the corresponding ret_addr_offset() and
17271 //       compute_padding() functions will have to be adjusted.
17272 instruct CallStaticJavaDirect(method meth) %{
17273   match(CallStaticJava);
17274   effect(USE meth);
17275 
17276   ins_cost(300);
17277   format %{ "call,static " %}
17278   opcode(0xE8); /* E8 cd */
17279   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17280   ins_pipe(pipe_slow);
17281   ins_alignment(4);
17282 %}
17283 
17284 // Call Java Dynamic Instruction
17285 // Note: If this code changes, the corresponding ret_addr_offset() and
17286 //       compute_padding() functions will have to be adjusted.
17287 instruct CallDynamicJavaDirect(method meth)
17288 %{
17289   match(CallDynamicJava);
17290   effect(USE meth);
17291 
17292   ins_cost(300);
17293   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17294             "call,dynamic " %}
17295   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17296   ins_pipe(pipe_slow);
17297   ins_alignment(4);
17298 %}
17299 
17300 // Call Runtime Instruction
17301 instruct CallRuntimeDirect(method meth)
17302 %{
17303   match(CallRuntime);
17304   effect(USE meth);
17305 
17306   ins_cost(300);
17307   format %{ "call,runtime " %}
17308   ins_encode(clear_avx, Java_To_Runtime(meth));
17309   ins_pipe(pipe_slow);
17310 %}
17311 
17312 // Call runtime without safepoint
17313 instruct CallLeafDirect(method meth)
17314 %{
17315   match(CallLeaf);
17316   effect(USE meth);
17317 
17318   ins_cost(300);
17319   format %{ "call_leaf,runtime " %}
17320   ins_encode(clear_avx, Java_To_Runtime(meth));
17321   ins_pipe(pipe_slow);
17322 %}
17323 
17324 // Call runtime without safepoint and with vector arguments
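// Note: unlike the other runtime calls here, this one omits clear_avx, since
// vzeroupper would destroy the upper bits of the vector registers that carry
// arguments.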
17325 instruct CallLeafDirectVector(method meth)
17326 %{
17327   match(CallLeafVector);
17328   effect(USE meth);
17329 
17330   ins_cost(300);
17331   format %{ "call_leaf,vector " %}
17332   ins_encode(Java_To_Runtime(meth));
17333   ins_pipe(pipe_slow);
17334 %}
17335 
17336 // Call runtime without safepoint
17337 instruct CallLeafNoFPDirect(method meth)
17338 %{
17339   match(CallLeafNoFP);
17340   effect(USE meth);
17341 
17342   ins_cost(300);
17343   format %{ "call_leaf_nofp,runtime " %}
17344   ins_encode(clear_avx, Java_To_Runtime(meth));
17345   ins_pipe(pipe_slow);
17346 %}
17347 
17348 // Return Instruction
// Remove the return address and jump to it.
// Note: we always emit a nop after the ret to make sure there is room
// for safepoint patching.
17352 instruct Ret()
17353 %{
17354   match(Return);
17355 
17356   format %{ "ret" %}
17357   ins_encode %{
17358     __ ret(0);
17359   %}
17360   ins_pipe(pipe_jmp);
17361 %}
17362 
17363 // Tail Call; Jump from runtime stub to Java code.
17364 // Also known as an 'interprocedural jump'.
17365 // Target of jump will eventually return to caller.
17366 // TailJump below removes the return address.
17367 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17368 // emitted just above the TailCall which has reset rbp to the caller state.
17369 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17370 %{
17371   match(TailCall jump_target method_ptr);
17372 
17373   ins_cost(300);
17374   format %{ "jmp     $jump_target\t# rbx holds method" %}
17375   ins_encode %{
17376     __ jmp($jump_target$$Register);
17377   %}
17378   ins_pipe(pipe_jmp);
17379 %}
17380 
17381 // Tail Jump; remove the return address; jump to target.
17382 // TailCall above leaves the return address around.
17383 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17384 %{
17385   match(TailJump jump_target ex_oop);
17386 
17387   ins_cost(300);
17388   format %{ "popq    rdx\t# pop return address\n\t"
17389             "jmp     $jump_target" %}
17390   ins_encode %{
17391     __ popq(as_Register(RDX_enc));
17392     __ jmp($jump_target$$Register);
17393   %}
17394   ins_pipe(pipe_jmp);
17395 %}
17396 
17397 // Forward exception.
17398 instruct ForwardExceptionjmp()
17399 %{
17400   match(ForwardException);
17401 
17402   format %{ "jmp     forward_exception_stub" %}
17403   ins_encode %{
17404     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17405   %}
17406   ins_pipe(pipe_jmp);
17407 %}
17408 
17409 // Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler, and is set up
// just prior to jumping to this handler.  No code emitted.
17412 instruct CreateException(rax_RegP ex_oop)
17413 %{
17414   match(Set ex_oop (CreateEx));
17415 
17416   size(0);
17417   // use the following format syntax
17418   format %{ "# exception oop is in rax; no code emitted" %}
17419   ins_encode();
17420   ins_pipe(empty);
17421 %}
17422 
17423 // Rethrow exception:
17424 // The exception oop will come in the first argument position.
17425 // Then JUMP (not call) to the rethrow stub code.
17426 instruct RethrowException()
17427 %{
17428   match(Rethrow);
17429 
17430   // use the following format syntax
17431   format %{ "jmp     rethrow_stub" %}
17432   ins_encode %{
17433     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17434   %}
17435   ins_pipe(pipe_jmp);
17436 %}
17437 
17438 // ============================================================================
17439 // This name is KNOWN by the ADLC and cannot be changed.
17440 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17441 // for this guy.
17442 instruct tlsLoadP(r15_RegP dst) %{
17443   match(Set dst (ThreadLocal));
17444   effect(DEF dst);
17445 
17446   size(0);
17447   format %{ "# TLS is in R15" %}
17448   ins_encode( /*empty encoding*/ );
17449   ins_pipe(ialu_reg_reg);
17450 %}
17451 
17452 instruct addF_reg(regF dst, regF src) %{
17453   predicate(UseAVX == 0);
17454   match(Set dst (AddF dst src));
17455 
17456   format %{ "addss   $dst, $src" %}
17457   ins_cost(150);
17458   ins_encode %{
17459     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17460   %}
17461   ins_pipe(pipe_slow);
17462 %}
17463 
17464 instruct addF_mem(regF dst, memory src) %{
17465   predicate(UseAVX == 0);
17466   match(Set dst (AddF dst (LoadF src)));
17467 
17468   format %{ "addss   $dst, $src" %}
17469   ins_cost(150);
17470   ins_encode %{
17471     __ addss($dst$$XMMRegister, $src$$Address);
17472   %}
17473   ins_pipe(pipe_slow);
17474 %}
17475 
17476 instruct addF_imm(regF dst, immF con) %{
17477   predicate(UseAVX == 0);
17478   match(Set dst (AddF dst con));
17479   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17480   ins_cost(150);
17481   ins_encode %{
17482     __ addss($dst$$XMMRegister, $constantaddress($con));
17483   %}
17484   ins_pipe(pipe_slow);
17485 %}
17486 
17487 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17488   predicate(UseAVX > 0);
17489   match(Set dst (AddF src1 src2));
17490 
17491   format %{ "vaddss  $dst, $src1, $src2" %}
17492   ins_cost(150);
17493   ins_encode %{
17494     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17495   %}
17496   ins_pipe(pipe_slow);
17497 %}
17498 
17499 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17500   predicate(UseAVX > 0);
17501   match(Set dst (AddF src1 (LoadF src2)));
17502 
17503   format %{ "vaddss  $dst, $src1, $src2" %}
17504   ins_cost(150);
17505   ins_encode %{
17506     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17507   %}
17508   ins_pipe(pipe_slow);
17509 %}
17510 
17511 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17512   predicate(UseAVX > 0);
17513   match(Set dst (AddF src con));
17514 
17515   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17516   ins_cost(150);
17517   ins_encode %{
17518     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17519   %}
17520   ins_pipe(pipe_slow);
17521 %}
17522 
17523 instruct addD_reg(regD dst, regD src) %{
17524   predicate(UseAVX == 0);
17525   match(Set dst (AddD dst src));
17526 
17527   format %{ "addsd   $dst, $src" %}
17528   ins_cost(150);
17529   ins_encode %{
17530     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17531   %}
17532   ins_pipe(pipe_slow);
17533 %}
17534 
17535 instruct addD_mem(regD dst, memory src) %{
17536   predicate(UseAVX == 0);
17537   match(Set dst (AddD dst (LoadD src)));
17538 
17539   format %{ "addsd   $dst, $src" %}
17540   ins_cost(150);
17541   ins_encode %{
17542     __ addsd($dst$$XMMRegister, $src$$Address);
17543   %}
17544   ins_pipe(pipe_slow);
17545 %}
17546 
17547 instruct addD_imm(regD dst, immD con) %{
17548   predicate(UseAVX == 0);
17549   match(Set dst (AddD dst con));
17550   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17551   ins_cost(150);
17552   ins_encode %{
17553     __ addsd($dst$$XMMRegister, $constantaddress($con));
17554   %}
17555   ins_pipe(pipe_slow);
17556 %}
17557 
17558 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17559   predicate(UseAVX > 0);
17560   match(Set dst (AddD src1 src2));
17561 
17562   format %{ "vaddsd  $dst, $src1, $src2" %}
17563   ins_cost(150);
17564   ins_encode %{
17565     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17566   %}
17567   ins_pipe(pipe_slow);
17568 %}
17569 
17570 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17571   predicate(UseAVX > 0);
17572   match(Set dst (AddD src1 (LoadD src2)));
17573 
17574   format %{ "vaddsd  $dst, $src1, $src2" %}
17575   ins_cost(150);
17576   ins_encode %{
17577     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17578   %}
17579   ins_pipe(pipe_slow);
17580 %}
17581 
17582 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17583   predicate(UseAVX > 0);
17584   match(Set dst (AddD src con));
17585 
17586   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17587   ins_cost(150);
17588   ins_encode %{
17589     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17590   %}
17591   ins_pipe(pipe_slow);
17592 %}
17593 
17594 instruct subF_reg(regF dst, regF src) %{
17595   predicate(UseAVX == 0);
17596   match(Set dst (SubF dst src));
17597 
17598   format %{ "subss   $dst, $src" %}
17599   ins_cost(150);
17600   ins_encode %{
17601     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17602   %}
17603   ins_pipe(pipe_slow);
17604 %}
17605 
17606 instruct subF_mem(regF dst, memory src) %{
17607   predicate(UseAVX == 0);
17608   match(Set dst (SubF dst (LoadF src)));
17609 
17610   format %{ "subss   $dst, $src" %}
17611   ins_cost(150);
17612   ins_encode %{
17613     __ subss($dst$$XMMRegister, $src$$Address);
17614   %}
17615   ins_pipe(pipe_slow);
17616 %}
17617 
17618 instruct subF_imm(regF dst, immF con) %{
17619   predicate(UseAVX == 0);
17620   match(Set dst (SubF dst con));
17621   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17622   ins_cost(150);
17623   ins_encode %{
17624     __ subss($dst$$XMMRegister, $constantaddress($con));
17625   %}
17626   ins_pipe(pipe_slow);
17627 %}
17628 
17629 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17630   predicate(UseAVX > 0);
17631   match(Set dst (SubF src1 src2));
17632 
17633   format %{ "vsubss  $dst, $src1, $src2" %}
17634   ins_cost(150);
17635   ins_encode %{
17636     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17637   %}
17638   ins_pipe(pipe_slow);
17639 %}
17640 
17641 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17642   predicate(UseAVX > 0);
17643   match(Set dst (SubF src1 (LoadF src2)));
17644 
17645   format %{ "vsubss  $dst, $src1, $src2" %}
17646   ins_cost(150);
17647   ins_encode %{
17648     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17649   %}
17650   ins_pipe(pipe_slow);
17651 %}
17652 
17653 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17654   predicate(UseAVX > 0);
17655   match(Set dst (SubF src con));
17656 
17657   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17658   ins_cost(150);
17659   ins_encode %{
17660     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17661   %}
17662   ins_pipe(pipe_slow);
17663 %}
17664 
17665 instruct subD_reg(regD dst, regD src) %{
17666   predicate(UseAVX == 0);
17667   match(Set dst (SubD dst src));
17668 
17669   format %{ "subsd   $dst, $src" %}
17670   ins_cost(150);
17671   ins_encode %{
17672     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17673   %}
17674   ins_pipe(pipe_slow);
17675 %}
17676 
17677 instruct subD_mem(regD dst, memory src) %{
17678   predicate(UseAVX == 0);
17679   match(Set dst (SubD dst (LoadD src)));
17680 
17681   format %{ "subsd   $dst, $src" %}
17682   ins_cost(150);
17683   ins_encode %{
17684     __ subsd($dst$$XMMRegister, $src$$Address);
17685   %}
17686   ins_pipe(pipe_slow);
17687 %}
17688 
17689 instruct subD_imm(regD dst, immD con) %{
17690   predicate(UseAVX == 0);
17691   match(Set dst (SubD dst con));
17692   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17693   ins_cost(150);
17694   ins_encode %{
17695     __ subsd($dst$$XMMRegister, $constantaddress($con));
17696   %}
17697   ins_pipe(pipe_slow);
17698 %}
17699 
17700 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17701   predicate(UseAVX > 0);
17702   match(Set dst (SubD src1 src2));
17703 
17704   format %{ "vsubsd  $dst, $src1, $src2" %}
17705   ins_cost(150);
17706   ins_encode %{
17707     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17708   %}
17709   ins_pipe(pipe_slow);
17710 %}
17711 
17712 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17713   predicate(UseAVX > 0);
17714   match(Set dst (SubD src1 (LoadD src2)));
17715 
17716   format %{ "vsubsd  $dst, $src1, $src2" %}
17717   ins_cost(150);
17718   ins_encode %{
17719     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17720   %}
17721   ins_pipe(pipe_slow);
17722 %}
17723 
17724 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17725   predicate(UseAVX > 0);
17726   match(Set dst (SubD src con));
17727 
17728   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17729   ins_cost(150);
17730   ins_encode %{
17731     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17732   %}
17733   ins_pipe(pipe_slow);
17734 %}
17735 
17736 instruct mulF_reg(regF dst, regF src) %{
17737   predicate(UseAVX == 0);
17738   match(Set dst (MulF dst src));
17739 
17740   format %{ "mulss   $dst, $src" %}
17741   ins_cost(150);
17742   ins_encode %{
17743     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17744   %}
17745   ins_pipe(pipe_slow);
17746 %}
17747 
17748 instruct mulF_mem(regF dst, memory src) %{
17749   predicate(UseAVX == 0);
17750   match(Set dst (MulF dst (LoadF src)));
17751 
17752   format %{ "mulss   $dst, $src" %}
17753   ins_cost(150);
17754   ins_encode %{
17755     __ mulss($dst$$XMMRegister, $src$$Address);
17756   %}
17757   ins_pipe(pipe_slow);
17758 %}
17759 
17760 instruct mulF_imm(regF dst, immF con) %{
17761   predicate(UseAVX == 0);
17762   match(Set dst (MulF dst con));
17763   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17764   ins_cost(150);
17765   ins_encode %{
17766     __ mulss($dst$$XMMRegister, $constantaddress($con));
17767   %}
17768   ins_pipe(pipe_slow);
17769 %}
17770 
17771 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17772   predicate(UseAVX > 0);
17773   match(Set dst (MulF src1 src2));
17774 
17775   format %{ "vmulss  $dst, $src1, $src2" %}
17776   ins_cost(150);
17777   ins_encode %{
17778     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17779   %}
17780   ins_pipe(pipe_slow);
17781 %}
17782 
17783 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17784   predicate(UseAVX > 0);
17785   match(Set dst (MulF src1 (LoadF src2)));
17786 
17787   format %{ "vmulss  $dst, $src1, $src2" %}
17788   ins_cost(150);
17789   ins_encode %{
17790     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17791   %}
17792   ins_pipe(pipe_slow);
17793 %}
17794 
17795 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17796   predicate(UseAVX > 0);
17797   match(Set dst (MulF src con));
17798 
17799   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17800   ins_cost(150);
17801   ins_encode %{
17802     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17803   %}
17804   ins_pipe(pipe_slow);
17805 %}
17806 
17807 instruct mulD_reg(regD dst, regD src) %{
17808   predicate(UseAVX == 0);
17809   match(Set dst (MulD dst src));
17810 
17811   format %{ "mulsd   $dst, $src" %}
17812   ins_cost(150);
17813   ins_encode %{
17814     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17815   %}
17816   ins_pipe(pipe_slow);
17817 %}
17818 
17819 instruct mulD_mem(regD dst, memory src) %{
17820   predicate(UseAVX == 0);
17821   match(Set dst (MulD dst (LoadD src)));
17822 
17823   format %{ "mulsd   $dst, $src" %}
17824   ins_cost(150);
17825   ins_encode %{
17826     __ mulsd($dst$$XMMRegister, $src$$Address);
17827   %}
17828   ins_pipe(pipe_slow);
17829 %}
17830 
17831 instruct mulD_imm(regD dst, immD con) %{
17832   predicate(UseAVX == 0);
17833   match(Set dst (MulD dst con));
17834   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17835   ins_cost(150);
17836   ins_encode %{
17837     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17838   %}
17839   ins_pipe(pipe_slow);
17840 %}
17841 
17842 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17843   predicate(UseAVX > 0);
17844   match(Set dst (MulD src1 src2));
17845 
17846   format %{ "vmulsd  $dst, $src1, $src2" %}
17847   ins_cost(150);
17848   ins_encode %{
17849     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17850   %}
17851   ins_pipe(pipe_slow);
17852 %}
17853 
17854 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17855   predicate(UseAVX > 0);
17856   match(Set dst (MulD src1 (LoadD src2)));
17857 
17858   format %{ "vmulsd  $dst, $src1, $src2" %}
17859   ins_cost(150);
17860   ins_encode %{
17861     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17862   %}
17863   ins_pipe(pipe_slow);
17864 %}
17865 
17866 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17867   predicate(UseAVX > 0);
17868   match(Set dst (MulD src con));
17869 
17870   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17871   ins_cost(150);
17872   ins_encode %{
17873     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17874   %}
17875   ins_pipe(pipe_slow);
17876 %}
17877 
17878 instruct divF_reg(regF dst, regF src) %{
17879   predicate(UseAVX == 0);
17880   match(Set dst (DivF dst src));
17881 
17882   format %{ "divss   $dst, $src" %}
17883   ins_cost(150);
17884   ins_encode %{
17885     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17886   %}
17887   ins_pipe(pipe_slow);
17888 %}
17889 
17890 instruct divF_mem(regF dst, memory src) %{
17891   predicate(UseAVX == 0);
17892   match(Set dst (DivF dst (LoadF src)));
17893 
17894   format %{ "divss   $dst, $src" %}
17895   ins_cost(150);
17896   ins_encode %{
17897     __ divss($dst$$XMMRegister, $src$$Address);
17898   %}
17899   ins_pipe(pipe_slow);
17900 %}
17901 
17902 instruct divF_imm(regF dst, immF con) %{
17903   predicate(UseAVX == 0);
17904   match(Set dst (DivF dst con));
17905   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17906   ins_cost(150);
17907   ins_encode %{
17908     __ divss($dst$$XMMRegister, $constantaddress($con));
17909   %}
17910   ins_pipe(pipe_slow);
17911 %}
17912 
17913 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17914   predicate(UseAVX > 0);
17915   match(Set dst (DivF src1 src2));
17916 
17917   format %{ "vdivss  $dst, $src1, $src2" %}
17918   ins_cost(150);
17919   ins_encode %{
17920     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17921   %}
17922   ins_pipe(pipe_slow);
17923 %}
17924 
17925 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17926   predicate(UseAVX > 0);
17927   match(Set dst (DivF src1 (LoadF src2)));
17928 
17929   format %{ "vdivss  $dst, $src1, $src2" %}
17930   ins_cost(150);
17931   ins_encode %{
17932     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17933   %}
17934   ins_pipe(pipe_slow);
17935 %}
17936 
17937 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17938   predicate(UseAVX > 0);
17939   match(Set dst (DivF src con));
17940 
17941   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17942   ins_cost(150);
17943   ins_encode %{
17944     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17945   %}
17946   ins_pipe(pipe_slow);
17947 %}
17948 
17949 instruct divD_reg(regD dst, regD src) %{
17950   predicate(UseAVX == 0);
17951   match(Set dst (DivD dst src));
17952 
17953   format %{ "divsd   $dst, $src" %}
17954   ins_cost(150);
17955   ins_encode %{
17956     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17957   %}
17958   ins_pipe(pipe_slow);
17959 %}
17960 
17961 instruct divD_mem(regD dst, memory src) %{
17962   predicate(UseAVX == 0);
17963   match(Set dst (DivD dst (LoadD src)));
17964 
17965   format %{ "divsd   $dst, $src" %}
17966   ins_cost(150);
17967   ins_encode %{
17968     __ divsd($dst$$XMMRegister, $src$$Address);
17969   %}
17970   ins_pipe(pipe_slow);
17971 %}
17972 
17973 instruct divD_imm(regD dst, immD con) %{
17974   predicate(UseAVX == 0);
17975   match(Set dst (DivD dst con));
17976   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17977   ins_cost(150);
17978   ins_encode %{
17979     __ divsd($dst$$XMMRegister, $constantaddress($con));
17980   %}
17981   ins_pipe(pipe_slow);
17982 %}
17983 
17984 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17985   predicate(UseAVX > 0);
17986   match(Set dst (DivD src1 src2));
17987 
17988   format %{ "vdivsd  $dst, $src1, $src2" %}
17989   ins_cost(150);
17990   ins_encode %{
17991     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17992   %}
17993   ins_pipe(pipe_slow);
17994 %}
17995 
17996 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17997   predicate(UseAVX > 0);
17998   match(Set dst (DivD src1 (LoadD src2)));
17999 
18000   format %{ "vdivsd  $dst, $src1, $src2" %}
18001   ins_cost(150);
18002   ins_encode %{
18003     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18004   %}
18005   ins_pipe(pipe_slow);
18006 %}
18007 
18008 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18009   predicate(UseAVX > 0);
18010   match(Set dst (DivD src con));
18011 
18012   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18013   ins_cost(150);
18014   ins_encode %{
18015     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18016   %}
18017   ins_pipe(pipe_slow);
18018 %}
18019 
18020 instruct absF_reg(regF dst) %{
18021   predicate(UseAVX == 0);
18022   match(Set dst (AbsF dst));
18023   ins_cost(150);
18024   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18025   ins_encode %{
18026     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18027   %}
18028   ins_pipe(pipe_slow);
18029 %}
18030 
18031 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18032   predicate(UseAVX > 0);
18033   match(Set dst (AbsF src));
18034   ins_cost(150);
18035   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18036   ins_encode %{
18037     int vlen_enc = Assembler::AVX_128bit;
18038     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18039               ExternalAddress(float_signmask()), vlen_enc);
18040   %}
18041   ins_pipe(pipe_slow);
18042 %}
18043 
18044 instruct absD_reg(regD dst) %{
18045   predicate(UseAVX == 0);
18046   match(Set dst (AbsD dst));
18047   ins_cost(150);
18048   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18049             "# abs double by sign masking" %}
18050   ins_encode %{
18051     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18052   %}
18053   ins_pipe(pipe_slow);
18054 %}
18055 
18056 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18057   predicate(UseAVX > 0);
18058   match(Set dst (AbsD src));
18059   ins_cost(150);
18060   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18061             "# abs double by sign masking" %}
18062   ins_encode %{
18063     int vlen_enc = Assembler::AVX_128bit;
18064     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18065               ExternalAddress(double_signmask()), vlen_enc);
18066   %}
18067   ins_pipe(pipe_slow);
18068 %}
18069 
18070 instruct negF_reg(regF dst) %{
18071   predicate(UseAVX == 0);
18072   match(Set dst (NegF dst));
18073   ins_cost(150);
18074   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18075   ins_encode %{
18076     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18077   %}
18078   ins_pipe(pipe_slow);
18079 %}
18080 
18081 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18082   predicate(UseAVX > 0);
18083   match(Set dst (NegF src));
18084   ins_cost(150);
18085   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18086   ins_encode %{
18087     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18088                  ExternalAddress(float_signflip()));
18089   %}
18090   ins_pipe(pipe_slow);
18091 %}
18092 
18093 instruct negD_reg(regD dst) %{
18094   predicate(UseAVX == 0);
18095   match(Set dst (NegD dst));
18096   ins_cost(150);
18097   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18098             "# neg double by sign flipping" %}
18099   ins_encode %{
18100     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18101   %}
18102   ins_pipe(pipe_slow);
18103 %}
18104 
18105 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18106   predicate(UseAVX > 0);
18107   match(Set dst (NegD src));
18108   ins_cost(150);
18109   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18110             "# neg double by sign flipping" %}
18111   ins_encode %{
18112     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18113                  ExternalAddress(double_signflip()));
18114   %}
18115   ins_pipe(pipe_slow);
18116 %}
18117 
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance, so only the rule where the input is pre-loaded into
// the dst register is defined below.
18120 instruct sqrtF_reg(regF dst) %{
18121   match(Set dst (SqrtF dst));
18122   format %{ "sqrtss  $dst, $dst" %}
18123   ins_encode %{
18124     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18125   %}
18126   ins_pipe(pipe_slow);
18127 %}
18128 
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance, so only the rule where the input is pre-loaded into
// the dst register is defined below.
18131 instruct sqrtD_reg(regD dst) %{
18132   match(Set dst (SqrtD dst));
18133   format %{ "sqrtsd  $dst, $dst" %}
18134   ins_encode %{
18135     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18136   %}
18137   ins_pipe(pipe_slow);
18138 %}
18139 
18140 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18141   effect(TEMP tmp);
18142   match(Set dst (ConvF2HF src));
18143   ins_cost(125);
18144   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18145   ins_encode %{
18146     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18147   %}
18148   ins_pipe( pipe_slow );
18149 %}
18150 
18151 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18152   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18153   effect(TEMP ktmp, TEMP rtmp);
18154   match(Set mem (StoreC mem (ConvF2HF src)));
18155   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18156   ins_encode %{
18157     __ movl($rtmp$$Register, 0x1);
18158     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18159     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18160   %}
18161   ins_pipe( pipe_slow );
18162 %}
18163 
18164 instruct vconvF2HF(vec dst, vec src) %{
18165   match(Set dst (VectorCastF2HF src));
18166   format %{ "vector_conv_F2HF $dst $src" %}
18167   ins_encode %{
18168     int vlen_enc = vector_length_encoding(this, $src);
18169     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18170   %}
18171   ins_pipe( pipe_slow );
18172 %}
18173 
18174 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18175   predicate(n->as_StoreVector()->memory_size() >= 16);
18176   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18177   format %{ "vcvtps2ph $mem,$src" %}
18178   ins_encode %{
18179     int vlen_enc = vector_length_encoding(this, $src);
18180     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18181   %}
18182   ins_pipe( pipe_slow );
18183 %}
18184 
18185 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18186   match(Set dst (ConvHF2F src));
18187   format %{ "vcvtph2ps $dst,$src" %}
18188   ins_encode %{
18189     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18190   %}
18191   ins_pipe( pipe_slow );
18192 %}
18193 
18194 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18195   match(Set dst (VectorCastHF2F (LoadVector mem)));
18196   format %{ "vcvtph2ps $dst,$mem" %}
18197   ins_encode %{
18198     int vlen_enc = vector_length_encoding(this);
18199     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18200   %}
18201   ins_pipe( pipe_slow );
18202 %}
18203 
18204 instruct vconvHF2F(vec dst, vec src) %{
18205   match(Set dst (VectorCastHF2F src));
18206   ins_cost(125);
18207   format %{ "vector_conv_HF2F $dst,$src" %}
18208   ins_encode %{
18209     int vlen_enc = vector_length_encoding(this);
18210     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18211   %}
18212   ins_pipe( pipe_slow );
18213 %}
18214 
18215 // ---------------------------------------- VectorReinterpret ------------------------------------
18216 instruct reinterpret_mask(kReg dst) %{
18217   predicate(n->bottom_type()->isa_vectmask() &&
18218             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18219   match(Set dst (VectorReinterpret dst));
18220   ins_cost(125);
18221   format %{ "vector_reinterpret $dst\t!" %}
18222   ins_encode %{
18223     // empty
18224   %}
18225   ins_pipe( pipe_slow );
18226 %}
18227 
18228 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18229   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18230             n->bottom_type()->isa_vectmask() &&
18231             n->in(1)->bottom_type()->isa_vectmask() &&
18232             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same size in bits, different lane count
18234   match(Set dst (VectorReinterpret src));
18235   effect(TEMP xtmp);
18236   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18237   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18244   %}
18245   ins_pipe( pipe_slow );
18246 %}
18247 
18248 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18249   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18250             n->bottom_type()->isa_vectmask() &&
18251             n->in(1)->bottom_type()->isa_vectmask() &&
18252             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18253              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same size in bits, different lane count
18255   match(Set dst (VectorReinterpret src));
18256   effect(TEMP xtmp);
18257   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18258   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18265   %}
18266   ins_pipe( pipe_slow );
18267 %}
18268 
18269 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18270   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18271             n->bottom_type()->isa_vectmask() &&
18272             n->in(1)->bottom_type()->isa_vectmask() &&
18273             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18274              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // same size in bits, different lane count
18276   match(Set dst (VectorReinterpret src));
18277   effect(TEMP xtmp);
18278   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18279   ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18286   %}
18287   ins_pipe( pipe_slow );
18288 %}
18289 
18290 instruct reinterpret(vec dst) %{
18291   predicate(!n->bottom_type()->isa_vectmask() &&
18292             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18293   match(Set dst (VectorReinterpret dst));
18294   ins_cost(125);
18295   format %{ "vector_reinterpret $dst\t!" %}
18296   ins_encode %{
18297     // empty
18298   %}
18299   ins_pipe( pipe_slow );
18300 %}
18301 
18302 instruct reinterpret_expand(vec dst, vec src) %{
18303   predicate(UseAVX == 0 &&
18304             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18305   match(Set dst (VectorReinterpret src));
18306   ins_cost(125);
18307   effect(TEMP dst);
18308   format %{ "vector_reinterpret_expand $dst,$src" %}
18309   ins_encode %{
18310     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18311     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18312 
18313     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18314     if (src_vlen_in_bytes == 4) {
18315       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18316     } else {
18317       assert(src_vlen_in_bytes == 8, "");
18318       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18319     }
18320     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18321   %}
18322   ins_pipe( pipe_slow );
18323 %}
18324 
18325 instruct vreinterpret_expand4(legVec dst, vec src) %{
18326   predicate(UseAVX > 0 &&
18327             !n->bottom_type()->isa_vectmask() &&
18328             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18329             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18330   match(Set dst (VectorReinterpret src));
18331   ins_cost(125);
18332   format %{ "vector_reinterpret_expand $dst,$src" %}
18333   ins_encode %{
18334     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18335   %}
18336   ins_pipe( pipe_slow );
18337 %}
18338 
18340 instruct vreinterpret_expand(legVec dst, vec src) %{
18341   predicate(UseAVX > 0 &&
18342             !n->bottom_type()->isa_vectmask() &&
18343             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18344             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18345   match(Set dst (VectorReinterpret src));
18346   ins_cost(125);
18347   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18348   ins_encode %{
18349     switch (Matcher::vector_length_in_bytes(this, $src)) {
18350       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18351       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18352       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18353       default: ShouldNotReachHere();
18354     }
18355   %}
18356   ins_pipe( pipe_slow );
18357 %}
18358 
18359 instruct reinterpret_shrink(vec dst, legVec src) %{
18360   predicate(!n->bottom_type()->isa_vectmask() &&
18361             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18362   match(Set dst (VectorReinterpret src));
18363   ins_cost(125);
18364   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18365   ins_encode %{
18366     switch (Matcher::vector_length_in_bytes(this)) {
18367       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18368       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18369       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18370       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18371       default: ShouldNotReachHere();
18372     }
18373   %}
18374   ins_pipe( pipe_slow );
18375 %}
18376 
18377 // ----------------------------------------------------------------------------------------------------
18378 
18379 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18380   match(Set dst (RoundDoubleMode src rmode));
18381   format %{ "roundsd $dst,$src" %}
18382   ins_cost(150);
18383   ins_encode %{
18384     assert(UseSSE >= 4, "required");
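    // Zeroing dst first breaks the false dependency that the partial-register
    // write of roundsd would otherwise carry on dst's previous contents.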
18385     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18386       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18387     }
18388     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18389   %}
18390   ins_pipe(pipe_slow);
18391 %}
18392 
18393 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18394   match(Set dst (RoundDoubleMode con rmode));
18395   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18396   ins_cost(150);
18397   ins_encode %{
18398     assert(UseSSE >= 4, "required");
18399     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18400   %}
18401   ins_pipe(pipe_slow);
18402 %}
18403 
18404 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18405   predicate(Matcher::vector_length(n) < 8);
18406   match(Set dst (RoundDoubleModeV src rmode));
18407   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18408   ins_encode %{
18409     assert(UseAVX > 0, "required");
18410     int vlen_enc = vector_length_encoding(this);
18411     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18412   %}
18413   ins_pipe( pipe_slow );
18414 %}
18415 
18416 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18417   predicate(Matcher::vector_length(n) == 8);
18418   match(Set dst (RoundDoubleModeV src rmode));
18419   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18420   ins_encode %{
18421     assert(UseAVX > 2, "required");
18422     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18423   %}
18424   ins_pipe( pipe_slow );
18425 %}
18426 
18427 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18428   predicate(Matcher::vector_length(n) < 8);
18429   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18430   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18431   ins_encode %{
18432     assert(UseAVX > 0, "required");
18433     int vlen_enc = vector_length_encoding(this);
18434     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18435   %}
18436   ins_pipe( pipe_slow );
18437 %}
18438 
18439 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18440   predicate(Matcher::vector_length(n) == 8);
18441   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18442   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18443   ins_encode %{
18444     assert(UseAVX > 2, "required");
18445     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18446   %}
18447   ins_pipe( pipe_slow );
18448 %}
18449 
18450 instruct onspinwait() %{
18451   match(OnSpinWait);
18452   ins_cost(200);
18453 
18454   format %{
18455     $$template
18456     $$emit$$"pause\t! membar_onspinwait"
18457   %}
18458   ins_encode %{
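    // pause de-pipelines the spin loop, reducing power consumption and
    // avoiding a memory-order mis-speculation penalty on loop exit.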
18459     __ pause();
18460   %}
18461   ins_pipe(pipe_slow);
18462 %}
18463 
18464 // a * b + c
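// Computed with a single rounding step; requires FMA3 support (UseFMA).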
18465 instruct fmaD_reg(regD a, regD b, regD c) %{
18466   match(Set c (FmaD  c (Binary a b)));
18467   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18468   ins_cost(150);
18469   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18471     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18472   %}
18473   ins_pipe( pipe_slow );
18474 %}
18475 
18476 // a * b + c
18477 instruct fmaF_reg(regF a, regF b, regF c) %{
18478   match(Set c (FmaF  c (Binary a b)));
18479   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18480   ins_cost(150);
18481   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
18483     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18484   %}
18485   ins_pipe( pipe_slow );
18486 %}
18487 
18488 // ====================VECTOR INSTRUCTIONS=====================================
18489 
18490 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18491 instruct MoveVec2Leg(legVec dst, vec src) %{
18492   match(Set dst src);
18493   format %{ "" %}
18494   ins_encode %{
18495     ShouldNotReachHere();
18496   %}
18497   ins_pipe( fpu_reg_reg );
18498 %}
18499 
18500 instruct MoveLeg2Vec(vec dst, legVec src) %{
18501   match(Set dst src);
18502   format %{ "" %}
18503   ins_encode %{
18504     ShouldNotReachHere();
18505   %}
18506   ins_pipe( fpu_reg_reg );
18507 %}
18508 
18509 // ============================================================================
18510 
// Load vector (generic operand pattern)
18512 instruct loadV(vec dst, memory mem) %{
18513   match(Set dst (LoadVector mem));
18514   ins_cost(125);
18515   format %{ "load_vector $dst,$mem" %}
18516   ins_encode %{
18517     BasicType bt = Matcher::vector_element_basic_type(this);
18518     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18519   %}
18520   ins_pipe( pipe_slow );
18521 %}
18522 
// Store vector (generic operand pattern).
18524 instruct storeV(memory mem, vec src) %{
18525   match(Set mem (StoreVector mem src));
18526   ins_cost(145);
18527   format %{ "store_vector $mem,$src\n\t" %}
18528   ins_encode %{
18529     switch (Matcher::vector_length_in_bytes(this, $src)) {
18530       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18531       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18532       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18533       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18534       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18535       default: ShouldNotReachHere();
18536     }
18537   %}
18538   ins_pipe( pipe_slow );
18539 %}
18540 
18541 // ---------------------------------------- Gather ------------------------------------
18542 
18543 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
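
// AVX2 vpgather* instructions consume their vector mask: mask elements are
// cleared as the corresponding loads complete, so an all-ones mask (built
// with vpcmpeqd) gathers every element unconditionally.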
18544 
18545 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18546   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18547             Matcher::vector_length_in_bytes(n) <= 32);
18548   match(Set dst (LoadVectorGather mem idx));
18549   effect(TEMP dst, TEMP tmp, TEMP mask);
18550   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18551   ins_encode %{
18552     int vlen_enc = vector_length_encoding(this);
18553     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18554     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18555     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18556     __ lea($tmp$$Register, $mem$$Address);
18557     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18558   %}
18559   ins_pipe( pipe_slow );
18560 %}
18561 
18563 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18564   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18565             !is_subword_type(Matcher::vector_element_basic_type(n)));
18566   match(Set dst (LoadVectorGather mem idx));
18567   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18568   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
18569   ins_encode %{
18570     int vlen_enc = vector_length_encoding(this);
18571     BasicType elem_bt = Matcher::vector_element_basic_type(this);
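    // kxnor of a mask register with itself yields an all-ones mask.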
18572     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18573     __ lea($tmp$$Register, $mem$$Address);
18574     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18575   %}
18576   ins_pipe( pipe_slow );
18577 %}
18578 
18579 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18580   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18581             !is_subword_type(Matcher::vector_element_basic_type(n)));
18582   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18583   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18584   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18585   ins_encode %{
18586     assert(UseAVX > 2, "sanity");
18587     int vlen_enc = vector_length_encoding(this);
18588     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18589     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, copy the mask operand to a temporary.
18592     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18593     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18594     __ lea($tmp$$Register, $mem$$Address);
18595     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18596   %}
18597   ins_pipe( pipe_slow );
18598 %}
18599 
18600 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18601   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18602   match(Set dst (LoadVectorGather mem idx_base));
18603   effect(TEMP tmp, TEMP rtmp);
18604   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18605   ins_encode %{
18606     int vlen_enc = vector_length_encoding(this);
18607     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18608     __ lea($tmp$$Register, $mem$$Address);
18609     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18610   %}
18611   ins_pipe( pipe_slow );
18612 %}
18613 
18614 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18615                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18616   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18617   match(Set dst (LoadVectorGather mem idx_base));
18618   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18619   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18620   ins_encode %{
18621     int vlen_enc = vector_length_encoding(this);
18622     int vector_len = Matcher::vector_length(this);
18623     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18624     __ lea($tmp$$Register, $mem$$Address);
18625     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18626     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18627                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18628   %}
18629   ins_pipe( pipe_slow );
18630 %}
18631 
18632 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18633   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18634   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18635   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18636   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18637   ins_encode %{
18638     int vlen_enc = vector_length_encoding(this);
18639     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18640     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18641     __ lea($tmp$$Register, $mem$$Address);
18642     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18643     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18644   %}
18645   ins_pipe( pipe_slow );
18646 %}
18647 
18648 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18649                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18650   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18651   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18652   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18653   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18654   ins_encode %{
18655     int vlen_enc = vector_length_encoding(this);
18656     int vector_len = Matcher::vector_length(this);
18657     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18658     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18659     __ lea($tmp$$Register, $mem$$Address);
18660     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18661     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18662     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18663                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18664   %}
18665   ins_pipe( pipe_slow );
18666 %}
18667 
18668 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18669   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18670   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18671   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18672   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18673   ins_encode %{
18674     int vlen_enc = vector_length_encoding(this);
18675     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18676     __ lea($tmp$$Register, $mem$$Address);
18677     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
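    // vpmovmskb yields one mask bit per byte; a short spans two bytes, so for
    // T_SHORT compress the duplicated bits to one bit per element with pext
    // (0x55555555 selects every other bit).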
18678     if (elem_bt == T_SHORT) {
18679       __ movl($mask_idx$$Register, 0x55555555);
18680       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18681     }
18682     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18683     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18684   %}
18685   ins_pipe( pipe_slow );
18686 %}
18687 
18688 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18689                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18690   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18691   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18692   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18693   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18694   ins_encode %{
18695     int vlen_enc = vector_length_encoding(this);
18696     int vector_len = Matcher::vector_length(this);
18697     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18698     __ lea($tmp$$Register, $mem$$Address);
18699     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18700     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18701     if (elem_bt == T_SHORT) {
18702       __ movl($mask_idx$$Register, 0x55555555);
18703       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18704     }
18705     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18706     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18707                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18708   %}
18709   ins_pipe( pipe_slow );
18710 %}
18711 
18712 // ====================Scatter=======================================
18713 
18714 // Scatter INT, LONG, FLOAT, DOUBLE
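
// Scatter stores exist only in AVX-512 (vpscatter*/vscatter*), hence the
// UseAVX > 2 requirement and the opmask register used for predication.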
18715 
18716 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18717   predicate(UseAVX > 2);
18718   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18719   effect(TEMP tmp, TEMP ktmp);
18720   format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
18721   ins_encode %{
18722     int vlen_enc = vector_length_encoding(this, $src);
18723     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18724 
18725     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18726     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18727 
18728     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18729     __ lea($tmp$$Register, $mem$$Address);
18730     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18731   %}
18732   ins_pipe( pipe_slow );
18733 %}
18734 
18735 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18736   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18737   effect(TEMP tmp, TEMP ktmp);
18738   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18739   ins_encode %{
18740     int vlen_enc = vector_length_encoding(this, $src);
18741     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18742     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18743     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, copy the mask operand to a temporary.
18746     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18747     __ lea($tmp$$Register, $mem$$Address);
18748     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18749   %}
18750   ins_pipe( pipe_slow );
18751 %}
18752 
18753 // ====================REPLICATE=======================================
18754 
// Replicate byte scalar into a vector
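// With AVX512BW the GPR is broadcast directly (evpbroadcastb); with AVX2 the
// value goes through an XMM register (movdl + vpbroadcastb); plain SSE builds
// the splat with unpack and shuffle steps.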
18756 instruct vReplB_reg(vec dst, rRegI src) %{
18757   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18758   match(Set dst (Replicate src));
18759   format %{ "replicateB $dst,$src" %}
18760   ins_encode %{
18761     uint vlen = Matcher::vector_length(this);
18762     if (UseAVX >= 2) {
18763       int vlen_enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL+BW for <512-bit operands
18765         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18766         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18767       } else {
18768         __ movdl($dst$$XMMRegister, $src$$Register);
18769         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18770       }
18771     } else {
      assert(UseAVX < 2, "");
18773       __ movdl($dst$$XMMRegister, $src$$Register);
18774       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18775       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18776       if (vlen >= 16) {
18777         assert(vlen == 16, "");
18778         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18779       }
18780     }
18781   %}
18782   ins_pipe( pipe_slow );
18783 %}
18784 
18785 instruct ReplB_mem(vec dst, memory mem) %{
18786   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18787   match(Set dst (Replicate (LoadB mem)));
18788   format %{ "replicateB $dst,$mem" %}
18789   ins_encode %{
18790     int vlen_enc = vector_length_encoding(this);
18791     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18792   %}
18793   ins_pipe( pipe_slow );
18794 %}
18795 
18796 // ====================ReplicateS=======================================
18797 
18798 instruct vReplS_reg(vec dst, rRegI src) %{
18799   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18800   match(Set dst (Replicate src));
18801   format %{ "replicateS $dst,$src" %}
18802   ins_encode %{
18803     uint vlen = Matcher::vector_length(this);
18804     int vlen_enc = vector_length_encoding(this);
18805     if (UseAVX >= 2) {
      if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL+BW for <512-bit operands
18807         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18808         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18809       } else {
18810         __ movdl($dst$$XMMRegister, $src$$Register);
18811         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18812       }
18813     } else {
18814       assert(UseAVX < 2, "");
18815       __ movdl($dst$$XMMRegister, $src$$Register);
18816       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18817       if (vlen >= 8) {
18818         assert(vlen == 8, "");
18819         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18820       }
18821     }
18822   %}
18823   ins_pipe( pipe_slow );
18824 %}
18825 
18826 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18827   match(Set dst (Replicate con));
18828   effect(TEMP rtmp);
18829   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18830   ins_encode %{
18831     int vlen_enc = vector_length_encoding(this);
18832     BasicType bt = Matcher::vector_element_basic_type(this);
18833     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18834     __ movl($rtmp$$Register, $con$$constant);
18835     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18836   %}
18837   ins_pipe( pipe_slow );
18838 %}
18839 
18840 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18841   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18842   match(Set dst (Replicate src));
18843   effect(TEMP rtmp);
18844   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18845   ins_encode %{
18846     int vlen_enc = vector_length_encoding(this);
18847     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18848     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18849   %}
18850   ins_pipe( pipe_slow );
18851 %}
18852 
18853 instruct ReplS_mem(vec dst, memory mem) %{
18854   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18855   match(Set dst (Replicate (LoadS mem)));
18856   format %{ "replicateS $dst,$mem" %}
18857   ins_encode %{
18858     int vlen_enc = vector_length_encoding(this);
18859     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18860   %}
18861   ins_pipe( pipe_slow );
18862 %}
18863 
18864 // ====================ReplicateI=======================================
18865 
18866 instruct ReplI_reg(vec dst, rRegI src) %{
18867   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18868   match(Set dst (Replicate src));
18869   format %{ "replicateI $dst,$src" %}
18870   ins_encode %{
18871     uint vlen = Matcher::vector_length(this);
18872     int vlen_enc = vector_length_encoding(this);
18873     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18874       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18875     } else if (VM_Version::supports_avx2()) {
18876       __ movdl($dst$$XMMRegister, $src$$Register);
18877       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18878     } else {
18879       __ movdl($dst$$XMMRegister, $src$$Register);
18880       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18881     }
18882   %}
18883   ins_pipe( pipe_slow );
18884 %}
18885 
18886 instruct ReplI_mem(vec dst, memory mem) %{
18887   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18888   match(Set dst (Replicate (LoadI mem)));
18889   format %{ "replicateI $dst,$mem" %}
18890   ins_encode %{
18891     int vlen_enc = vector_length_encoding(this);
18892     if (VM_Version::supports_avx2()) {
18893       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18894     } else if (VM_Version::supports_avx()) {
18895       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18896     } else {
18897       __ movdl($dst$$XMMRegister, $mem$$Address);
18898       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18899     }
18900   %}
18901   ins_pipe( pipe_slow );
18902 %}
18903 
18904 instruct ReplI_imm(vec dst, immI con) %{
18905   predicate(Matcher::is_non_long_integral_vector(n));
18906   match(Set dst (Replicate con));
18907   format %{ "replicateI $dst,$con" %}
18908   ins_encode %{
18909     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18910                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18911                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18912     BasicType bt = Matcher::vector_element_basic_type(this);
18913     int vlen = Matcher::vector_length_in_bytes(this);
18914     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18915   %}
18916   ins_pipe( pipe_slow );
18917 %}
18918 
// Replicate scalar zero into a vector
18920 instruct ReplI_zero(vec dst, immI_0 zero) %{
18921   predicate(Matcher::is_non_long_integral_vector(n));
18922   match(Set dst (Replicate zero));
18923   format %{ "replicateI $dst,$zero" %}
18924   ins_encode %{
18925     int vlen_enc = vector_length_encoding(this);
18926     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18927       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18928     } else {
18929       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18930     }
18931   %}
18932   ins_pipe( fpu_reg_reg );
18933 %}
18934 
18935 instruct ReplI_M1(vec dst, immI_M1 con) %{
18936   predicate(Matcher::is_non_long_integral_vector(n));
18937   match(Set dst (Replicate con));
18938   format %{ "vallones $dst" %}
18939   ins_encode %{
18940     int vector_len = vector_length_encoding(this);
18941     __ vallones($dst$$XMMRegister, vector_len);
18942   %}
18943   ins_pipe( pipe_slow );
18944 %}
18945 
18946 // ====================ReplicateL=======================================
18947 
// Replicate long (8-byte) scalar into a vector
18949 instruct ReplL_reg(vec dst, rRegL src) %{
18950   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18951   match(Set dst (Replicate src));
18952   format %{ "replicateL $dst,$src" %}
18953   ins_encode %{
18954     int vlen = Matcher::vector_length(this);
18955     int vlen_enc = vector_length_encoding(this);
18956     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18957       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18958     } else if (VM_Version::supports_avx2()) {
18959       __ movdq($dst$$XMMRegister, $src$$Register);
18960       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18961     } else {
18962       __ movdq($dst$$XMMRegister, $src$$Register);
18963       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18964     }
18965   %}
18966   ins_pipe( pipe_slow );
18967 %}
18968 
18969 instruct ReplL_mem(vec dst, memory mem) %{
18970   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18971   match(Set dst (Replicate (LoadL mem)));
18972   format %{ "replicateL $dst,$mem" %}
18973   ins_encode %{
18974     int vlen_enc = vector_length_encoding(this);
18975     if (VM_Version::supports_avx2()) {
18976       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18977     } else if (VM_Version::supports_sse3()) {
18978       __ movddup($dst$$XMMRegister, $mem$$Address);
18979     } else {
18980       __ movq($dst$$XMMRegister, $mem$$Address);
18981       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18982     }
18983   %}
18984   ins_pipe( pipe_slow );
18985 %}
18986 
// Replicate long (8-byte) scalar immediate into a vector by loading from the constant table.
18988 instruct ReplL_imm(vec dst, immL con) %{
18989   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18990   match(Set dst (Replicate con));
18991   format %{ "replicateL $dst,$con" %}
18992   ins_encode %{
18993     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18994     int vlen = Matcher::vector_length_in_bytes(this);
18995     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18996   %}
18997   ins_pipe( pipe_slow );
18998 %}
18999 
19000 instruct ReplL_zero(vec dst, immL0 zero) %{
19001   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19002   match(Set dst (Replicate zero));
19003   format %{ "replicateL $dst,$zero" %}
19004   ins_encode %{
19005     int vlen_enc = vector_length_encoding(this);
19006     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19007       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19008     } else {
19009       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19010     }
19011   %}
19012   ins_pipe( fpu_reg_reg );
19013 %}
19014 
19015 instruct ReplL_M1(vec dst, immL_M1 con) %{
19016   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19017   match(Set dst (Replicate con));
19018   format %{ "vallones $dst" %}
19019   ins_encode %{
19020     int vector_len = vector_length_encoding(this);
19021     __ vallones($dst$$XMMRegister, vector_len);
19022   %}
19023   ins_pipe( pipe_slow );
19024 %}
19025 
19026 // ====================ReplicateF=======================================
19027 
19028 instruct vReplF_reg(vec dst, vlRegF src) %{
19029   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19030   match(Set dst (Replicate src));
19031   format %{ "replicateF $dst,$src" %}
19032   ins_encode %{
19033     uint vlen = Matcher::vector_length(this);
19034     int vlen_enc = vector_length_encoding(this);
19035     if (vlen <= 4) {
19036       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19037     } else if (VM_Version::supports_avx2()) {
19038       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19039     } else {
19040       assert(vlen == 8, "sanity");
19041       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19042       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19043     }
19044   %}
19045   ins_pipe( pipe_slow );
19046 %}
19047 
19048 instruct ReplF_reg(vec dst, vlRegF src) %{
19049   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19050   match(Set dst (Replicate src));
19051   format %{ "replicateF $dst,$src" %}
19052   ins_encode %{
19053     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19054   %}
19055   ins_pipe( pipe_slow );
19056 %}
19057 
19058 instruct ReplF_mem(vec dst, memory mem) %{
19059   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19060   match(Set dst (Replicate (LoadF mem)));
19061   format %{ "replicateF $dst,$mem" %}
19062   ins_encode %{
19063     int vlen_enc = vector_length_encoding(this);
19064     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19065   %}
19066   ins_pipe( pipe_slow );
19067 %}
19068 
// Replicate float scalar immediate into a vector by loading from the constant table.
19070 instruct ReplF_imm(vec dst, immF con) %{
19071   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19072   match(Set dst (Replicate con));
19073   format %{ "replicateF $dst,$con" %}
19074   ins_encode %{
19075     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19076                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19077     int vlen = Matcher::vector_length_in_bytes(this);
19078     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19079   %}
19080   ins_pipe( pipe_slow );
19081 %}
19082 
19083 instruct ReplF_zero(vec dst, immF0 zero) %{
19084   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19085   match(Set dst (Replicate zero));
19086   format %{ "replicateF $dst,$zero" %}
19087   ins_encode %{
19088     int vlen_enc = vector_length_encoding(this);
19089     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19090       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19091     } else {
19092       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19093     }
19094   %}
19095   ins_pipe( fpu_reg_reg );
19096 %}
19097 
19098 // ====================ReplicateD=======================================
19099 
// Replicate double (8-byte) scalar into a vector
19101 instruct vReplD_reg(vec dst, vlRegD src) %{
19102   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19103   match(Set dst (Replicate src));
19104   format %{ "replicateD $dst,$src" %}
19105   ins_encode %{
19106     uint vlen = Matcher::vector_length(this);
19107     int vlen_enc = vector_length_encoding(this);
19108     if (vlen <= 2) {
19109       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19110     } else if (VM_Version::supports_avx2()) {
19111       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19112     } else {
19113       assert(vlen == 4, "sanity");
19114       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19115       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19116     }
19117   %}
19118   ins_pipe( pipe_slow );
19119 %}
19120 
19121 instruct ReplD_reg(vec dst, vlRegD src) %{
19122   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19123   match(Set dst (Replicate src));
19124   format %{ "replicateD $dst,$src" %}
19125   ins_encode %{
19126     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19127   %}
19128   ins_pipe( pipe_slow );
19129 %}
19130 
19131 instruct ReplD_mem(vec dst, memory mem) %{
19132   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19133   match(Set dst (Replicate (LoadD mem)));
19134   format %{ "replicateD $dst,$mem" %}
19135   ins_encode %{
19136     if (Matcher::vector_length(this) >= 4) {
19137       int vlen_enc = vector_length_encoding(this);
19138       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19139     } else {
19140       __ movddup($dst$$XMMRegister, $mem$$Address);
19141     }
19142   %}
19143   ins_pipe( pipe_slow );
19144 %}
19145 
// Replicate double (8-byte) scalar immediate into a vector by loading from the constant table.
19147 instruct ReplD_imm(vec dst, immD con) %{
19148   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19149   match(Set dst (Replicate con));
19150   format %{ "replicateD $dst,$con" %}
19151   ins_encode %{
19152     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19153     int vlen = Matcher::vector_length_in_bytes(this);
19154     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19155   %}
19156   ins_pipe( pipe_slow );
19157 %}
19158 
19159 instruct ReplD_zero(vec dst, immD0 zero) %{
19160   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19161   match(Set dst (Replicate zero));
19162   format %{ "replicateD $dst,$zero" %}
19163   ins_encode %{
19164     int vlen_enc = vector_length_encoding(this);
19165     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19166       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19167     } else {
19168       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19169     }
19170   %}
19171   ins_pipe( fpu_reg_reg );
19172 %}
19173 
19174 // ====================VECTOR INSERT=======================================
19175 
19176 instruct insert(vec dst, rRegI val, immU8 idx) %{
19177   predicate(Matcher::vector_length_in_bytes(n) < 32);
19178   match(Set dst (VectorInsert (Binary dst val) idx));
19179   format %{ "vector_insert $dst,$val,$idx" %}
19180   ins_encode %{
19181     assert(UseSSE >= 4, "required");
19182     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19183 
19184     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19185 
19186     assert(is_integral_type(elem_bt), "");
19187     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19188 
19189     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19190   %}
19191   ins_pipe( pipe_slow );
19192 %}
19193 
19194 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19195   predicate(Matcher::vector_length_in_bytes(n) == 32);
19196   match(Set dst (VectorInsert (Binary src val) idx));
19197   effect(TEMP vtmp);
19198   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19199   ins_encode %{
19200     int vlen_enc = Assembler::AVX_256bit;
19201     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19202     int elem_per_lane = 16/type2aelembytes(elem_bt);
19203     int log2epr = log2(elem_per_lane);
19204 
19205     assert(is_integral_type(elem_bt), "sanity");
19206     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19207 
19208     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19209     uint y_idx = ($idx$$constant >> log2epr) & 1;
19210     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19211     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19212     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19213   %}
19214   ins_pipe( pipe_slow );
19215 %}
19216 
19217 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19218   predicate(Matcher::vector_length_in_bytes(n) == 64);
19219   match(Set dst (VectorInsert (Binary src val) idx));
19220   effect(TEMP vtmp);
19221   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19222   ins_encode %{
19223     assert(UseAVX > 2, "sanity");
19224 
19225     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19226     int elem_per_lane = 16/type2aelembytes(elem_bt);
19227     int log2epr = log2(elem_per_lane);
19228 
19229     assert(is_integral_type(elem_bt), "");
19230     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19231 
19232     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19233     uint y_idx = ($idx$$constant >> log2epr) & 3;
19234     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19235     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19236     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19237   %}
19238   ins_pipe( pipe_slow );
19239 %}
19240 
19241 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19242   predicate(Matcher::vector_length(n) == 2);
19243   match(Set dst (VectorInsert (Binary dst val) idx));
19244   format %{ "vector_insert $dst,$val,$idx" %}
19245   ins_encode %{
19246     assert(UseSSE >= 4, "required");
19247     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19248     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19249 
19250     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19251   %}
19252   ins_pipe( pipe_slow );
19253 %}
19254 
19255 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19256   predicate(Matcher::vector_length(n) == 4);
19257   match(Set dst (VectorInsert (Binary src val) idx));
19258   effect(TEMP vtmp);
19259   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19260   ins_encode %{
19261     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19262     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19263 
19264     uint x_idx = $idx$$constant & right_n_bits(1);
19265     uint y_idx = ($idx$$constant >> 1) & 1;
19266     int vlen_enc = Assembler::AVX_256bit;
19267     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19268     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19269     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19270   %}
19271   ins_pipe( pipe_slow );
19272 %}
19273 
19274 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19275   predicate(Matcher::vector_length(n) == 8);
19276   match(Set dst (VectorInsert (Binary src val) idx));
19277   effect(TEMP vtmp);
19278   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19279   ins_encode %{
19280     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19281     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19282 
19283     uint x_idx = $idx$$constant & right_n_bits(1);
19284     uint y_idx = ($idx$$constant >> 1) & 3;
19285     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19286     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19287     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19288   %}
19289   ins_pipe( pipe_slow );
19290 %}
19291 
19292 instruct insertF(vec dst, regF val, immU8 idx) %{
19293   predicate(Matcher::vector_length(n) < 8);
19294   match(Set dst (VectorInsert (Binary dst val) idx));
19295   format %{ "vector_insert $dst,$val,$idx" %}
19296   ins_encode %{
19297     assert(UseSSE >= 4, "sanity");
19298 
19299     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19300     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19301 
19302     uint x_idx = $idx$$constant & right_n_bits(2);
19303     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19304   %}
19305   ins_pipe( pipe_slow );
19306 %}
19307 
19308 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19309   predicate(Matcher::vector_length(n) >= 8);
19310   match(Set dst (VectorInsert (Binary src val) idx));
19311   effect(TEMP vtmp);
19312   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19313   ins_encode %{
19314     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19315     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19316 
19317     int vlen = Matcher::vector_length(this);
19318     uint x_idx = $idx$$constant & right_n_bits(2);
19319     if (vlen == 8) {
19320       uint y_idx = ($idx$$constant >> 2) & 1;
19321       int vlen_enc = Assembler::AVX_256bit;
19322       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19323       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19324       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19325     } else {
19326       assert(vlen == 16, "sanity");
19327       uint y_idx = ($idx$$constant >> 2) & 3;
19328       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19329       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19330       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19331     }
19332   %}
19333   ins_pipe( pipe_slow );
19334 %}
19335 
19336 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19337   predicate(Matcher::vector_length(n) == 2);
19338   match(Set dst (VectorInsert (Binary dst val) idx));
19339   effect(TEMP tmp);
19340   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19341   ins_encode %{
19342     assert(UseSSE >= 4, "sanity");
19343     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19344     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19345 
19346     __ movq($tmp$$Register, $val$$XMMRegister);
19347     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19348   %}
19349   ins_pipe( pipe_slow );
19350 %}
19351 
19352 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19353   predicate(Matcher::vector_length(n) == 4);
19354   match(Set dst (VectorInsert (Binary src val) idx));
19355   effect(TEMP vtmp, TEMP tmp);
19356   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19357   ins_encode %{
19358     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19359     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19360 
19361     uint x_idx = $idx$$constant & right_n_bits(1);
19362     uint y_idx = ($idx$$constant >> 1) & 1;
19363     int vlen_enc = Assembler::AVX_256bit;
19364     __ movq($tmp$$Register, $val$$XMMRegister);
19365     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19366     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19367     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19368   %}
19369   ins_pipe( pipe_slow );
19370 %}
19371 
19372 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19373   predicate(Matcher::vector_length(n) == 8);
19374   match(Set dst (VectorInsert (Binary src val) idx));
19375   effect(TEMP tmp, TEMP vtmp);
19376   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19377   ins_encode %{
19378     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19379     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19380 
19381     uint x_idx = $idx$$constant & right_n_bits(1);
19382     uint y_idx = ($idx$$constant >> 1) & 3;
19383     __ movq($tmp$$Register, $val$$XMMRegister);
19384     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19385     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19386     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19387   %}
19388   ins_pipe( pipe_slow );
19389 %}
19390 
19391 // ====================REDUCTION ARITHMETIC=======================================
19392 
19393 // =======================Int Reduction==========================================
19394 
19395 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19396   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19397   match(Set dst (AddReductionVI src1 src2));
19398   match(Set dst (MulReductionVI src1 src2));
19399   match(Set dst (AndReductionV  src1 src2));
19400   match(Set dst ( OrReductionV  src1 src2));
19401   match(Set dst (XorReductionV  src1 src2));
19402   match(Set dst (MinReductionV  src1 src2));
19403   match(Set dst (MaxReductionV  src1 src2));
19404   match(Set dst (UMinReductionV  src1 src2));
19405   match(Set dst (UMaxReductionV  src1 src2));
19406   effect(TEMP vtmp1, TEMP vtmp2);
19407   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19408   ins_encode %{
19409     int opcode = this->ideal_Opcode();
19410     int vlen = Matcher::vector_length(this, $src2);
19411     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19412   %}
19413   ins_pipe( pipe_slow );
19414 %}
19415 
19416 // =======================Long Reduction==========================================
19417 
19418 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19419   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19420   match(Set dst (AddReductionVL src1 src2));
19421   match(Set dst (MulReductionVL src1 src2));
19422   match(Set dst (AndReductionV  src1 src2));
19423   match(Set dst ( OrReductionV  src1 src2));
19424   match(Set dst (XorReductionV  src1 src2));
19425   match(Set dst (MinReductionV  src1 src2));
19426   match(Set dst (MaxReductionV  src1 src2));
19427   match(Set dst (UMinReductionV  src1 src2));
19428   match(Set dst (UMaxReductionV  src1 src2));
19429   effect(TEMP vtmp1, TEMP vtmp2);
19430   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19431   ins_encode %{
19432     int opcode = this->ideal_Opcode();
19433     int vlen = Matcher::vector_length(this, $src2);
19434     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19435   %}
19436   ins_pipe( pipe_slow );
19437 %}
19438 
19439 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19440   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19441   match(Set dst (AddReductionVL src1 src2));
19442   match(Set dst (MulReductionVL src1 src2));
19443   match(Set dst (AndReductionV  src1 src2));
19444   match(Set dst ( OrReductionV  src1 src2));
19445   match(Set dst (XorReductionV  src1 src2));
19446   match(Set dst (MinReductionV  src1 src2));
19447   match(Set dst (MaxReductionV  src1 src2));
19448   match(Set dst (UMinReductionV  src1 src2));
19449   match(Set dst (UMaxReductionV  src1 src2));
19450   effect(TEMP vtmp1, TEMP vtmp2);
19451   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19452   ins_encode %{
19453     int opcode = this->ideal_Opcode();
19454     int vlen = Matcher::vector_length(this, $src2);
19455     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19456   %}
19457   ins_pipe( pipe_slow );
19458 %}
19459 
19460 // =======================Float Reduction==========================================
19461 
19462 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19463   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19464   match(Set dst (AddReductionVF dst src));
19465   match(Set dst (MulReductionVF dst src));
19466   effect(TEMP dst, TEMP vtmp);
19467   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19468   ins_encode %{
19469     int opcode = this->ideal_Opcode();
19470     int vlen = Matcher::vector_length(this, $src);
19471     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19472   %}
19473   ins_pipe( pipe_slow );
19474 %}
19475 
19476 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19477   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19478   match(Set dst (AddReductionVF dst src));
19479   match(Set dst (MulReductionVF dst src));
19480   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19481   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19482   ins_encode %{
19483     int opcode = this->ideal_Opcode();
19484     int vlen = Matcher::vector_length(this, $src);
19485     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19486   %}
19487   ins_pipe( pipe_slow );
19488 %}
19489 
19490 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19491   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19492   match(Set dst (AddReductionVF dst src));
19493   match(Set dst (MulReductionVF dst src));
19494   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19495   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19496   ins_encode %{
19497     int opcode = this->ideal_Opcode();
19498     int vlen = Matcher::vector_length(this, $src);
19499     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19500   %}
19501   ins_pipe( pipe_slow );
19502 %}
19503 
19505 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19506   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19507   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19508   // src1 contains reduction identity
19509   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19510   match(Set dst (AddReductionVF src1 src2));
19511   match(Set dst (MulReductionVF src1 src2));
19512   effect(TEMP dst);
19513   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19514   ins_encode %{
19515     int opcode = this->ideal_Opcode();
19516     int vlen = Matcher::vector_length(this, $src2);
19517     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19518   %}
19519   ins_pipe( pipe_slow );
19520 %}
19521 
19522 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19523   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19524   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19525   // src1 contains reduction identity
19526   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19527   match(Set dst (AddReductionVF src1 src2));
19528   match(Set dst (MulReductionVF src1 src2));
19529   effect(TEMP dst, TEMP vtmp);
19530   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19531   ins_encode %{
19532     int opcode = this->ideal_Opcode();
19533     int vlen = Matcher::vector_length(this, $src2);
19534     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19535   %}
19536   ins_pipe( pipe_slow );
19537 %}
19538 
19539 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19540   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19541   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19542   // src1 contains reduction identity
19543   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19544   match(Set dst (AddReductionVF src1 src2));
19545   match(Set dst (MulReductionVF src1 src2));
19546   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19547   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19548   ins_encode %{
19549     int opcode = this->ideal_Opcode();
19550     int vlen = Matcher::vector_length(this, $src2);
19551     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19552   %}
19553   ins_pipe( pipe_slow );
19554 %}
19555 
19556 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19557   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19558   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19559   // src1 contains reduction identity
19560   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19561   match(Set dst (AddReductionVF src1 src2));
19562   match(Set dst (MulReductionVF src1 src2));
19563   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19564   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19565   ins_encode %{
19566     int opcode = this->ideal_Opcode();
19567     int vlen = Matcher::vector_length(this, $src2);
19568     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19569   %}
19570   ins_pipe( pipe_slow );
19571 %}
19572 
19573 // =======================Double Reduction==========================================
19574 
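// The strictly ordered rules below fold the scalar accumulator through every
// lane in order; dst is both the incoming accumulator and the result, which
// is why the match is (AddReductionVD dst src). A scalar model (illustrative
// C++ only):
//
//   double acc = dst;
//   for (int i = 0; i < vlen; i++) {
//     acc = (op == Op_AddReductionVD) ? acc + src[i] : acc * src[i];
//   }
//   // acc becomes the new dst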
19575 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19576   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19577   match(Set dst (AddReductionVD dst src));
19578   match(Set dst (MulReductionVD dst src));
19579   effect(TEMP dst, TEMP vtmp);
19580   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19581   ins_encode %{
19582     int opcode = this->ideal_Opcode();
19583     int vlen = Matcher::vector_length(this, $src);
19584     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19585 %}
19586   ins_pipe( pipe_slow );
19587 %}
19588 
19589 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19590   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19591   match(Set dst (AddReductionVD dst src));
19592   match(Set dst (MulReductionVD dst src));
19593   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19594   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19595   ins_encode %{
19596     int opcode = this->ideal_Opcode();
19597     int vlen = Matcher::vector_length(this, $src);
19598     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19599   %}
19600   ins_pipe( pipe_slow );
19601 %}
19602 
19603 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19604   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19605   match(Set dst (AddReductionVD dst src));
19606   match(Set dst (MulReductionVD dst src));
19607   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19608   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19609   ins_encode %{
19610     int opcode = this->ideal_Opcode();
19611     int vlen = Matcher::vector_length(this, $src);
19612     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19613   %}
19614   ins_pipe( pipe_slow );
19615 %}
19616 
19617 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19618   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19619   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19620   // src1 contains reduction identity
19621   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19622   match(Set dst (AddReductionVD src1 src2));
19623   match(Set dst (MulReductionVD src1 src2));
19624   effect(TEMP dst);
19625   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19626   ins_encode %{
19627     int opcode = this->ideal_Opcode();
19628     int vlen = Matcher::vector_length(this, $src2);
19629     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19630 %}
19631   ins_pipe( pipe_slow );
19632 %}
19633 
19634 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19635   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19636   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19637   // src1 contains reduction identity
19638   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19639   match(Set dst (AddReductionVD src1 src2));
19640   match(Set dst (MulReductionVD src1 src2));
19641   effect(TEMP dst, TEMP vtmp);
19642   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19643   ins_encode %{
19644     int opcode = this->ideal_Opcode();
19645     int vlen = Matcher::vector_length(this, $src2);
19646     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19647   %}
19648   ins_pipe( pipe_slow );
19649 %}
19650 
19651 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19652   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19653   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19654   // src1 contains reduction identity
19655   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19656   match(Set dst (AddReductionVD src1 src2));
19657   match(Set dst (MulReductionVD src1 src2));
19658   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19659   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19660   ins_encode %{
19661     int opcode = this->ideal_Opcode();
19662     int vlen = Matcher::vector_length(this, $src2);
19663     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19664   %}
19665   ins_pipe( pipe_slow );
19666 %}
19667 
19668 // =======================Byte Reduction==========================================
19669 
19670 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19671   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19672   match(Set dst (AddReductionVI src1 src2));
19673   match(Set dst (AndReductionV  src1 src2));
19674   match(Set dst ( OrReductionV  src1 src2));
19675   match(Set dst (XorReductionV  src1 src2));
19676   match(Set dst (MinReductionV  src1 src2));
19677   match(Set dst (MaxReductionV  src1 src2));
19678   match(Set dst (UMinReductionV  src1 src2));
19679   match(Set dst (UMaxReductionV  src1 src2));
19680   effect(TEMP vtmp1, TEMP vtmp2);
19681   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19682   ins_encode %{
19683     int opcode = this->ideal_Opcode();
19684     int vlen = Matcher::vector_length(this, $src2);
19685     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19686   %}
19687   ins_pipe( pipe_slow );
19688 %}
19689 
19690 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19691   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19692   match(Set dst (AddReductionVI src1 src2));
19693   match(Set dst (AndReductionV  src1 src2));
19694   match(Set dst ( OrReductionV  src1 src2));
19695   match(Set dst (XorReductionV  src1 src2));
19696   match(Set dst (MinReductionV  src1 src2));
19697   match(Set dst (MaxReductionV  src1 src2));
19698   match(Set dst (UMinReductionV  src1 src2));
19699   match(Set dst (UMaxReductionV  src1 src2));
19700   effect(TEMP vtmp1, TEMP vtmp2);
19701   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19702   ins_encode %{
19703     int opcode = this->ideal_Opcode();
19704     int vlen = Matcher::vector_length(this, $src2);
19705     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19706   %}
19707   ins_pipe( pipe_slow );
19708 %}
19709 
19710 // =======================Short Reduction==========================================
19711 
19712 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19713   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19714   match(Set dst (AddReductionVI src1 src2));
19715   match(Set dst (MulReductionVI src1 src2));
19716   match(Set dst (AndReductionV  src1 src2));
19717   match(Set dst ( OrReductionV  src1 src2));
19718   match(Set dst (XorReductionV  src1 src2));
19719   match(Set dst (MinReductionV  src1 src2));
19720   match(Set dst (MaxReductionV  src1 src2));
19721   match(Set dst (UMinReductionV  src1 src2));
19722   match(Set dst (UMaxReductionV  src1 src2));
19723   effect(TEMP vtmp1, TEMP vtmp2);
19724   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19725   ins_encode %{
19726     int opcode = this->ideal_Opcode();
19727     int vlen = Matcher::vector_length(this, $src2);
19728     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19729   %}
19730   ins_pipe( pipe_slow );
19731 %}
19732 
19733 // =======================Mul Reduction==========================================
19734 
19735 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19736   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19737             Matcher::vector_length(n->in(2)) <= 32); // src2
19738   match(Set dst (MulReductionVI src1 src2));
19739   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19740   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19741   ins_encode %{
19742     int opcode = this->ideal_Opcode();
19743     int vlen = Matcher::vector_length(this, $src2);
19744     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19745   %}
19746   ins_pipe( pipe_slow );
19747 %}
19748 
19749 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19750   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19751             Matcher::vector_length(n->in(2)) == 64); // src2
19752   match(Set dst (MulReductionVI src1 src2));
19753   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19754   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19755   ins_encode %{
19756     int opcode = this->ideal_Opcode();
19757     int vlen = Matcher::vector_length(this, $src2);
19758     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19759   %}
19760   ins_pipe( pipe_slow );
19761 %}
19762 
19763 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
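// Java min/max semantics differ from raw minps/maxps: min(-0.0f, +0.0f) must
// be -0.0f, and min(x, NaN) must be NaN, whereas the x86 instructions return
// the second source operand in both of those cases. The pre-AVX10.2
// sequences below therefore need extra compares and blends, which is what
// the temp registers and KILL cr pay for. A scalar sketch of the required
// semantics (illustrative C++ only):
//
//   float java_min(float a, float b) {
//     if (a != a) return a;                // NaN propagates
//     if (b != b) return b;
//     if (a == 0.0f && b == 0.0f) {
//       return std::signbit(a) ? a : b;    // -0.0f beats +0.0f
//     }
//     return a < b ? a : b;
//   }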
19765 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19766                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19767   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19768             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19769              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19770             Matcher::vector_length(n->in(2)) == 2);
19771   match(Set dst (MinReductionV src1 src2));
19772   match(Set dst (MaxReductionV src1 src2));
19773   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19774   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19775   ins_encode %{
19776     assert(UseAVX > 0, "sanity");
19777 
19778     int opcode = this->ideal_Opcode();
19779     int vlen = Matcher::vector_length(this, $src2);
19780     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19781                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19782   %}
19783   ins_pipe( pipe_slow );
19784 %}
19785 
19786 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19787                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19788   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19789             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19790              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19791             Matcher::vector_length(n->in(2)) >= 4);
19792   match(Set dst (MinReductionV src1 src2));
19793   match(Set dst (MaxReductionV src1 src2));
19794   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19795   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19796   ins_encode %{
19797     assert(UseAVX > 0, "sanity");
19798 
19799     int opcode = this->ideal_Opcode();
19800     int vlen = Matcher::vector_length(this, $src2);
19801     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19802                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19803   %}
19804   ins_pipe( pipe_slow );
19805 %}
19806 
19807 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19808                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19809   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19810             Matcher::vector_length(n->in(2)) == 2);
19811   match(Set dst (MinReductionV dst src));
19812   match(Set dst (MaxReductionV dst src));
19813   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19814   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19815   ins_encode %{
19816     assert(UseAVX > 0, "sanity");
19817 
19818     int opcode = this->ideal_Opcode();
19819     int vlen = Matcher::vector_length(this, $src);
19820     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19821                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19822   %}
19823   ins_pipe( pipe_slow );
19824 %}
19825 
19827 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19828                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19829   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19830             Matcher::vector_length(n->in(2)) >= 4);
19831   match(Set dst (MinReductionV dst src));
19832   match(Set dst (MaxReductionV dst src));
19833   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19834   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19835   ins_encode %{
19836     assert(UseAVX > 0, "sanity");
19837 
19838     int opcode = this->ideal_Opcode();
19839     int vlen = Matcher::vector_length(this, $src);
19840     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19841                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19842   %}
19843   ins_pipe( pipe_slow );
19844 %}
19845 
19846 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19847   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19848             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19849              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19850             Matcher::vector_length(n->in(2)) == 2);
19851   match(Set dst (MinReductionV src1 src2));
19852   match(Set dst (MaxReductionV src1 src2));
19853   effect(TEMP dst, TEMP xtmp1);
19854   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19855   ins_encode %{
19856     int opcode = this->ideal_Opcode();
19857     int vlen = Matcher::vector_length(this, $src2);
19858     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19859                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19860   %}
19861   ins_pipe( pipe_slow );
19862 %}
19863 
19864 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19865   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19866             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19867              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19868             Matcher::vector_length(n->in(2)) >= 4);
19869   match(Set dst (MinReductionV src1 src2));
19870   match(Set dst (MaxReductionV src1 src2));
19871   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19872   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19873   ins_encode %{
19874     int opcode = this->ideal_Opcode();
19875     int vlen = Matcher::vector_length(this, $src2);
19876     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19877                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19878   %}
19879   ins_pipe( pipe_slow );
19880 %}
19881 
19882 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19883   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19884             Matcher::vector_length(n->in(2)) == 2);
19885   match(Set dst (MinReductionV dst src));
19886   match(Set dst (MaxReductionV dst src));
19887   effect(TEMP dst, TEMP xtmp1);
19888   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19889   ins_encode %{
19890     int opcode = this->ideal_Opcode();
19891     int vlen = Matcher::vector_length(this, $src);
19892     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19893                          $xtmp1$$XMMRegister);
19894   %}
19895   ins_pipe( pipe_slow );
19896 %}
19897 
19898 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19899   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19900             Matcher::vector_length(n->in(2)) >= 4);
19901   match(Set dst (MinReductionV dst src));
19902   match(Set dst (MaxReductionV dst src));
19903   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19905   ins_encode %{
19906     int opcode = this->ideal_Opcode();
19907     int vlen = Matcher::vector_length(this, $src);
19908     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19909                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19910   %}
19911   ins_pipe( pipe_slow );
19912 %}
19913 
//--------------------Min/Max Double Reduction --------------------
19915 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19916                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19917   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19918             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19919              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19920             Matcher::vector_length(n->in(2)) == 2);
19921   match(Set dst (MinReductionV src1 src2));
19922   match(Set dst (MaxReductionV src1 src2));
19923   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19924   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19925   ins_encode %{
19926     assert(UseAVX > 0, "sanity");
19927 
19928     int opcode = this->ideal_Opcode();
19929     int vlen = Matcher::vector_length(this, $src2);
19930     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19931                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19932   %}
19933   ins_pipe( pipe_slow );
19934 %}
19935 
19936 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19937                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19938   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19939             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19940              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19941             Matcher::vector_length(n->in(2)) >= 4);
19942   match(Set dst (MinReductionV src1 src2));
19943   match(Set dst (MaxReductionV src1 src2));
19944   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19945   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19946   ins_encode %{
19947     assert(UseAVX > 0, "sanity");
19948 
19949     int opcode = this->ideal_Opcode();
19950     int vlen = Matcher::vector_length(this, $src2);
19951     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19952                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19953   %}
19954   ins_pipe( pipe_slow );
19955 %}
19956 
19958 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19959                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19960   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19961             Matcher::vector_length(n->in(2)) == 2);
19962   match(Set dst (MinReductionV dst src));
19963   match(Set dst (MaxReductionV dst src));
19964   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19965   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19966   ins_encode %{
19967     assert(UseAVX > 0, "sanity");
19968 
19969     int opcode = this->ideal_Opcode();
19970     int vlen = Matcher::vector_length(this, $src);
19971     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19972                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19973   %}
19974   ins_pipe( pipe_slow );
19975 %}
19976 
19977 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19978                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19979   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19980             Matcher::vector_length(n->in(2)) >= 4);
19981   match(Set dst (MinReductionV dst src));
19982   match(Set dst (MaxReductionV dst src));
19983   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19984   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19985   ins_encode %{
19986     assert(UseAVX > 0, "sanity");
19987 
19988     int opcode = this->ideal_Opcode();
19989     int vlen = Matcher::vector_length(this, $src);
19990     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19991                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19992   %}
19993   ins_pipe( pipe_slow );
19994 %}
19995 
19996 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19997   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19998             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19999              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20000             Matcher::vector_length(n->in(2)) == 2);
20001   match(Set dst (MinReductionV src1 src2));
20002   match(Set dst (MaxReductionV src1 src2));
20003   effect(TEMP dst, TEMP xtmp1);
20004   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20005   ins_encode %{
20006     int opcode = this->ideal_Opcode();
20007     int vlen = Matcher::vector_length(this, $src2);
20008     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20009                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20010   %}
20011   ins_pipe( pipe_slow );
20012 %}
20013 
20014 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20015   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20016             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20017              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20018             Matcher::vector_length(n->in(2)) >= 4);
20019   match(Set dst (MinReductionV src1 src2));
20020   match(Set dst (MaxReductionV src1 src2));
20021   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20022   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20023   ins_encode %{
20024     int opcode = this->ideal_Opcode();
20025     int vlen = Matcher::vector_length(this, $src2);
20026     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20027                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20028   %}
20029   ins_pipe( pipe_slow );
20030 %}
20031 
20033 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20034   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20035             Matcher::vector_length(n->in(2)) == 2);
20036   match(Set dst (MinReductionV dst src));
20037   match(Set dst (MaxReductionV dst src));
20038   effect(TEMP dst, TEMP xtmp1);
20039   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20040   ins_encode %{
20041     int opcode = this->ideal_Opcode();
20042     int vlen = Matcher::vector_length(this, $src);
20043     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20044                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20045   %}
20046   ins_pipe( pipe_slow );
20047 %}
20048 
20049 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20050   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20051             Matcher::vector_length(n->in(2)) >= 4);
20052   match(Set dst (MinReductionV dst src));
20053   match(Set dst (MaxReductionV dst src));
20054   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20055   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20056   ins_encode %{
20057     int opcode = this->ideal_Opcode();
20058     int vlen = Matcher::vector_length(this, $src);
20059     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20060                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20061   %}
20062   ins_pipe( pipe_slow );
20063 %}
20064 
20065 // ====================VECTOR ARITHMETIC=======================================
20066 
20067 // --------------------------------- ADD --------------------------------------
20068 
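// Each element type below follows the same three-rule pattern: a destructive
// SSE form where dst doubles as the first input ("paddb $dst,$src", i.e.
// dst = dst + src), a non-destructive three-operand AVX form
// ("vpaddb $dst,$src1,$src2"), and an AVX form that folds the load of the
// second operand from memory.
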
20069 // Bytes vector add
20070 instruct vaddB(vec dst, vec src) %{
20071   predicate(UseAVX == 0);
20072   match(Set dst (AddVB dst src));
20073   format %{ "paddb   $dst,$src\t! add packedB" %}
20074   ins_encode %{
20075     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20076   %}
20077   ins_pipe( pipe_slow );
20078 %}
20079 
20080 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20081   predicate(UseAVX > 0);
20082   match(Set dst (AddVB src1 src2));
20083   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20084   ins_encode %{
20085     int vlen_enc = vector_length_encoding(this);
20086     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20087   %}
20088   ins_pipe( pipe_slow );
20089 %}
20090 
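// The memory-operand forms are limited to vectors wider than 8 bytes: a
// folded LoadVector becomes a full-width (at least 128-bit) memory access,
// which for a 4- or 8-byte vector could read past the end of the data it
// was loaded from.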
20091 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20092   predicate((UseAVX > 0) &&
20093             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20094   match(Set dst (AddVB src (LoadVector mem)));
20095   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20096   ins_encode %{
20097     int vlen_enc = vector_length_encoding(this);
20098     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20099   %}
20100   ins_pipe( pipe_slow );
20101 %}
20102 
20103 // Shorts/Chars vector add
20104 instruct vaddS(vec dst, vec src) %{
20105   predicate(UseAVX == 0);
20106   match(Set dst (AddVS dst src));
20107   format %{ "paddw   $dst,$src\t! add packedS" %}
20108   ins_encode %{
20109     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20110   %}
20111   ins_pipe( pipe_slow );
20112 %}
20113 
20114 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20115   predicate(UseAVX > 0);
20116   match(Set dst (AddVS src1 src2));
20117   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20118   ins_encode %{
20119     int vlen_enc = vector_length_encoding(this);
20120     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20121   %}
20122   ins_pipe( pipe_slow );
20123 %}
20124 
20125 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20126   predicate((UseAVX > 0) &&
20127             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20128   match(Set dst (AddVS src (LoadVector mem)));
20129   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20130   ins_encode %{
20131     int vlen_enc = vector_length_encoding(this);
20132     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20133   %}
20134   ins_pipe( pipe_slow );
20135 %}
20136 
20137 // Integers vector add
20138 instruct vaddI(vec dst, vec src) %{
20139   predicate(UseAVX == 0);
20140   match(Set dst (AddVI dst src));
20141   format %{ "paddd   $dst,$src\t! add packedI" %}
20142   ins_encode %{
20143     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20144   %}
20145   ins_pipe( pipe_slow );
20146 %}
20147 
20148 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20149   predicate(UseAVX > 0);
20150   match(Set dst (AddVI src1 src2));
20151   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20152   ins_encode %{
20153     int vlen_enc = vector_length_encoding(this);
20154     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20155   %}
20156   ins_pipe( pipe_slow );
20157 %}
20158 
20160 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20161   predicate((UseAVX > 0) &&
20162             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20163   match(Set dst (AddVI src (LoadVector mem)));
20164   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20165   ins_encode %{
20166     int vlen_enc = vector_length_encoding(this);
20167     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20168   %}
20169   ins_pipe( pipe_slow );
20170 %}
20171 
20172 // Longs vector add
20173 instruct vaddL(vec dst, vec src) %{
20174   predicate(UseAVX == 0);
20175   match(Set dst (AddVL dst src));
20176   format %{ "paddq   $dst,$src\t! add packedL" %}
20177   ins_encode %{
20178     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20179   %}
20180   ins_pipe( pipe_slow );
20181 %}
20182 
20183 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20184   predicate(UseAVX > 0);
20185   match(Set dst (AddVL src1 src2));
20186   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20187   ins_encode %{
20188     int vlen_enc = vector_length_encoding(this);
20189     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20190   %}
20191   ins_pipe( pipe_slow );
20192 %}
20193 
20194 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20195   predicate((UseAVX > 0) &&
20196             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20197   match(Set dst (AddVL src (LoadVector mem)));
20198   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20199   ins_encode %{
20200     int vlen_enc = vector_length_encoding(this);
20201     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20202   %}
20203   ins_pipe( pipe_slow );
20204 %}
20205 
20206 // Floats vector add
20207 instruct vaddF(vec dst, vec src) %{
20208   predicate(UseAVX == 0);
20209   match(Set dst (AddVF dst src));
20210   format %{ "addps   $dst,$src\t! add packedF" %}
20211   ins_encode %{
20212     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20213   %}
20214   ins_pipe( pipe_slow );
20215 %}
20216 
20217 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20218   predicate(UseAVX > 0);
20219   match(Set dst (AddVF src1 src2));
20220   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20221   ins_encode %{
20222     int vlen_enc = vector_length_encoding(this);
20223     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20224   %}
20225   ins_pipe( pipe_slow );
20226 %}
20227 
20228 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20229   predicate((UseAVX > 0) &&
20230             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20231   match(Set dst (AddVF src (LoadVector mem)));
20232   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20233   ins_encode %{
20234     int vlen_enc = vector_length_encoding(this);
20235     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20236   %}
20237   ins_pipe( pipe_slow );
20238 %}
20239 
20240 // Doubles vector add
20241 instruct vaddD(vec dst, vec src) %{
20242   predicate(UseAVX == 0);
20243   match(Set dst (AddVD dst src));
20244   format %{ "addpd   $dst,$src\t! add packedD" %}
20245   ins_encode %{
20246     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20247   %}
20248   ins_pipe( pipe_slow );
20249 %}
20250 
20251 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20252   predicate(UseAVX > 0);
20253   match(Set dst (AddVD src1 src2));
20254   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20255   ins_encode %{
20256     int vlen_enc = vector_length_encoding(this);
20257     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20258   %}
20259   ins_pipe( pipe_slow );
20260 %}
20261 
20262 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20263   predicate((UseAVX > 0) &&
20264             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20265   match(Set dst (AddVD src (LoadVector mem)));
20266   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20267   ins_encode %{
20268     int vlen_enc = vector_length_encoding(this);
20269     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20270   %}
20271   ins_pipe( pipe_slow );
20272 %}
20273 
20274 // --------------------------------- SUB --------------------------------------
20275 
20276 // Bytes vector sub
20277 instruct vsubB(vec dst, vec src) %{
20278   predicate(UseAVX == 0);
20279   match(Set dst (SubVB dst src));
20280   format %{ "psubb   $dst,$src\t! sub packedB" %}
20281   ins_encode %{
20282     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20283   %}
20284   ins_pipe( pipe_slow );
20285 %}
20286 
20287 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20288   predicate(UseAVX > 0);
20289   match(Set dst (SubVB src1 src2));
20290   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20291   ins_encode %{
20292     int vlen_enc = vector_length_encoding(this);
20293     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20294   %}
20295   ins_pipe( pipe_slow );
20296 %}
20297 
20298 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20299   predicate((UseAVX > 0) &&
20300             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20301   match(Set dst (SubVB src (LoadVector mem)));
20302   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20303   ins_encode %{
20304     int vlen_enc = vector_length_encoding(this);
20305     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20306   %}
20307   ins_pipe( pipe_slow );
20308 %}
20309 
20310 // Shorts/Chars vector sub
20311 instruct vsubS(vec dst, vec src) %{
20312   predicate(UseAVX == 0);
20313   match(Set dst (SubVS dst src));
20314   format %{ "psubw   $dst,$src\t! sub packedS" %}
20315   ins_encode %{
20316     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20317   %}
20318   ins_pipe( pipe_slow );
20319 %}
20320 
20322 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20323   predicate(UseAVX > 0);
20324   match(Set dst (SubVS src1 src2));
20325   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20326   ins_encode %{
20327     int vlen_enc = vector_length_encoding(this);
20328     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20329   %}
20330   ins_pipe( pipe_slow );
20331 %}
20332 
20333 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20334   predicate((UseAVX > 0) &&
20335             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20336   match(Set dst (SubVS src (LoadVector mem)));
20337   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20338   ins_encode %{
20339     int vlen_enc = vector_length_encoding(this);
20340     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20341   %}
20342   ins_pipe( pipe_slow );
20343 %}
20344 
20345 // Integers vector sub
20346 instruct vsubI(vec dst, vec src) %{
20347   predicate(UseAVX == 0);
20348   match(Set dst (SubVI dst src));
20349   format %{ "psubd   $dst,$src\t! sub packedI" %}
20350   ins_encode %{
20351     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20352   %}
20353   ins_pipe( pipe_slow );
20354 %}
20355 
20356 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20357   predicate(UseAVX > 0);
20358   match(Set dst (SubVI src1 src2));
20359   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20360   ins_encode %{
20361     int vlen_enc = vector_length_encoding(this);
20362     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20363   %}
20364   ins_pipe( pipe_slow );
20365 %}
20366 
20367 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20368   predicate((UseAVX > 0) &&
20369             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20370   match(Set dst (SubVI src (LoadVector mem)));
20371   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20372   ins_encode %{
20373     int vlen_enc = vector_length_encoding(this);
20374     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20375   %}
20376   ins_pipe( pipe_slow );
20377 %}
20378 
20379 // Longs vector sub
20380 instruct vsubL(vec dst, vec src) %{
20381   predicate(UseAVX == 0);
20382   match(Set dst (SubVL dst src));
20383   format %{ "psubq   $dst,$src\t! sub packedL" %}
20384   ins_encode %{
20385     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20386   %}
20387   ins_pipe( pipe_slow );
20388 %}
20389 
20390 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20391   predicate(UseAVX > 0);
20392   match(Set dst (SubVL src1 src2));
20393   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20394   ins_encode %{
20395     int vlen_enc = vector_length_encoding(this);
20396     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20397   %}
20398   ins_pipe( pipe_slow );
20399 %}
20400 
20402 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20403   predicate((UseAVX > 0) &&
20404             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20405   match(Set dst (SubVL src (LoadVector mem)));
20406   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20407   ins_encode %{
20408     int vlen_enc = vector_length_encoding(this);
20409     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20410   %}
20411   ins_pipe( pipe_slow );
20412 %}
20413 
20414 // Floats vector sub
20415 instruct vsubF(vec dst, vec src) %{
20416   predicate(UseAVX == 0);
20417   match(Set dst (SubVF dst src));
20418   format %{ "subps   $dst,$src\t! sub packedF" %}
20419   ins_encode %{
20420     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20421   %}
20422   ins_pipe( pipe_slow );
20423 %}
20424 
20425 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20426   predicate(UseAVX > 0);
20427   match(Set dst (SubVF src1 src2));
20428   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20429   ins_encode %{
20430     int vlen_enc = vector_length_encoding(this);
20431     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20432   %}
20433   ins_pipe( pipe_slow );
20434 %}
20435 
20436 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20437   predicate((UseAVX > 0) &&
20438             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20439   match(Set dst (SubVF src (LoadVector mem)));
20440   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20441   ins_encode %{
20442     int vlen_enc = vector_length_encoding(this);
20443     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20444   %}
20445   ins_pipe( pipe_slow );
20446 %}
20447 
20448 // Doubles vector sub
20449 instruct vsubD(vec dst, vec src) %{
20450   predicate(UseAVX == 0);
20451   match(Set dst (SubVD dst src));
20452   format %{ "subpd   $dst,$src\t! sub packedD" %}
20453   ins_encode %{
20454     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20455   %}
20456   ins_pipe( pipe_slow );
20457 %}
20458 
20459 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20460   predicate(UseAVX > 0);
20461   match(Set dst (SubVD src1 src2));
20462   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20463   ins_encode %{
20464     int vlen_enc = vector_length_encoding(this);
20465     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20466   %}
20467   ins_pipe( pipe_slow );
20468 %}
20469 
20470 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20471   predicate((UseAVX > 0) &&
20472             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20473   match(Set dst (SubVD src (LoadVector mem)));
20474   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20475   ins_encode %{
20476     int vlen_enc = vector_length_encoding(this);
20477     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20478   %}
20479   ins_pipe( pipe_slow );
20480 %}
20481 
20482 // --------------------------------- MUL --------------------------------------
20483 
20484 // Byte vector mul
20485 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20486   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20487   match(Set dst (MulVB src1 src2));
20488   effect(TEMP dst, TEMP xtmp);
20489   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20490   ins_encode %{
20491     assert(UseSSE > 3, "required");
20492     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20493     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20494     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20495     __ psllw($dst$$XMMRegister, 8);
20496     __ psrlw($dst$$XMMRegister, 8);
20497     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20498   %}
20499   ins_pipe( pipe_slow );
20500 %}
20501 
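// x86 has no 8-bit SIMD multiply, so byte multiplication is synthesized from
// the 16-bit pmullw/vpmullw. The <= 8-byte rule above widens bytes to words
// first; the wider rules below instead compute the products of the odd and
// even byte lanes separately within each 16-bit lane and merge them. A
// scalar model of one 16-bit lane (illustrative C++ only):
//
//   uint16_t mul_byte_lanes(uint16_t a, uint16_t b) {
//     uint16_t even = (uint16_t)(((a & 0xFF) * (b & 0xFF)) & 0xFF);  // bits 7:0
//     uint16_t odd  = (uint16_t)(((a >> 8) * (b >> 8)) << 8);        // bits 15:8
//     return even | odd;
//   }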
20502 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20503   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20504   match(Set dst (MulVB src1 src2));
20505   effect(TEMP dst, TEMP xtmp);
20506   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20507   ins_encode %{
20508     assert(UseSSE > 3, "required");
20509     // Odd-index elements
20510     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20511     __ psrlw($dst$$XMMRegister, 8);
20512     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20513     __ psrlw($xtmp$$XMMRegister, 8);
20514     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20515     __ psllw($dst$$XMMRegister, 8);
20516     // Even-index elements
20517     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20518     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20519     __ psllw($xtmp$$XMMRegister, 8);
20520     __ psrlw($xtmp$$XMMRegister, 8);
20521     // Combine
20522     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20523   %}
20524   ins_pipe( pipe_slow );
20525 %}
20526 
20527 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20528   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20529   match(Set dst (MulVB src1 src2));
20530   effect(TEMP xtmp1, TEMP xtmp2);
20531   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20532   ins_encode %{
20533     int vlen_enc = vector_length_encoding(this);
20534     // Odd-index elements
20535     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20536     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20537     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20538     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20539     // Even-index elements
20540     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20541     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20542     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20543     // Combine
20544     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20545   %}
20546   ins_pipe( pipe_slow );
20547 %}
20548 
20549 // Shorts/Chars vector mul
20550 instruct vmulS(vec dst, vec src) %{
20551   predicate(UseAVX == 0);
20552   match(Set dst (MulVS dst src));
20553   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20554   ins_encode %{
20555     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20556   %}
20557   ins_pipe( pipe_slow );
20558 %}
20559 
20560 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20561   predicate(UseAVX > 0);
20562   match(Set dst (MulVS src1 src2));
20563   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20564   ins_encode %{
20565     int vlen_enc = vector_length_encoding(this);
20566     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20567   %}
20568   ins_pipe( pipe_slow );
20569 %}
20570 
20571 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20572   predicate((UseAVX > 0) &&
20573             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20574   match(Set dst (MulVS src (LoadVector mem)));
20575   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20576   ins_encode %{
20577     int vlen_enc = vector_length_encoding(this);
20578     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20579   %}
20580   ins_pipe( pipe_slow );
20581 %}
20582 
20583 // Integers vector mul
20584 instruct vmulI(vec dst, vec src) %{
20585   predicate(UseAVX == 0);
20586   match(Set dst (MulVI dst src));
20587   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20588   ins_encode %{
20589     assert(UseSSE > 3, "required");
20590     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20591   %}
20592   ins_pipe( pipe_slow );
20593 %}
20594 
20595 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20596   predicate(UseAVX > 0);
20597   match(Set dst (MulVI src1 src2));
20598   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20599   ins_encode %{
20600     int vlen_enc = vector_length_encoding(this);
20601     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20602   %}
20603   ins_pipe( pipe_slow );
20604 %}
20605 
20606 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20607   predicate((UseAVX > 0) &&
20608             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20609   match(Set dst (MulVI src (LoadVector mem)));
20610   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20611   ins_encode %{
20612     int vlen_enc = vector_length_encoding(this);
20613     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20614   %}
20615   ins_pipe( pipe_slow );
20616 %}
20617 
20618 // Longs vector mul
20619 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20620   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20621              VM_Version::supports_avx512dq()) ||
20622             VM_Version::supports_avx512vldq());
20623   match(Set dst (MulVL src1 src2));
20624   ins_cost(500);
20625   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20626   ins_encode %{
20627     assert(UseAVX > 2, "required");
20628     int vlen_enc = vector_length_encoding(this);
20629     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20630   %}
20631   ins_pipe( pipe_slow );
20632 %}
20633 
20634 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20635   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20636              VM_Version::supports_avx512dq()) ||
20637             (Matcher::vector_length_in_bytes(n) > 8 &&
20638              VM_Version::supports_avx512vldq()));
20639   match(Set dst (MulVL src (LoadVector mem)));
20640   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20641   ins_cost(500);
20642   ins_encode %{
20643     assert(UseAVX > 2, "required");
20644     int vlen_enc = vector_length_encoding(this);
20645     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20646   %}
20647   ins_pipe( pipe_slow );
20648 %}
20649 
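// Without evpmullq, a 64x64->64-bit lane multiply is built from 32-bit
// pieces. Writing a = a_hi*2^32 + a_lo and b = b_hi*2^32 + b_lo:
//
//   a*b mod 2^64 = a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32)
//
// The cross terms only contribute their low 32 bits (the rest shifts out),
// so pmulld/vpmulld suffices for them, while pmuludq/vpmuludq supplies the
// full 64-bit lo*lo product.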
20650 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20651   predicate(UseAVX == 0);
20652   match(Set dst (MulVL src1 src2));
20653   ins_cost(500);
20654   effect(TEMP dst, TEMP xtmp);
20655   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20656   ins_encode %{
20657     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are of concern
20659     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20660     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20661     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20662     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20663     __ psllq($dst$$XMMRegister, 32);
20664     // Get the lo-lo products
20665     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20666     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20667     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20668   %}
20669   ins_pipe( pipe_slow );
20670 %}
20671 
20672 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20673   predicate(UseAVX > 0 &&
20674             ((Matcher::vector_length_in_bytes(n) == 64 &&
20675               !VM_Version::supports_avx512dq()) ||
20676              (Matcher::vector_length_in_bytes(n) < 64 &&
20677               !VM_Version::supports_avx512vldq())));
20678   match(Set dst (MulVL src1 src2));
20679   effect(TEMP xtmp1, TEMP xtmp2);
20680   ins_cost(500);
20681   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20682   ins_encode %{
20683     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are of concern
20685     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20686     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20687     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20688     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20689     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20690     // Get the lo-lo products
20691     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20692     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20693   %}
20694   ins_pipe( pipe_slow );
20695 %}
20696 
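// Cheaper MulVL paths when both inputs are known to fit in 32 bits: if both
// operands are zero-extended ints (has_uint_inputs()), one vpmuludq yields
// the exact 64-bit product; if both are sign-extended ints (has_int_inputs()),
// vpmuldq does. Hence the lower ins_cost(100) relative to the decomposed
// sequences above.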
20697 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20698   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20699   match(Set dst (MulVL src1 src2));
20700   ins_cost(100);
20701   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20702   ins_encode %{
20703     int vlen_enc = vector_length_encoding(this);
20704     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20705   %}
20706   ins_pipe( pipe_slow );
20707 %}
20708 
20709 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20710   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20711   match(Set dst (MulVL src1 src2));
20712   ins_cost(100);
20713   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20714   ins_encode %{
20715     int vlen_enc = vector_length_encoding(this);
20716     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20717   %}
20718   ins_pipe( pipe_slow );
20719 %}
20720 
20721 // Floats vector mul
20722 instruct vmulF(vec dst, vec src) %{
20723   predicate(UseAVX == 0);
20724   match(Set dst (MulVF dst src));
20725   format %{ "mulps   $dst,$src\t! mul packedF" %}
20726   ins_encode %{
20727     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20728   %}
20729   ins_pipe( pipe_slow );
20730 %}
20731 
20732 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20733   predicate(UseAVX > 0);
20734   match(Set dst (MulVF src1 src2));
20735   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20736   ins_encode %{
20737     int vlen_enc = vector_length_encoding(this);
20738     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20739   %}
20740   ins_pipe( pipe_slow );
20741 %}
20742 
20743 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20744   predicate((UseAVX > 0) &&
20745             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20746   match(Set dst (MulVF src (LoadVector mem)));
20747   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20748   ins_encode %{
20749     int vlen_enc = vector_length_encoding(this);
20750     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20751   %}
20752   ins_pipe( pipe_slow );
20753 %}
20754 
20755 // Doubles vector mul
20756 instruct vmulD(vec dst, vec src) %{
20757   predicate(UseAVX == 0);
20758   match(Set dst (MulVD dst src));
20759   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20760   ins_encode %{
20761     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20762   %}
20763   ins_pipe( pipe_slow );
20764 %}
20765 
20766 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20767   predicate(UseAVX > 0);
20768   match(Set dst (MulVD src1 src2));
20769   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20770   ins_encode %{
20771     int vlen_enc = vector_length_encoding(this);
20772     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20773   %}
20774   ins_pipe( pipe_slow );
20775 %}
20776 
20777 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20778   predicate((UseAVX > 0) &&
20779             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20780   match(Set dst (MulVD src (LoadVector mem)));
20781   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20782   ins_encode %{
20783     int vlen_enc = vector_length_encoding(this);
20784     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20785   %}
20786   ins_pipe( pipe_slow );
20787 %}
20788 
20789 // --------------------------------- DIV --------------------------------------
20790 
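// Only floating-point vector division is matched here: x86 SIMD has no
// packed integer divide instruction.
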
20791 // Floats vector div
20792 instruct vdivF(vec dst, vec src) %{
20793   predicate(UseAVX == 0);
20794   match(Set dst (DivVF dst src));
20795   format %{ "divps   $dst,$src\t! div packedF" %}
20796   ins_encode %{
20797     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20798   %}
20799   ins_pipe( pipe_slow );
20800 %}
20801 
20802 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20803   predicate(UseAVX > 0);
20804   match(Set dst (DivVF src1 src2));
20805   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20806   ins_encode %{
20807     int vlen_enc = vector_length_encoding(this);
20808     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20809   %}
20810   ins_pipe( pipe_slow );
20811 %}
20812 
20813 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20814   predicate((UseAVX > 0) &&
20815             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20816   match(Set dst (DivVF src (LoadVector mem)));
20817   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20818   ins_encode %{
20819     int vlen_enc = vector_length_encoding(this);
20820     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20821   %}
20822   ins_pipe( pipe_slow );
20823 %}
20824 
20825 // Doubles vector div
20826 instruct vdivD(vec dst, vec src) %{
20827   predicate(UseAVX == 0);
20828   match(Set dst (DivVD dst src));
20829   format %{ "divpd   $dst,$src\t! div packedD" %}
20830   ins_encode %{
20831     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20832   %}
20833   ins_pipe( pipe_slow );
20834 %}
20835 
20836 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20837   predicate(UseAVX > 0);
20838   match(Set dst (DivVD src1 src2));
20839   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20840   ins_encode %{
20841     int vlen_enc = vector_length_encoding(this);
20842     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20843   %}
20844   ins_pipe( pipe_slow );
20845 %}
20846 
20847 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20848   predicate((UseAVX > 0) &&
20849             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20850   match(Set dst (DivVD src (LoadVector mem)));
20851   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20852   ins_encode %{
20853     int vlen_enc = vector_length_encoding(this);
20854     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20855   %}
20856   ins_pipe( pipe_slow );
20857 %}
20858 
20859 // ------------------------------ MinMax ---------------------------------------
20860 
20861 // Byte, Short, Int vector Min/Max
20862 instruct minmax_reg_sse(vec dst, vec src) %{
20863   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20864             UseAVX == 0);
20865   match(Set dst (MinV dst src));
20866   match(Set dst (MaxV dst src));
20867   format %{ "vector_minmax  $dst,$src\t!  " %}
20868   ins_encode %{
20869     assert(UseSSE >= 4, "required");
20870 
20871     int opcode = this->ideal_Opcode();
20872     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20873     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20874   %}
20875   ins_pipe( pipe_slow );
20876 %}
20877 
20878 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20879   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20880             UseAVX > 0);
20881   match(Set dst (MinV src1 src2));
20882   match(Set dst (MaxV src1 src2));
20883   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20884   ins_encode %{
20885     int opcode = this->ideal_Opcode();
20886     int vlen_enc = vector_length_encoding(this);
20887     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20888 
20889     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20890   %}
20891   ins_pipe( pipe_slow );
20892 %}
20893 
20894 // Long vector Min/Max
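// The SSE flavor pins its temp to rxmm0 because the SSE4.1 variable blend
// instructions (pblendvb/blendvpd and friends) used to select between the
// operands take xmm0 as an implicit mask operand.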
20895 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20896   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20897             UseAVX == 0);
20898   match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
20900   effect(TEMP dst, TEMP tmp);
20901   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20902   ins_encode %{
20903     assert(UseSSE >= 4, "required");
20904 
20905     int opcode = this->ideal_Opcode();
20906     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20907     assert(elem_bt == T_LONG, "sanity");
20908 
20909     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20910   %}
20911   ins_pipe( pipe_slow );
20912 %}
20913 
20914 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20915   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20916             UseAVX > 0 && !VM_Version::supports_avx512vl());
20917   match(Set dst (MinV src1 src2));
20918   match(Set dst (MaxV src1 src2));
20919   effect(TEMP dst);
20920   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20921   ins_encode %{
20922     int vlen_enc = vector_length_encoding(this);
20923     int opcode = this->ideal_Opcode();
20924     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20925     assert(elem_bt == T_LONG, "sanity");
20926 
20927     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20928   %}
20929   ins_pipe( pipe_slow );
20930 %}
20931 
20932 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20933   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20934             Matcher::vector_element_basic_type(n) == T_LONG);
20935   match(Set dst (MinV src1 src2));
20936   match(Set dst (MaxV src1 src2));
20937   format %{ "vector_minmaxL  $dst,$src1,src2\t! " %}
20938   ins_encode %{
20939     assert(UseAVX > 2, "required");
20940 
20941     int vlen_enc = vector_length_encoding(this);
20942     int opcode = this->ideal_Opcode();
20943     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20944     assert(elem_bt == T_LONG, "sanity");
20945 
20946     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20947   %}
20948   ins_pipe( pipe_slow );
20949 %}
20950 
20951 // Float/Double vector Min/Max
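// Java Math.min/max must propagate NaN and order signed zeros
// (min(-0.0, +0.0) == -0.0), which raw vminps/vminpd do not guarantee, so
// the pre-AVX10.2 flavors below blend the operands through temporaries.
// AVX10.2 provides min/max instructions with these semantics built in,
// which is why the first flavor needs no TEMPs.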
20952 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20953   predicate(VM_Version::supports_avx10_2() &&
20954             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20955   match(Set dst (MinV a b));
20956   match(Set dst (MaxV a b));
20957   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20958   ins_encode %{
20959     int vlen_enc = vector_length_encoding(this);
20960     int opcode = this->ideal_Opcode();
20961     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20962     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20963   %}
20964   ins_pipe( pipe_slow );
20965 %}
20966 
20967 // Float/Double vector Min/Max
20968 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20969   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20970             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20971             UseAVX > 0);
20972   match(Set dst (MinV a b));
20973   match(Set dst (MaxV a b));
20974   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20975   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20976   ins_encode %{
20977     assert(UseAVX > 0, "required");
20978 
20979     int opcode = this->ideal_Opcode();
20980     int vlen_enc = vector_length_encoding(this);
20981     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20982 
20983     __ vminmax_fp(opcode, elem_bt,
20984                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20986   %}
20987   ins_pipe( pipe_slow );
20988 %}
20989 
20990 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20991   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20992             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20993   match(Set dst (MinV a b));
20994   match(Set dst (MaxV a b));
20995   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20996   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20997   ins_encode %{
20998     assert(UseAVX > 2, "required");
20999 
21000     int opcode = this->ideal_Opcode();
21001     int vlen_enc = vector_length_encoding(this);
21002     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21003 
21004     __ evminmax_fp(opcode, elem_bt,
21005                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21007   %}
21008   ins_pipe( pipe_slow );
21009 %}
21010 
21011 // ------------------------------ Unsigned vector Min/Max ----------------------
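// Quadword lanes have no unsigned min/max instruction below AVX512VL
// (vpminuq/vpmaxuq are EVEX-encoded), so the *_uminmaxq flavor emulates
// them through two xmm temporaries.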
21012 
21013 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21014   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21015   match(Set dst (UMinV a b));
21016   match(Set dst (UMaxV a b));
21017   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21018   ins_encode %{
21019     int opcode = this->ideal_Opcode();
21020     int vlen_enc = vector_length_encoding(this);
21021     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21022     assert(is_integral_type(elem_bt), "");
21023     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21024   %}
21025   ins_pipe( pipe_slow );
21026 %}
21027 
21028 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21029   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21030   match(Set dst (UMinV a (LoadVector b)));
21031   match(Set dst (UMaxV a (LoadVector b)));
21032   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21033   ins_encode %{
21034     int opcode = this->ideal_Opcode();
21035     int vlen_enc = vector_length_encoding(this);
21036     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21037     assert(is_integral_type(elem_bt), "");
21038     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21039   %}
21040   ins_pipe( pipe_slow );
21041 %}
21042 
21043 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21044   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21045   match(Set dst (UMinV a b));
21046   match(Set dst (UMaxV a b));
21047   effect(TEMP xtmp1, TEMP xtmp2);
21048   format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21049   ins_encode %{
21050     int opcode = this->ideal_Opcode();
21051     int vlen_enc = vector_length_encoding(this);
21052     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21053   %}
21054   ins_pipe( pipe_slow );
21055 %}
21056 
21057 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21058   match(Set dst (UMinV (Binary dst src2) mask));
21059   match(Set dst (UMaxV (Binary dst src2) mask));
21060   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21061   ins_encode %{
21062     int vlen_enc = vector_length_encoding(this);
21063     BasicType bt = Matcher::vector_element_basic_type(this);
21064     int opc = this->ideal_Opcode();
21065     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21066                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21067   %}
21068   ins_pipe( pipe_slow );
21069 %}
21070 
21071 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21072   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21073   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21074   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21075   ins_encode %{
21076     int vlen_enc = vector_length_encoding(this);
21077     BasicType bt = Matcher::vector_element_basic_type(this);
21078     int opc = this->ideal_Opcode();
21079     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21080                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21081   %}
21082   ins_pipe( pipe_slow );
21083 %}
21084 
21085 // --------------------------------- Signum/CopySign ---------------------------
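// Java semantics implemented below: signum(x) is zero with the sign of x
// if x is zero, NaN if x is NaN, and otherwise +1.0 or -1.0; the $zero and
// $one operands carry the 0.0 and 1.0 constants used to build the result.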
21086 
21087 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21088   match(Set dst (SignumF dst (Binary zero one)));
21089   effect(KILL cr);
21090   format %{ "signumF $dst, $dst" %}
21091   ins_encode %{
21092     int opcode = this->ideal_Opcode();
21093     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21094   %}
21095   ins_pipe( pipe_slow );
21096 %}
21097 
21098 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21099   match(Set dst (SignumD dst (Binary zero one)));
21100   effect(KILL cr);
21101   format %{ "signumD $dst, $dst" %}
21102   ins_encode %{
21103     int opcode = this->ideal_Opcode();
21104     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21105   %}
21106   ins_pipe( pipe_slow );
21107 %}
21108 
21109 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21110   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21111   match(Set dst (SignumVF src (Binary zero one)));
21112   match(Set dst (SignumVD src (Binary zero one)));
21113   effect(TEMP dst, TEMP xtmp1);
21114   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21115   ins_encode %{
21116     int opcode = this->ideal_Opcode();
21117     int vec_enc = vector_length_encoding(this);
21118     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21119                          $xtmp1$$XMMRegister, vec_enc);
21120   %}
21121   ins_pipe( pipe_slow );
21122 %}
21123 
21124 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21125   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21126   match(Set dst (SignumVF src (Binary zero one)));
21127   match(Set dst (SignumVD src (Binary zero one)));
21128   effect(TEMP dst, TEMP ktmp1);
21129   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21130   ins_encode %{
21131     int opcode = this->ideal_Opcode();
21132     int vec_enc = vector_length_encoding(this);
21133     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21134                           $ktmp1$$KRegister, vec_enc);
21135   %}
21136   ins_pipe( pipe_slow );
21137 %}
21138 
21139 // ---------------------------------------
// For CopySign, use 0xE4 as the truth-table immediate for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF (0x7FFFFFFFFFFFFFFF for the double flavor)
// Wherever xmm2 is 0, we want to pick from B (sign)
// Wherever xmm2 is 1, we want to pick from A (src)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Result going from the high bit to the low bit is 0b11100100 = 0xE4
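//
// A minimal sketch (illustration only, not part of this file) showing how
// the immediate falls out of the selection rule above; any vpternlog
// truth-table byte can be derived the same way:
//
//   uint8_t imm = 0;
//   for (int i = 7; i >= 0; i--) {
//     int a = (i >> 2) & 1;            // bit of operand A (xmm0)
//     int b = (i >> 1) & 1;            // bit of operand B (xmm1)
//     int c = i & 1;                   // bit of operand C (xmm2)
//     imm = (imm << 1) | (c ? a : b);  // C==1 -> take A (src), C==0 -> take B (sign)
//   }
//   // imm == 0xE4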
21157 // ---------------------------------------
21158 
21159 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21160   match(Set dst (CopySignF dst src));
21161   effect(TEMP tmp1, TEMP tmp2);
21162   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21163   ins_encode %{
21164     __ movl($tmp2$$Register, 0x7FFFFFFF);
21165     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21166     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21167   %}
21168   ins_pipe( pipe_slow );
21169 %}
21170 
21171 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21172   match(Set dst (CopySignD dst (Binary src zero)));
21173   ins_cost(100);
21174   effect(TEMP tmp1, TEMP tmp2);
21175   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21176   ins_encode %{
21177     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21178     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21179     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21180   %}
21181   ins_pipe( pipe_slow );
21182 %}
21183 
21184 //----------------------------- CompressBits/ExpandBits ------------------------
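// BMI2 pext/pdep semantics, shown as a worked example (illustration only):
//
//   pext(0b10110010, 0b01100110) == 0b00000101
//     gathers the source bits selected by the mask into contiguous low bits
//   pdep(0b00000101, 0b01100110) == 0b00100010
//     scatters the low source bits back into the positions set in the mask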
21185 
21186 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21187   predicate(n->bottom_type()->isa_int());
21188   match(Set dst (CompressBits src mask));
21189   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21190   ins_encode %{
21191     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21192   %}
21193   ins_pipe( pipe_slow );
21194 %}
21195 
21196 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21197   predicate(n->bottom_type()->isa_int());
21198   match(Set dst (ExpandBits src mask));
21199   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21200   ins_encode %{
21201     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21202   %}
21203   ins_pipe( pipe_slow );
21204 %}
21205 
21206 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21207   predicate(n->bottom_type()->isa_int());
21208   match(Set dst (CompressBits src (LoadI mask)));
21209   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21210   ins_encode %{
21211     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21212   %}
21213   ins_pipe( pipe_slow );
21214 %}
21215 
21216 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21217   predicate(n->bottom_type()->isa_int());
21218   match(Set dst (ExpandBits src (LoadI mask)));
21219   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21220   ins_encode %{
21221     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21222   %}
21223   ins_pipe( pipe_slow );
21224 %}
21225 
21226 // --------------------------------- Sqrt --------------------------------------
21227 
21228 instruct vsqrtF_reg(vec dst, vec src) %{
21229   match(Set dst (SqrtVF src));
21230   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21231   ins_encode %{
21232     assert(UseAVX > 0, "required");
21233     int vlen_enc = vector_length_encoding(this);
21234     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21235   %}
21236   ins_pipe( pipe_slow );
21237 %}
21238 
21239 instruct vsqrtF_mem(vec dst, memory mem) %{
21240   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21241   match(Set dst (SqrtVF (LoadVector mem)));
21242   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21243   ins_encode %{
21244     assert(UseAVX > 0, "required");
21245     int vlen_enc = vector_length_encoding(this);
21246     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21247   %}
21248   ins_pipe( pipe_slow );
21249 %}
21250 
21251 // Floating point vector sqrt
21252 instruct vsqrtD_reg(vec dst, vec src) %{
21253   match(Set dst (SqrtVD src));
21254   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21255   ins_encode %{
21256     assert(UseAVX > 0, "required");
21257     int vlen_enc = vector_length_encoding(this);
21258     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21259   %}
21260   ins_pipe( pipe_slow );
21261 %}
21262 
21263 instruct vsqrtD_mem(vec dst, memory mem) %{
21264   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21265   match(Set dst (SqrtVD (LoadVector mem)));
21266   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21267   ins_encode %{
21268     assert(UseAVX > 0, "required");
21269     int vlen_enc = vector_length_encoding(this);
21270     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21271   %}
21272   ins_pipe( pipe_slow );
21273 %}
21274 
21275 // ------------------------------ Shift ---------------------------------------
21276 
21277 // Left and right shift count vectors are the same on x86
21278 // (only lowest bits of xmm reg are used for count).
21279 instruct vshiftcnt(vec dst, rRegI cnt) %{
21280   match(Set dst (LShiftCntV cnt));
21281   match(Set dst (RShiftCntV cnt));
21282   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21283   ins_encode %{
21284     __ movdl($dst$$XMMRegister, $cnt$$Register);
21285   %}
21286   ins_pipe( pipe_slow );
21287 %}
21288 
21289 // Byte vector shift
21290 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21291   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21292   match(Set dst ( LShiftVB src shift));
21293   match(Set dst ( RShiftVB src shift));
21294   match(Set dst (URShiftVB src shift));
21295   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21296   format %{"vector_byte_shift $dst,$src,$shift" %}
21297   ins_encode %{
21298     assert(UseSSE > 3, "required");
21299     int opcode = this->ideal_Opcode();
21300     bool sign = (opcode != Op_URShiftVB);
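    // Widen the bytes to words, shift as words, then mask each word to its
    // low byte and pack back down to bytes.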
21301     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21302     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21303     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21304     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21305     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21306   %}
21307   ins_pipe( pipe_slow );
21308 %}
21309 
21310 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21311   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21312             UseAVX <= 1);
21313   match(Set dst ( LShiftVB src shift));
21314   match(Set dst ( RShiftVB src shift));
21315   match(Set dst (URShiftVB src shift));
21316   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21317   format %{"vector_byte_shift $dst,$src,$shift" %}
21318   ins_encode %{
21319     assert(UseSSE > 3, "required");
21320     int opcode = this->ideal_Opcode();
21321     bool sign = (opcode != Op_URShiftVB);
21322     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21323     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21324     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21325     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21326     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21327     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21328     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21329     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21330     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21331   %}
21332   ins_pipe( pipe_slow );
21333 %}
21334 
21335 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21336   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21337             UseAVX > 1);
21338   match(Set dst ( LShiftVB src shift));
21339   match(Set dst ( RShiftVB src shift));
21340   match(Set dst (URShiftVB src shift));
21341   effect(TEMP dst, TEMP tmp);
21342   format %{"vector_byte_shift $dst,$src,$shift" %}
21343   ins_encode %{
21344     int opcode = this->ideal_Opcode();
21345     bool sign = (opcode != Op_URShiftVB);
21346     int vlen_enc = Assembler::AVX_256bit;
21347     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21348     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21349     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21350     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21351     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21352   %}
21353   ins_pipe( pipe_slow );
21354 %}
21355 
21356 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21357   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21358   match(Set dst ( LShiftVB src shift));
21359   match(Set dst ( RShiftVB src shift));
21360   match(Set dst (URShiftVB src shift));
21361   effect(TEMP dst, TEMP tmp);
21362   format %{"vector_byte_shift $dst,$src,$shift" %}
21363   ins_encode %{
21364     assert(UseAVX > 1, "required");
21365     int opcode = this->ideal_Opcode();
21366     bool sign = (opcode != Op_URShiftVB);
21367     int vlen_enc = Assembler::AVX_256bit;
21368     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21369     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21370     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21371     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21372     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21373     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21374     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21375     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21376     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21377   %}
21378   ins_pipe( pipe_slow );
21379 %}
21380 
21381 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21382   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21383   match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
21385   match(Set dst (URShiftVB src shift));
21386   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21387   format %{"vector_byte_shift $dst,$src,$shift" %}
21388   ins_encode %{
21389     assert(UseAVX > 2, "required");
21390     int opcode = this->ideal_Opcode();
21391     bool sign = (opcode != Op_URShiftVB);
21392     int vlen_enc = Assembler::AVX_512bit;
21393     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21394     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21395     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21396     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21397     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21398     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21399     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21400     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21401     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21402     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21403     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21404     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21405   %}
21406   ins_pipe( pipe_slow );
21407 %}
21408 
// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java converts a short value into an int with
// sign extension before the shift. But char vectors are fine since chars are
// unsigned values.
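// For example (illustration only): the Java expression ((short)-2) >>> 1
// first widens -2 to the int 0xFFFFFFFE and yields 0x7FFFFFFF, whereas a
// 16-bit psrlw on 0xFFFE would yield 0x7FFF.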
21413 // Shorts/Chars vector left shift
21414 instruct vshiftS(vec dst, vec src, vec shift) %{
21415   predicate(!n->as_ShiftV()->is_var_shift());
21416   match(Set dst ( LShiftVS src shift));
21417   match(Set dst ( RShiftVS src shift));
21418   match(Set dst (URShiftVS src shift));
21419   effect(TEMP dst, USE src, USE shift);
21420   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21421   ins_encode %{
21422     int opcode = this->ideal_Opcode();
21423     if (UseAVX > 0) {
21424       int vlen_enc = vector_length_encoding(this);
21425       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21426     } else {
21427       int vlen = Matcher::vector_length(this);
21428       if (vlen == 2) {
21429         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21430         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21431       } else if (vlen == 4) {
21432         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21433         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21434       } else {
        assert(vlen == 8, "sanity");
21436         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21437         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21438       }
21439     }
21440   %}
21441   ins_pipe( pipe_slow );
21442 %}
21443 
21444 // Integers vector left shift
21445 instruct vshiftI(vec dst, vec src, vec shift) %{
21446   predicate(!n->as_ShiftV()->is_var_shift());
21447   match(Set dst ( LShiftVI src shift));
21448   match(Set dst ( RShiftVI src shift));
21449   match(Set dst (URShiftVI src shift));
21450   effect(TEMP dst, USE src, USE shift);
21451   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21452   ins_encode %{
21453     int opcode = this->ideal_Opcode();
21454     if (UseAVX > 0) {
21455       int vlen_enc = vector_length_encoding(this);
21456       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21457     } else {
21458       int vlen = Matcher::vector_length(this);
21459       if (vlen == 2) {
21460         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21461         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21462       } else {
21463         assert(vlen == 4, "sanity");
21464         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21465         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21466       }
21467     }
21468   %}
21469   ins_pipe( pipe_slow );
21470 %}
21471 
21472 // Integers vector left constant shift
21473 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21474   match(Set dst (LShiftVI src (LShiftCntV shift)));
21475   match(Set dst (RShiftVI src (RShiftCntV shift)));
21476   match(Set dst (URShiftVI src (RShiftCntV shift)));
21477   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21478   ins_encode %{
21479     int opcode = this->ideal_Opcode();
21480     if (UseAVX > 0) {
21481       int vector_len = vector_length_encoding(this);
21482       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21483     } else {
21484       int vlen = Matcher::vector_length(this);
21485       if (vlen == 2) {
21486         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21487         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21488       } else {
21489         assert(vlen == 4, "sanity");
21490         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21491         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21492       }
21493     }
21494   %}
21495   ins_pipe( pipe_slow );
21496 %}
21497 
21498 // Longs vector shift
21499 instruct vshiftL(vec dst, vec src, vec shift) %{
21500   predicate(!n->as_ShiftV()->is_var_shift());
21501   match(Set dst ( LShiftVL src shift));
21502   match(Set dst (URShiftVL src shift));
21503   effect(TEMP dst, USE src, USE shift);
21504   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21505   ins_encode %{
21506     int opcode = this->ideal_Opcode();
21507     if (UseAVX > 0) {
21508       int vlen_enc = vector_length_encoding(this);
21509       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21510     } else {
21511       assert(Matcher::vector_length(this) == 2, "");
21512       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21513       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21514     }
21515   %}
21516   ins_pipe( pipe_slow );
21517 %}
21518 
21519 // Longs vector constant shift
21520 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21521   match(Set dst (LShiftVL src (LShiftCntV shift)));
21522   match(Set dst (URShiftVL src (RShiftCntV shift)));
21523   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21524   ins_encode %{
21525     int opcode = this->ideal_Opcode();
21526     if (UseAVX > 0) {
21527       int vector_len = vector_length_encoding(this);
21528       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21529     } else {
21530       assert(Matcher::vector_length(this) == 2, "");
21531       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21532       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21533     }
21534   %}
21535   ins_pipe( pipe_slow );
21536 %}
21537 
21538 // -------------------ArithmeticRightShift -----------------------------------
21539 // Long vector arithmetic right shift
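// SSE/AVX2 have no arithmetic quadword shift (vpsraq is EVEX-only), so the
// flavor below synthesizes it from the logical shift via the sign-extension
// identity (a sketch of the reasoning, per lane):
//
//   (x >> n) == ((x >>> n) ^ m) - m,  where m == (0x8000000000000000 >>> n)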
21540 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21541   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21542   match(Set dst (RShiftVL src shift));
21543   effect(TEMP dst, TEMP tmp);
21544   format %{ "vshiftq $dst,$src,$shift" %}
21545   ins_encode %{
21546     uint vlen = Matcher::vector_length(this);
21547     if (vlen == 2) {
21548       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21549       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21550       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21551       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21552       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21553       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21554     } else {
21555       assert(vlen == 4, "sanity");
21556       assert(UseAVX > 1, "required");
21557       int vlen_enc = Assembler::AVX_256bit;
21558       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21559       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21560       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21561       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21562       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21563     }
21564   %}
21565   ins_pipe( pipe_slow );
21566 %}
21567 
21568 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21569   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21570   match(Set dst (RShiftVL src shift));
21571   format %{ "vshiftq $dst,$src,$shift" %}
21572   ins_encode %{
21573     int vlen_enc = vector_length_encoding(this);
21574     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21575   %}
21576   ins_pipe( pipe_slow );
21577 %}
21578 
21579 // ------------------- Variable Shift -----------------------------
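// In the variable-shift flavors each lane is shifted by the count held in
// the corresponding lane of the shift vector (vpsllvd and friends), rather
// than by the single scalar count loaded by vshiftcnt above.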
21580 // Byte variable shift
21581 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21582   predicate(Matcher::vector_length(n) <= 8 &&
21583             n->as_ShiftV()->is_var_shift() &&
21584             !VM_Version::supports_avx512bw());
21585   match(Set dst ( LShiftVB src shift));
21586   match(Set dst ( RShiftVB src shift));
21587   match(Set dst (URShiftVB src shift));
21588   effect(TEMP dst, TEMP vtmp);
21589   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21590   ins_encode %{
21591     assert(UseAVX >= 2, "required");
21592 
21593     int opcode = this->ideal_Opcode();
21594     int vlen_enc = Assembler::AVX_128bit;
21595     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21596     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21597   %}
21598   ins_pipe( pipe_slow );
21599 %}
21600 
21601 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21602   predicate(Matcher::vector_length(n) == 16 &&
21603             n->as_ShiftV()->is_var_shift() &&
21604             !VM_Version::supports_avx512bw());
21605   match(Set dst ( LShiftVB src shift));
21606   match(Set dst ( RShiftVB src shift));
21607   match(Set dst (URShiftVB src shift));
21608   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21609   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21610   ins_encode %{
21611     assert(UseAVX >= 2, "required");
21612 
21613     int opcode = this->ideal_Opcode();
21614     int vlen_enc = Assembler::AVX_128bit;
21615     // Shift lower half and get word result in dst
21616     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21617 
21618     // Shift upper half and get word result in vtmp1
21619     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21620     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21621     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21622 
21623     // Merge and down convert the two word results to byte in dst
21624     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21625   %}
21626   ins_pipe( pipe_slow );
21627 %}
21628 
21629 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21630   predicate(Matcher::vector_length(n) == 32 &&
21631             n->as_ShiftV()->is_var_shift() &&
21632             !VM_Version::supports_avx512bw());
21633   match(Set dst ( LShiftVB src shift));
21634   match(Set dst ( RShiftVB src shift));
21635   match(Set dst (URShiftVB src shift));
21636   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21637   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21638   ins_encode %{
21639     assert(UseAVX >= 2, "required");
21640 
21641     int opcode = this->ideal_Opcode();
21642     int vlen_enc = Assembler::AVX_128bit;
21643     // Process lower 128 bits and get result in dst
21644     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21645     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21646     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21647     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21648     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21649 
21650     // Process higher 128 bits and get result in vtmp3
21651     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21652     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21653     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21654     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21655     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21656     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21657     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21658 
21659     // Merge the two results in dst
21660     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21661   %}
21662   ins_pipe( pipe_slow );
21663 %}
21664 
21665 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21666   predicate(Matcher::vector_length(n) <= 32 &&
21667             n->as_ShiftV()->is_var_shift() &&
21668             VM_Version::supports_avx512bw());
21669   match(Set dst ( LShiftVB src shift));
21670   match(Set dst ( RShiftVB src shift));
21671   match(Set dst (URShiftVB src shift));
21672   effect(TEMP dst, TEMP vtmp);
21673   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21674   ins_encode %{
21675     assert(UseAVX > 2, "required");
21676 
21677     int opcode = this->ideal_Opcode();
21678     int vlen_enc = vector_length_encoding(this);
21679     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21680   %}
21681   ins_pipe( pipe_slow );
21682 %}
21683 
21684 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21685   predicate(Matcher::vector_length(n) == 64 &&
21686             n->as_ShiftV()->is_var_shift() &&
21687             VM_Version::supports_avx512bw());
21688   match(Set dst ( LShiftVB src shift));
21689   match(Set dst ( RShiftVB src shift));
21690   match(Set dst (URShiftVB src shift));
21691   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21692   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21693   ins_encode %{
21694     assert(UseAVX > 2, "required");
21695 
21696     int opcode = this->ideal_Opcode();
21697     int vlen_enc = Assembler::AVX_256bit;
21698     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21699     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21700     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21701     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21702     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21703   %}
21704   ins_pipe( pipe_slow );
21705 %}
21706 
21707 // Short variable shift
21708 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21709   predicate(Matcher::vector_length(n) <= 8 &&
21710             n->as_ShiftV()->is_var_shift() &&
21711             !VM_Version::supports_avx512bw());
21712   match(Set dst ( LShiftVS src shift));
21713   match(Set dst ( RShiftVS src shift));
21714   match(Set dst (URShiftVS src shift));
21715   effect(TEMP dst, TEMP vtmp);
21716   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21717   ins_encode %{
21718     assert(UseAVX >= 2, "required");
21719 
21720     int opcode = this->ideal_Opcode();
21721     bool sign = (opcode != Op_URShiftVS);
21722     int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21725     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21726     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21727     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21728     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21729   %}
21730   ins_pipe( pipe_slow );
21731 %}
21732 
21733 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21734   predicate(Matcher::vector_length(n) == 16 &&
21735             n->as_ShiftV()->is_var_shift() &&
21736             !VM_Version::supports_avx512bw());
21737   match(Set dst ( LShiftVS src shift));
21738   match(Set dst ( RShiftVS src shift));
21739   match(Set dst (URShiftVS src shift));
21740   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21741   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21742   ins_encode %{
21743     assert(UseAVX >= 2, "required");
21744 
21745     int opcode = this->ideal_Opcode();
21746     bool sign = (opcode != Op_URShiftVS);
21747     int vlen_enc = Assembler::AVX_256bit;
21748     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21749     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21750     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21751     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21752     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21753 
21754     // Shift upper half, with result in dst using vtmp1 as TEMP
21755     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21756     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21757     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21758     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21759     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21760     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21761 
21762     // Merge lower and upper half result into dst
21763     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21764     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21765   %}
21766   ins_pipe( pipe_slow );
21767 %}
21768 
21769 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21770   predicate(n->as_ShiftV()->is_var_shift() &&
21771             VM_Version::supports_avx512bw());
21772   match(Set dst ( LShiftVS src shift));
21773   match(Set dst ( RShiftVS src shift));
21774   match(Set dst (URShiftVS src shift));
21775   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21776   ins_encode %{
21777     assert(UseAVX > 2, "required");
21778 
21779     int opcode = this->ideal_Opcode();
21780     int vlen_enc = vector_length_encoding(this);
21781     if (!VM_Version::supports_avx512vl()) {
21782       vlen_enc = Assembler::AVX_512bit;
21783     }
21784     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21785   %}
21786   ins_pipe( pipe_slow );
21787 %}
21788 
// Integer variable shift
21790 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21791   predicate(n->as_ShiftV()->is_var_shift());
21792   match(Set dst ( LShiftVI src shift));
21793   match(Set dst ( RShiftVI src shift));
21794   match(Set dst (URShiftVI src shift));
21795   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21796   ins_encode %{
21797     assert(UseAVX >= 2, "required");
21798 
21799     int opcode = this->ideal_Opcode();
21800     int vlen_enc = vector_length_encoding(this);
21801     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21802   %}
21803   ins_pipe( pipe_slow );
21804 %}
21805 
// Long variable shift
21807 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21808   predicate(n->as_ShiftV()->is_var_shift());
21809   match(Set dst ( LShiftVL src shift));
21810   match(Set dst (URShiftVL src shift));
21811   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21812   ins_encode %{
21813     assert(UseAVX >= 2, "required");
21814 
21815     int opcode = this->ideal_Opcode();
21816     int vlen_enc = vector_length_encoding(this);
21817     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21818   %}
21819   ins_pipe( pipe_slow );
21820 %}
21821 
// Long variable arithmetic right shift
21823 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21824   predicate(Matcher::vector_length(n) <= 4 &&
21825             n->as_ShiftV()->is_var_shift() &&
21826             UseAVX == 2);
21827   match(Set dst (RShiftVL src shift));
21828   effect(TEMP dst, TEMP vtmp);
21829   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21830   ins_encode %{
21831     int opcode = this->ideal_Opcode();
21832     int vlen_enc = vector_length_encoding(this);
21833     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21834                  $vtmp$$XMMRegister);
21835   %}
21836   ins_pipe( pipe_slow );
21837 %}
21838 
21839 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21840   predicate(n->as_ShiftV()->is_var_shift() &&
21841             UseAVX > 2);
21842   match(Set dst (RShiftVL src shift));
21843   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21844   ins_encode %{
21845     int opcode = this->ideal_Opcode();
21846     int vlen_enc = vector_length_encoding(this);
21847     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21848   %}
21849   ins_pipe( pipe_slow );
21850 %}
21851 
21852 // --------------------------------- AND --------------------------------------
21853 
21854 instruct vand(vec dst, vec src) %{
21855   predicate(UseAVX == 0);
21856   match(Set dst (AndV dst src));
21857   format %{ "pand    $dst,$src\t! and vectors" %}
21858   ins_encode %{
21859     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21860   %}
21861   ins_pipe( pipe_slow );
21862 %}
21863 
21864 instruct vand_reg(vec dst, vec src1, vec src2) %{
21865   predicate(UseAVX > 0);
21866   match(Set dst (AndV src1 src2));
21867   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21868   ins_encode %{
21869     int vlen_enc = vector_length_encoding(this);
21870     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21871   %}
21872   ins_pipe( pipe_slow );
21873 %}
21874 
21875 instruct vand_mem(vec dst, vec src, memory mem) %{
21876   predicate((UseAVX > 0) &&
21877             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21878   match(Set dst (AndV src (LoadVector mem)));
21879   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21880   ins_encode %{
21881     int vlen_enc = vector_length_encoding(this);
21882     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21883   %}
21884   ins_pipe( pipe_slow );
21885 %}
21886 
21887 // --------------------------------- OR ---------------------------------------
21888 
21889 instruct vor(vec dst, vec src) %{
21890   predicate(UseAVX == 0);
21891   match(Set dst (OrV dst src));
21892   format %{ "por     $dst,$src\t! or vectors" %}
21893   ins_encode %{
21894     __ por($dst$$XMMRegister, $src$$XMMRegister);
21895   %}
21896   ins_pipe( pipe_slow );
21897 %}
21898 
21899 instruct vor_reg(vec dst, vec src1, vec src2) %{
21900   predicate(UseAVX > 0);
21901   match(Set dst (OrV src1 src2));
21902   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21903   ins_encode %{
21904     int vlen_enc = vector_length_encoding(this);
21905     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21906   %}
21907   ins_pipe( pipe_slow );
21908 %}
21909 
21910 instruct vor_mem(vec dst, vec src, memory mem) %{
21911   predicate((UseAVX > 0) &&
21912             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21913   match(Set dst (OrV src (LoadVector mem)));
21914   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21915   ins_encode %{
21916     int vlen_enc = vector_length_encoding(this);
21917     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21918   %}
21919   ins_pipe( pipe_slow );
21920 %}
21921 
21922 // --------------------------------- XOR --------------------------------------
21923 
21924 instruct vxor(vec dst, vec src) %{
21925   predicate(UseAVX == 0);
21926   match(Set dst (XorV dst src));
21927   format %{ "pxor    $dst,$src\t! xor vectors" %}
21928   ins_encode %{
21929     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21930   %}
21931   ins_pipe( pipe_slow );
21932 %}
21933 
21934 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21935   predicate(UseAVX > 0);
21936   match(Set dst (XorV src1 src2));
21937   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21938   ins_encode %{
21939     int vlen_enc = vector_length_encoding(this);
21940     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21941   %}
21942   ins_pipe( pipe_slow );
21943 %}
21944 
21945 instruct vxor_mem(vec dst, vec src, memory mem) %{
21946   predicate((UseAVX > 0) &&
21947             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21948   match(Set dst (XorV src (LoadVector mem)));
21949   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21950   ins_encode %{
21951     int vlen_enc = vector_length_encoding(this);
21952     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21953   %}
21954   ins_pipe( pipe_slow );
21955 %}
21956 
21957 // --------------------------------- VectorCast --------------------------------------
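// Narrowing casts mask each lane and pack downwards (or use the EVEX
// down-converts such as evpmovwb when available); widening casts use the
// sign-extending vpmovsx* family, followed by an int-to-FP convert when
// the target is a floating point type.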
21958 
21959 instruct vcastBtoX(vec dst, vec src) %{
21960   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21961   match(Set dst (VectorCastB2X src));
21962   format %{ "vector_cast_b2x $dst,$src\t!" %}
21963   ins_encode %{
21964     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21965     int vlen_enc = vector_length_encoding(this);
21966     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21967   %}
21968   ins_pipe( pipe_slow );
21969 %}
21970 
21971 instruct vcastBtoD(legVec dst, legVec src) %{
21972   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21973   match(Set dst (VectorCastB2X src));
21974   format %{ "vector_cast_b2x $dst,$src\t!" %}
21975   ins_encode %{
21976     int vlen_enc = vector_length_encoding(this);
21977     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21978   %}
21979   ins_pipe( pipe_slow );
21980 %}
21981 
21982 instruct castStoX(vec dst, vec src) %{
21983   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21984             Matcher::vector_length(n->in(1)) <= 8 && // src
21985             Matcher::vector_element_basic_type(n) == T_BYTE);
21986   match(Set dst (VectorCastS2X src));
21987   format %{ "vector_cast_s2x $dst,$src" %}
21988   ins_encode %{
21989     assert(UseAVX > 0, "required");
21990 
21991     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21992     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21993   %}
21994   ins_pipe( pipe_slow );
21995 %}
21996 
21997 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21998   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21999             Matcher::vector_length(n->in(1)) == 16 && // src
22000             Matcher::vector_element_basic_type(n) == T_BYTE);
22001   effect(TEMP dst, TEMP vtmp);
22002   match(Set dst (VectorCastS2X src));
22003   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22004   ins_encode %{
22005     assert(UseAVX > 0, "required");
22006 
22007     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22008     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22009     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22010     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22011   %}
22012   ins_pipe( pipe_slow );
22013 %}
22014 
22015 instruct vcastStoX_evex(vec dst, vec src) %{
22016   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22017             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22018   match(Set dst (VectorCastS2X src));
22019   format %{ "vector_cast_s2x $dst,$src\t!" %}
22020   ins_encode %{
22021     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22022     int src_vlen_enc = vector_length_encoding(this, $src);
22023     int vlen_enc = vector_length_encoding(this);
22024     switch (to_elem_bt) {
22025       case T_BYTE:
22026         if (!VM_Version::supports_avx512vl()) {
22027           vlen_enc = Assembler::AVX_512bit;
22028         }
22029         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22030         break;
22031       case T_INT:
22032         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22033         break;
22034       case T_FLOAT:
22035         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22036         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22037         break;
22038       case T_LONG:
22039         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22040         break;
22041       case T_DOUBLE: {
22042         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22043         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22044         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22045         break;
22046       }
22047       default:
22048         ShouldNotReachHere();
22049     }
22050   %}
22051   ins_pipe( pipe_slow );
22052 %}
22053 
22054 instruct castItoX(vec dst, vec src) %{
22055   predicate(UseAVX <= 2 &&
22056             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22057             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22058   match(Set dst (VectorCastI2X src));
22059   format %{ "vector_cast_i2x $dst,$src" %}
22060   ins_encode %{
22061     assert(UseAVX > 0, "required");
22062 
22063     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22064     int vlen_enc = vector_length_encoding(this, $src);
22065 
22066     if (to_elem_bt == T_BYTE) {
22067       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22068       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22069       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22070     } else {
22071       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22072       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22073       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22074     }
22075   %}
22076   ins_pipe( pipe_slow );
22077 %}
22078 
22079 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22080   predicate(UseAVX <= 2 &&
22081             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22082             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22083   match(Set dst (VectorCastI2X src));
22084   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22085   effect(TEMP dst, TEMP vtmp);
22086   ins_encode %{
22087     assert(UseAVX > 0, "required");
22088 
22089     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22090     int vlen_enc = vector_length_encoding(this, $src);
22091 
22092     if (to_elem_bt == T_BYTE) {
22093       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22094       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22095       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22096       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22097     } else {
22098       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22099       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22100       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22101       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22102     }
22103   %}
22104   ins_pipe( pipe_slow );
22105 %}
22106 
22107 instruct vcastItoX_evex(vec dst, vec src) %{
22108   predicate(UseAVX > 2 ||
22109             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22110   match(Set dst (VectorCastI2X src));
22111   format %{ "vector_cast_i2x $dst,$src\t!" %}
22112   ins_encode %{
22113     assert(UseAVX > 0, "required");
22114 
22115     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22116     int src_vlen_enc = vector_length_encoding(this, $src);
22117     int dst_vlen_enc = vector_length_encoding(this);
22118     switch (dst_elem_bt) {
22119       case T_BYTE:
22120         if (!VM_Version::supports_avx512vl()) {
22121           src_vlen_enc = Assembler::AVX_512bit;
22122         }
22123         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22124         break;
22125       case T_SHORT:
22126         if (!VM_Version::supports_avx512vl()) {
22127           src_vlen_enc = Assembler::AVX_512bit;
22128         }
22129         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22130         break;
22131       case T_FLOAT:
22132         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22133         break;
22134       case T_LONG:
22135         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22136         break;
22137       case T_DOUBLE:
22138         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22139         break;
22140       default:
22141         ShouldNotReachHere();
22142     }
22143   %}
22144   ins_pipe( pipe_slow );
22145 %}
22146 
22147 instruct vcastLtoBS(vec dst, vec src) %{
22148   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22149             UseAVX <= 2);
22150   match(Set dst (VectorCastL2X src));
22151   format %{ "vector_cast_l2x  $dst,$src" %}
22152   ins_encode %{
22153     assert(UseAVX > 0, "required");
22154 
22155     int vlen = Matcher::vector_length_in_bytes(this, $src);
22156     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22157     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22158                                                       : ExternalAddress(vector_int_to_short_mask());
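          // A long->byte/short cast keeps the low dword of each long: gather
          // those dwords into the bottom lanes, then narrow them with the same
          // mask-and-pack sequence used by the int casts above.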
22159     if (vlen <= 16) {
22160       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22161       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22162       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22163     } else {
22164       assert(vlen <= 32, "required");
22165       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22166       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22167       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22168       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22169     }
22170     if (to_elem_bt == T_BYTE) {
22171       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22172     }
22173   %}
22174   ins_pipe( pipe_slow );
22175 %}
22176 
22177 instruct vcastLtoX_evex(vec dst, vec src) %{
22178   predicate(UseAVX > 2 ||
22179             (Matcher::vector_element_basic_type(n) == T_INT ||
22180              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22181              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22182   match(Set dst (VectorCastL2X src));
22183   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22184   ins_encode %{
22185     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22186     int vlen = Matcher::vector_length_in_bytes(this, $src);
22187     int vlen_enc = vector_length_encoding(this, $src);
22188     switch (to_elem_bt) {
22189       case T_BYTE:
22190         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22191           vlen_enc = Assembler::AVX_512bit;
22192         }
22193         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22194         break;
22195       case T_SHORT:
22196         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22197           vlen_enc = Assembler::AVX_512bit;
22198         }
22199         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22200         break;
22201       case T_INT:
22202         if (vlen == 8) {
22203           if ($dst$$XMMRegister != $src$$XMMRegister) {
22204             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22205           }
22206         } else if (vlen == 16) {
22207           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22208         } else if (vlen == 32) {
22209           if (UseAVX > 2) {
22210             if (!VM_Version::supports_avx512vl()) {
22211               vlen_enc = Assembler::AVX_512bit;
22212             }
22213             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22214           } else {
22215             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22216             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22217           }
22218         } else { // vlen == 64
22219           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22220         }
22221         break;
22222       case T_FLOAT:
22223         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22224         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22225         break;
22226       case T_DOUBLE:
22227         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22228         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22229         break;
22230 
22231       default: assert(false, "%s", type2name(to_elem_bt));
22232     }
22233   %}
22234   ins_pipe( pipe_slow );
22235 %}
22236 
22237 instruct vcastFtoD_reg(vec dst, vec src) %{
22238   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22239   match(Set dst (VectorCastF2X src));
22240   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22241   ins_encode %{
22242     int vlen_enc = vector_length_encoding(this);
22243     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22244   %}
22245   ins_pipe( pipe_slow );
22246 %}
22247 
22248 
22249 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22250   predicate(!VM_Version::supports_avx10_2() &&
22251             !VM_Version::supports_avx512vl() &&
22252             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22253             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22254             is_integral_type(Matcher::vector_element_basic_type(n)));
22255   match(Set dst (VectorCastF2X src));
22256   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22257   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22258   ins_encode %{
22259     int vlen_enc = vector_length_encoding(this, $src);
22260     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22261     // JDK-8292878 removed the need for an explicit scratch register when loading
22262     // 64-bit stub-constant addresses for register-indirect addressing: stub
22263     // constants live in the code cache, and ReservedCodeCacheSize is currently
22264     // capped at 2G. Targets are free to raise this limit, but a code cache
22265     // larger than 2G is unreasonable in practice; on the flip side, the cap
22266     // saves a temporary register allocation, which in the limiting case can
22267     // prevent spilling in high-register-pressure blocks.
22268     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22269                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22270                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22271   %}
22272   ins_pipe( pipe_slow );
22273 %}
22274 
22275 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22276   predicate(!VM_Version::supports_avx10_2() &&
22277             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22278             is_integral_type(Matcher::vector_element_basic_type(n)));
22279   match(Set dst (VectorCastF2X src));
22280   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22281   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22282   ins_encode %{
22283     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22284     if (to_elem_bt == T_LONG) {
22285       int vlen_enc = vector_length_encoding(this);
22286       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22287                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22288                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22289     } else {
22290       int vlen_enc = vector_length_encoding(this, $src);
22291       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22292                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22293                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22294     }
22295   %}
22296   ins_pipe( pipe_slow );
22297 %}
22298 
22299 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22300   predicate(VM_Version::supports_avx10_2() &&
22301             is_integral_type(Matcher::vector_element_basic_type(n)));
22302   match(Set dst (VectorCastF2X src));
22303   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22304   ins_encode %{
22305     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22306     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22307     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22308   %}
22309   ins_pipe( pipe_slow );
22310 %}
22311 
22312 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22313   predicate(VM_Version::supports_avx10_2() &&
22314             is_integral_type(Matcher::vector_element_basic_type(n)));
22315   match(Set dst (VectorCastF2X (LoadVector src)));
22316   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22317   ins_encode %{
22318     int vlen = Matcher::vector_length(this);
22319     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22320     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22321     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22322   %}
22323   ins_pipe( pipe_slow );
22324 %}
22325 
22326 instruct vcastDtoF_reg(vec dst, vec src) %{
22327   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22328   match(Set dst (VectorCastD2X src));
22329   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22330   ins_encode %{
22331     int vlen_enc = vector_length_encoding(this, $src);
22332     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22333   %}
22334   ins_pipe( pipe_slow );
22335 %}
22336 
22337 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22338   predicate(!VM_Version::supports_avx10_2() &&
22339             !VM_Version::supports_avx512vl() &&
22340             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22341             is_integral_type(Matcher::vector_element_basic_type(n)));
22342   match(Set dst (VectorCastD2X src));
22343   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22344   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22345   ins_encode %{
22346     int vlen_enc = vector_length_encoding(this, $src);
22347     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22348     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22349                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22350                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22351   %}
22352   ins_pipe( pipe_slow );
22353 %}
22354 
22355 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22356   predicate(!VM_Version::supports_avx10_2() &&
22357             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22358             is_integral_type(Matcher::vector_element_basic_type(n)));
22359   match(Set dst (VectorCastD2X src));
22360   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22361   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22362   ins_encode %{
22363     int vlen_enc = vector_length_encoding(this, $src);
22364     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22365     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22366                               ExternalAddress(vector_float_signflip());
22367     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22368                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22369   %}
22370   ins_pipe( pipe_slow );
22371 %}
22372 
22373 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22374   predicate(VM_Version::supports_avx10_2() &&
22375             is_integral_type(Matcher::vector_element_basic_type(n)));
22376   match(Set dst (VectorCastD2X src));
22377   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22378   ins_encode %{
22379     int vlen_enc = vector_length_encoding(this, $src);
22380     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22381     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22382   %}
22383   ins_pipe( pipe_slow );
22384 %}
22385 
22386 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22387   predicate(VM_Version::supports_avx10_2() &&
22388             is_integral_type(Matcher::vector_element_basic_type(n)));
22389   match(Set dst (VectorCastD2X (LoadVector src)));
22390   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22391   ins_encode %{
22392     int vlen = Matcher::vector_length(this);
22393     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22394     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22395     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22396   %}
22397   ins_pipe( pipe_slow );
22398 %}
22399 
22400 instruct vucast(vec dst, vec src) %{
22401   match(Set dst (VectorUCastB2X src));
22402   match(Set dst (VectorUCastS2X src));
22403   match(Set dst (VectorUCastI2X src));
22404   format %{ "vector_ucast $dst,$src\t!" %}
22405   ins_encode %{
22406     assert(UseAVX > 0, "required");
22407 
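          // The unsigned-cast nodes are widening (zero-extending) conversions,
          // so a single zero-extending move (vector_unsigned_cast) handles
          // every matched from/to pair.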
22408     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22409     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22410     int vlen_enc = vector_length_encoding(this);
22411     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22412   %}
22413   ins_pipe( pipe_slow );
22414 %}
22415 
22416 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22417   predicate(!VM_Version::supports_avx512vl() &&
22418             Matcher::vector_length_in_bytes(n) < 64 &&
22419             Matcher::vector_element_basic_type(n) == T_INT);
22420   match(Set dst (RoundVF src));
22421   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22422   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22423   ins_encode %{
22424     int vlen_enc = vector_length_encoding(this);
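          // MXCSR 0x3F80 masks all exceptions and sets RC=01 (round toward
          // negative infinity), matching round(x) == floor(x + 0.5); the
          // EnableX86ECoreOpts value additionally pre-sets the six exception
          // status flags, presumably to avoid status-update stalls on E-cores.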
22425     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22426     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22427                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22428                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22429   %}
22430   ins_pipe( pipe_slow );
22431 %}
22432 
22433 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22434   predicate((VM_Version::supports_avx512vl() ||
22435              Matcher::vector_length_in_bytes(n) == 64) &&
22436              Matcher::vector_element_basic_type(n) == T_INT);
22437   match(Set dst (RoundVF src));
22438   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22439   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22440   ins_encode %{
22441     int vlen_enc = vector_length_encoding(this);
22442     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22443     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22444                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22445                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22446   %}
22447   ins_pipe( pipe_slow );
22448 %}
22449 
22450 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22451   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22452   match(Set dst (RoundVD src));
22453   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
22454   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22455   ins_encode %{
22456     int vlen_enc = vector_length_encoding(this);
22457     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22458     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22459                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22460                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22461   %}
22462   ins_pipe( pipe_slow );
22463 %}
22464 
22465 // --------------------------------- VectorMaskCmp --------------------------------------
22466 
22467 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22468   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22469             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22470             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22471             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22472   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22473   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22474   ins_encode %{
22475     int vlen_enc = vector_length_encoding(this, $src1);
22476     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22477     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22478       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22479     } else {
22480       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22481     }
22482   %}
22483   ins_pipe( pipe_slow );
22484 %}
22485 
22486 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22487   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22488             n->bottom_type()->isa_vectmask() == nullptr &&
22489             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22490   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22491   effect(TEMP ktmp);
22492   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22493   ins_encode %{
22494     int vlen_enc = Assembler::AVX_512bit;
22495     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22496     KRegister mask = k0; // The comparison itself is not being masked.
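          // The AVX-512 compare writes a k-mask; expand it back into 0/-1
          // vector lanes with a zero-masked load of the all-bits-set constant.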
22497     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22498       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22499       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22500     } else {
22501       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22502       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22503     }
22504   %}
22505   ins_pipe( pipe_slow );
22506 %}
22507 
22508 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22509   predicate(n->bottom_type()->isa_vectmask() &&
22510             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22511   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22512   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22513   ins_encode %{
22514     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22515     int vlen_enc = vector_length_encoding(this, $src1);
22516     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22517     KRegister mask = k0; // The comparison itself is not being masked.
22518     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22519       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22520     } else {
22521       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22522     }
22523   %}
22524   ins_pipe( pipe_slow );
22525 %}
22526 
22527 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22528   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22529             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22530             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22531             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22532             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22533             (n->in(2)->get_int() == BoolTest::eq ||
22534              n->in(2)->get_int() == BoolTest::lt ||
22535              n->in(2)->get_int() == BoolTest::gt)); // cond
22536   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22537   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22538   ins_encode %{
22539     int vlen_enc = vector_length_encoding(this, $src1);
22540     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22541     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22542     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22543   %}
22544   ins_pipe( pipe_slow );
22545 %}
22546 
22547 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22548   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22549             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22550             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22551             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22552             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22553             (n->in(2)->get_int() == BoolTest::ne ||
22554              n->in(2)->get_int() == BoolTest::le ||
22555              n->in(2)->get_int() == BoolTest::ge)); // cond
22556   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22557   effect(TEMP dst, TEMP xtmp);
22558   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22559   ins_encode %{
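          // Only eq/gt/lt have direct packed compare encodings; ne/le/ge are
          // emitted as the complementary compare followed by a NOT, with
          // $xtmp used to materialize the all-ones constant for the xor.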
22560     int vlen_enc = vector_length_encoding(this, $src1);
22561     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22562     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22563     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22564   %}
22565   ins_pipe( pipe_slow );
22566 %}
22567 
22568 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22569   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22570             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22571             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22572             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22573             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22574   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22575   effect(TEMP dst, TEMP xtmp);
22576   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22577   ins_encode %{
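          // There is no packed unsigned compare before AVX-512, so flip the
          // sign bit of both operands and use the signed compare instead:
          // x <u y iff (x ^ signbit) <s (y ^ signbit).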
22578     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22579     int vlen_enc = vector_length_encoding(this, $src1);
22580     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22581     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22582 
22583     if (vlen_enc == Assembler::AVX_128bit) {
22584       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22585     } else {
22586       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22587     }
22588     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22589     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22590     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22591   %}
22592   ins_pipe( pipe_slow );
22593 %}
22594 
22595 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22596   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22597              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22598              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22599   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22600   effect(TEMP ktmp);
22601   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22602   ins_encode %{
22603     assert(UseAVX > 2, "required");
22604 
22605     int vlen_enc = vector_length_encoding(this, $src1);
22606     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22607     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22608     KRegister mask = k0; // The comparison itself is not being masked.
22609     bool merge = false;
22610     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22611 
22612     switch (src1_elem_bt) {
22613       case T_INT: {
22614         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22615         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22616         break;
22617       }
22618       case T_LONG: {
22619         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22620         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22621         break;
22622       }
22623       default: assert(false, "%s", type2name(src1_elem_bt));
22624     }
22625   %}
22626   ins_pipe( pipe_slow );
22627 %}
22628 
22629 
22630 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22631   predicate(n->bottom_type()->isa_vectmask() &&
22632             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22633   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22634   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22635   ins_encode %{
22636     assert(UseAVX > 2, "required");
22637     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22638 
22639     int vlen_enc = vector_length_encoding(this, $src1);
22640     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22641     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22642     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22643 
22644     // Dispatch the comparison on the source element type.
22645     switch (src1_elem_bt) {
22646       case T_BYTE: {
22647         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22648         break;
22649       }
22650       case T_SHORT: {
22651         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22652         break;
22653       }
22654       case T_INT: {
22655         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22656         break;
22657       }
22658       case T_LONG: {
22659         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22660         break;
22661       }
22662       default: assert(false, "%s", type2name(src1_elem_bt));
22663     }
22664   %}
22665   ins_pipe( pipe_slow );
22666 %}
22667 
22668 // Extract
22669 
22670 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22671   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22672   match(Set dst (ExtractI src idx));
22673   match(Set dst (ExtractS src idx));
22674   match(Set dst (ExtractB src idx));
22675   format %{ "extractI $dst,$src,$idx\t!" %}
22676   ins_encode %{
22677     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22678 
22679     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22680     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22681   %}
22682   ins_pipe( pipe_slow );
22683 %}
22684 
22685 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22686   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22687             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22688   match(Set dst (ExtractI src idx));
22689   match(Set dst (ExtractS src idx));
22690   match(Set dst (ExtractB src idx));
22691   effect(TEMP vtmp);
22692   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22693   ins_encode %{
22694     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22695 
22696     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
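          // Locate the 128-bit lane holding the element (extracting it into
          // $vtmp when needed), then extract the element from within that lane.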
22697     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22698     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22699   %}
22700   ins_pipe( pipe_slow );
22701 %}
22702 
22703 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22704   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22705   match(Set dst (ExtractL src idx));
22706   format %{ "extractL $dst,$src,$idx\t!" %}
22707   ins_encode %{
22708     assert(UseSSE >= 4, "required");
22709     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22710 
22711     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22712   %}
22713   ins_pipe( pipe_slow );
22714 %}
22715 
22716 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22717   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22718             Matcher::vector_length(n->in(1)) == 8);  // src
22719   match(Set dst (ExtractL src idx));
22720   effect(TEMP vtmp);
22721   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22722   ins_encode %{
22723     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22724 
22725     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22726     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22727   %}
22728   ins_pipe( pipe_slow );
22729 %}
22730 
22731 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22732   predicate(Matcher::vector_length(n->in(1)) <= 4);
22733   match(Set dst (ExtractF src idx));
22734   effect(TEMP dst, TEMP vtmp);
22735   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22736   ins_encode %{
22737     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22738 
22739     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22740   %}
22741   ins_pipe( pipe_slow );
22742 %}
22743 
22744 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22745   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22746             Matcher::vector_length(n->in(1)/*src*/) == 16);
22747   match(Set dst (ExtractF src idx));
22748   effect(TEMP vtmp);
22749   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22750   ins_encode %{
22751     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22752 
22753     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22754     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22755   %}
22756   ins_pipe( pipe_slow );
22757 %}
22758 
22759 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22760   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22761   match(Set dst (ExtractD src idx));
22762   format %{ "extractD $dst,$src,$idx\t!" %}
22763   ins_encode %{
22764     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22765 
22766     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22767   %}
22768   ins_pipe( pipe_slow );
22769 %}
22770 
22771 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22772   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22773             Matcher::vector_length(n->in(1)) == 8);  // src
22774   match(Set dst (ExtractD src idx));
22775   effect(TEMP vtmp);
22776   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22777   ins_encode %{
22778     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22779 
22780     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22781     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22782   %}
22783   ins_pipe( pipe_slow );
22784 %}
22785 
22786 // --------------------------------- Vector Blend --------------------------------------
22787 
22788 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22789   predicate(UseAVX == 0);
22790   match(Set dst (VectorBlend (Binary dst src) mask));
22791   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22792   effect(TEMP tmp);
22793   ins_encode %{
22794     assert(UseSSE >= 4, "required");
22795 
22796     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22797       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22798     }
22799     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22800   %}
22801   ins_pipe( pipe_slow );
22802 %}
22803 
22804 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22805   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22806             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22807             Matcher::vector_length_in_bytes(n) <= 32 &&
22808             is_integral_type(Matcher::vector_element_basic_type(n)));
22809   match(Set dst (VectorBlend (Binary src1 src2) mask));
22810   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22811   ins_encode %{
22812     int vlen_enc = vector_length_encoding(this);
22813     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22814   %}
22815   ins_pipe( pipe_slow );
22816 %}
22817 
22818 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22819   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22820             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22821             Matcher::vector_length_in_bytes(n) <= 32 &&
22822             !is_integral_type(Matcher::vector_element_basic_type(n)));
22823   match(Set dst (VectorBlend (Binary src1 src2) mask));
22824   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22825   ins_encode %{
22826     int vlen_enc = vector_length_encoding(this);
22827     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22828   %}
22829   ins_pipe( pipe_slow );
22830 %}
22831 
22832 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22833   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22834             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22835             Matcher::vector_length_in_bytes(n) <= 32);
22836   match(Set dst (VectorBlend (Binary src1 src2) mask));
22837   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22838   effect(TEMP vtmp, TEMP dst);
22839   ins_encode %{
22840     int vlen_enc = vector_length_encoding(this);
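          // EnableX86ECoreOpts avoids [v]pblendvb here; the same blend is
          // computed directly as dst = ($mask & $src2) | (~$mask & $src1).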
22841     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22842     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22843     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22844   %}
22845   ins_pipe( pipe_slow );
22846 %}
22847 
22848 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22849   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22850             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22851   match(Set dst (VectorBlend (Binary src1 src2) mask));
22852   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22853   effect(TEMP ktmp);
22854   ins_encode %{
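          // Convert the 0/-1 vector mask into a k-register (lanes equal to the
          // all-bits-set constant become set bits), then blend: a set bit
          // selects the $src2 lane, a clear bit the $src1 lane.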
22855     int vlen_enc = Assembler::AVX_512bit;
22856     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22857     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22858     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22859   %}
22860   ins_pipe( pipe_slow );
22861 %}
22862 
22863 
22864 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22865   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22866             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22867              VM_Version::supports_avx512bw()));
22868   match(Set dst (VectorBlend (Binary src1 src2) mask));
22869   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22870   ins_encode %{
22871     int vlen_enc = vector_length_encoding(this);
22872     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22873     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22874   %}
22875   ins_pipe( pipe_slow );
22876 %}
22877 
22878 // --------------------------------- ABS --------------------------------------
22879 // a = |a|
22880 instruct vabsB_reg(vec dst, vec src) %{
22881   match(Set dst (AbsVB  src));
22882   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22883   ins_encode %{
22884     uint vlen = Matcher::vector_length(this);
22885     if (vlen <= 16) {
22886       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22887     } else {
22888       int vlen_enc = vector_length_encoding(this);
22889       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22890     }
22891   %}
22892   ins_pipe( pipe_slow );
22893 %}
22894 
22895 instruct vabsS_reg(vec dst, vec src) %{
22896   match(Set dst (AbsVS  src));
22897   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22898   ins_encode %{
22899     uint vlen = Matcher::vector_length(this);
22900     if (vlen <= 8) {
22901       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22902     } else {
22903       int vlen_enc = vector_length_encoding(this);
22904       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22905     }
22906   %}
22907   ins_pipe( pipe_slow );
22908 %}
22909 
22910 instruct vabsI_reg(vec dst, vec src) %{
22911   match(Set dst (AbsVI  src));
22912   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22913   ins_encode %{
22914     uint vlen = Matcher::vector_length(this);
22915     if (vlen <= 4) {
22916       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22917     } else {
22918       int vlen_enc = vector_length_encoding(this);
22919       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22920     }
22921   %}
22922   ins_pipe( pipe_slow );
22923 %}
22924 
22925 instruct vabsL_reg(vec dst, vec src) %{
22926   match(Set dst (AbsVL  src));
22927   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22928   ins_encode %{
22929     assert(UseAVX > 2, "required");
22930     int vlen_enc = vector_length_encoding(this);
22931     if (!VM_Version::supports_avx512vl()) {
22932       vlen_enc = Assembler::AVX_512bit;
22933     }
22934     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22935   %}
22936   ins_pipe( pipe_slow );
22937 %}
22938 
22939 // --------------------------------- ABSNEG --------------------------------------
22940 
22941 instruct vabsnegF(vec dst, vec src) %{
22942   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22943   match(Set dst (AbsVF src));
22944   match(Set dst (NegVF src));
22945   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22946   ins_cost(150);
22947   ins_encode %{
22948     int opcode = this->ideal_Opcode();
22949     int vlen = Matcher::vector_length(this);
22950     if (vlen == 2) {
22951       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22952     } else {
22953       assert(vlen == 8 || vlen == 16, "required");
22954       int vlen_enc = vector_length_encoding(this);
22955       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22956     }
22957   %}
22958   ins_pipe( pipe_slow );
22959 %}
22960 
22961 instruct vabsneg4F(vec dst) %{
22962   predicate(Matcher::vector_length(n) == 4);
22963   match(Set dst (AbsVF dst));
22964   match(Set dst (NegVF dst));
22965   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22966   ins_cost(150);
22967   ins_encode %{
22968     int opcode = this->ideal_Opcode();
22969     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22970   %}
22971   ins_pipe( pipe_slow );
22972 %}
22973 
22974 instruct vabsnegD(vec dst, vec src) %{
22975   match(Set dst (AbsVD  src));
22976   match(Set dst (NegVD  src));
22977   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22978   ins_encode %{
22979     int opcode = this->ideal_Opcode();
22980     uint vlen = Matcher::vector_length(this);
22981     if (vlen == 2) {
22982       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22983     } else {
22984       int vlen_enc = vector_length_encoding(this);
22985       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22986     }
22987   %}
22988   ins_pipe( pipe_slow );
22989 %}
22990 
22991 //------------------------------------- VectorTest --------------------------------------------
22992 
22993 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22994   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22995   match(Set cr (VectorTest src1 src2));
22996   effect(TEMP vtmp);
22997   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22998   ins_encode %{
22999     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23000     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23001     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23002   %}
23003   ins_pipe( pipe_slow );
23004 %}
23005 
23006 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23007   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23008   match(Set cr (VectorTest src1 src2));
23009   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23010   ins_encode %{
23011     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23012     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23013     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23014   %}
23015   ins_pipe( pipe_slow );
23016 %}
23017 
23018 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23019   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23020              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23021             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23022   match(Set cr (VectorTest src1 src2));
23023   effect(TEMP tmp);
23024   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23025   ins_encode %{
23026     uint masklen = Matcher::vector_length(this, $src1);
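          // Move the mask to a GPR, keep only the masklen live bits, and
          // compare against all-ones: equality means every lane is true.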
23027     __ kmovwl($tmp$$Register, $src1$$KRegister);
23028     __ andl($tmp$$Register, (1 << masklen) - 1);
23029     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23030   %}
23031   ins_pipe( pipe_slow );
23032 %}
23033 
23034 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23035   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23036              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23037             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23038   match(Set cr (VectorTest src1 src2));
23039   effect(TEMP tmp);
23040   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23041   ins_encode %{
23042     uint masklen = Matcher::vector_length(this, $src1);
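          // andl leaves ZF clear iff at least one live mask bit is set, which
          // is exactly the "any true" (BoolTest::ne) flag condition.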
23043     __ kmovwl($tmp$$Register, $src1$$KRegister);
23044     __ andl($tmp$$Register, (1 << masklen) - 1);
23045   %}
23046   ins_pipe( pipe_slow );
23047 %}
23048 
23049 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23050   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23051             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23052   match(Set cr (VectorTest src1 src2));
23053   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23054   ins_encode %{
23055     uint masklen = Matcher::vector_length(this, $src1);
23056     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23057   %}
23058   ins_pipe( pipe_slow );
23059 %}
23060 
23061 //------------------------------------- LoadMask --------------------------------------------
23062 
23063 instruct loadMask(legVec dst, legVec src) %{
23064   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23065   match(Set dst (VectorLoadMask src));
23066   effect(TEMP dst);
23067   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23068   ins_encode %{
23069     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23070     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23071     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23072   %}
23073   ins_pipe( pipe_slow );
23074 %}
23075 
23076 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23077   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23078   match(Set dst (VectorLoadMask src));
23079   effect(TEMP xtmp);
23080   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23081   ins_encode %{
23082     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23083                         true, Assembler::AVX_512bit);
23084   %}
23085   ins_pipe( pipe_slow );
23086 %}
23087 
23088 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
23089   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23090   match(Set dst (VectorLoadMask src));
23091   effect(TEMP xtmp);
23092   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23093   ins_encode %{
23094     int vlen_enc = vector_length_encoding(in(1));
23095     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23096                         false, vlen_enc);
23097   %}
23098   ins_pipe( pipe_slow );
23099 %}
23100 
23101 //------------------------------------- StoreMask --------------------------------------------
23102 
23103 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23104   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23105   match(Set dst (VectorStoreMask src size));
23106   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23107   ins_encode %{
23108     int vlen = Matcher::vector_length(this);
23109     if (vlen <= 16 && UseAVX <= 2) {
23110       assert(UseSSE >= 3, "required");
23111       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23112     } else {
23113       assert(UseAVX > 0, "required");
23114       int src_vlen_enc = vector_length_encoding(this, $src);
23115       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23116     }
23117   %}
23118   ins_pipe( pipe_slow );
23119 %}
23120 
23121 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23122   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23123   match(Set dst (VectorStoreMask src size));
23124   effect(TEMP_DEF dst, TEMP xtmp);
23125   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23126   ins_encode %{
23127     int vlen_enc = Assembler::AVX_128bit;
23128     int vlen = Matcher::vector_length(this);
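          // Mask lanes are 0 or -1; pabs turns -1 into 1 and the saturating
          // packs narrow the shorts to the 0/1 bytes VectorStoreMask expects.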
23129     if (vlen <= 8) {
23130       assert(UseSSE >= 3, "required");
23131       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23132       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23133       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23134     } else {
23135       assert(UseAVX > 0, "required");
23136       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23137       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23138       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23139     }
23140   %}
23141   ins_pipe( pipe_slow );
23142 %}
23143 
23144 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23145   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23146   match(Set dst (VectorStoreMask src size));
23147   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23148   effect(TEMP_DEF dst, TEMP xtmp);
23149   ins_encode %{
23150     int vlen_enc = Assembler::AVX_128bit;
23151     int vlen = Matcher::vector_length(this);
23152     if (vlen <= 4) {
23153       assert(UseSSE >= 3, "required");
23154       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23155       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23156       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23157       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23158     } else {
23159       assert(UseAVX > 0, "required");
23160       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23161       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23162       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23163       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23164       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23165     }
23166   %}
23167   ins_pipe( pipe_slow );
23168 %}
23169 
23170 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23171   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23172   match(Set dst (VectorStoreMask src size));
23173   effect(TEMP_DEF dst, TEMP xtmp);
23174   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23175   ins_encode %{
23176     assert(UseSSE >= 3, "required");
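          // pshufd 0x8 gathers the low dwords of the two long lanes into
          // lanes 0-1; pabs plus the saturating packs then narrow the 0/-1
          // dwords down to the 0/1 bytes VectorStoreMask expects.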
23177     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23178     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23179     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23180     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23181     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23182   %}
23183   ins_pipe( pipe_slow );
23184 %}
23185 
23186 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23187   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23188   match(Set dst (VectorStoreMask src size));
23189   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23190   effect(TEMP_DEF dst, TEMP vtmp);
23191   ins_encode %{
23192     int vlen_enc = Assembler::AVX_128bit;
23193     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23194     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23195     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23196     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23197     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23198     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23199     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23200   %}
23201   ins_pipe( pipe_slow );
23202 %}
23203 
23204 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23205   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23206   match(Set dst (VectorStoreMask src size));
23207   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23208   ins_encode %{
23209     int src_vlen_enc = vector_length_encoding(this, $src);
23210     int dst_vlen_enc = vector_length_encoding(this);
23211     if (!VM_Version::supports_avx512vl()) {
23212       src_vlen_enc = Assembler::AVX_512bit;
23213     }
23214     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23215     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23216   %}
23217   ins_pipe( pipe_slow );
23218 %}
23219 
23220 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23221   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23222   match(Set dst (VectorStoreMask src size));
23223   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23224   ins_encode %{
23225     int src_vlen_enc = vector_length_encoding(this, $src);
23226     int dst_vlen_enc = vector_length_encoding(this);
23227     if (!VM_Version::supports_avx512vl()) {
23228       src_vlen_enc = Assembler::AVX_512bit;
23229     }
23230     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23231     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23232   %}
23233   ins_pipe( pipe_slow );
23234 %}
23235 
23236 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23237   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23238   match(Set dst (VectorStoreMask mask size));
23239   effect(TEMP_DEF dst);
23240   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23241   ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "required");
23243     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23244                  false, Assembler::AVX_512bit, noreg);
23245     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23246   %}
23247   ins_pipe( pipe_slow );
23248 %}
23249 
23250 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23251   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23252   match(Set dst (VectorStoreMask mask size));
23253   effect(TEMP_DEF dst);
23254   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23255   ins_encode %{
23256     int dst_vlen_enc = vector_length_encoding(this);
23257     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23258     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23259   %}
23260   ins_pipe( pipe_slow );
23261 %}
23262 
23263 instruct vmaskcast_evex(kReg dst) %{
23264   match(Set dst (VectorMaskCast dst));
23265   ins_cost(0);
23266   format %{ "vector_mask_cast $dst" %}
23267   ins_encode %{
23268     // empty
23269   %}
23270   ins_pipe(empty);
23271 %}
23272 
23273 instruct vmaskcast(vec dst) %{
23274   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23275   match(Set dst (VectorMaskCast dst));
23276   ins_cost(0);
23277   format %{ "vector_mask_cast $dst" %}
23278   ins_encode %{
23279     // empty
23280   %}
23281   ins_pipe(empty);
23282 %}
23283 
23284 instruct vmaskcast_avx(vec dst, vec src) %{
23285   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23286   match(Set dst (VectorMaskCast src));
23287   format %{ "vector_mask_cast $dst, $src" %}
23288   ins_encode %{
23289     int vlen = Matcher::vector_length(this);
23290     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23291     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23292     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23293   %}
23294   ins_pipe(pipe_slow);
23295 %}
23296 
23297 //-------------------------------- Load Iota Indices ----------------------------------
23298 
23299 instruct loadIotaIndices(vec dst, immI_0 src) %{
23300   match(Set dst (VectorLoadConst src));
23301   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23302   ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23306   %}
23307   ins_pipe( pipe_slow );
23308 %}
23309 
23310 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23311   match(Set dst (PopulateIndex src1 src2));
23312   effect(TEMP dst, TEMP vtmp);
23313   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23314   ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23322   %}
23323   ins_pipe( pipe_slow );
23324 %}
23325 
23326 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23327   match(Set dst (PopulateIndex src1 src2));
23328   effect(TEMP dst, TEMP vtmp);
23329   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23330   ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23338   %}
23339   ins_pipe( pipe_slow );
23340 %}
23341 
23342 //-------------------------------- Rearrange ----------------------------------
23343 
23344 // LoadShuffle/Rearrange for Byte
23345 instruct rearrangeB(vec dst, vec shuffle) %{
23346   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23347             Matcher::vector_length(n) < 32);
23348   match(Set dst (VectorRearrange dst shuffle));
23349   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23350   ins_encode %{
23351     assert(UseSSE >= 4, "required");
23352     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23353   %}
23354   ins_pipe( pipe_slow );
23355 %}
23356 
23357 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23358   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23359             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23360   match(Set dst (VectorRearrange src shuffle));
23361   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23362   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23363   ins_encode %{
23364     assert(UseAVX >= 2, "required");
23365     // Swap src into vtmp1
23366     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
23368     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
23370     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for shuffle entries that come from the other lane
23372     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23373     // Perform the blend
23374     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23375   %}
23376   ins_pipe( pipe_slow );
23377 %}
23378 
23379 
23380 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23381   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23382             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23383   match(Set dst (VectorRearrange src shuffle));
23384   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23385   format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23386   ins_encode %{
23387     int vlen_enc = vector_length_encoding(this);
23388     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23389                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23390                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23391   %}
23392   ins_pipe( pipe_slow );
23393 %}
23394 
23395 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23396   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23397             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23398   match(Set dst (VectorRearrange src shuffle));
23399   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23400   ins_encode %{
23401     int vlen_enc = vector_length_encoding(this);
23402     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23403   %}
23404   ins_pipe( pipe_slow );
23405 %}
23406 
23407 // LoadShuffle/Rearrange for Short
23408 
23409 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23410   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23411             !VM_Version::supports_avx512bw());
23412   match(Set dst (VectorLoadShuffle src));
23413   effect(TEMP dst, TEMP vtmp);
23414   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23415   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask,
    // since only a byte shuffle instruction is available on these platforms.
23418     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23419     if (UseAVX == 0) {
23420       assert(vlen_in_bytes <= 16, "required");
23421       // Multiply each shuffle by two to get byte index
23422       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23423       __ psllw($vtmp$$XMMRegister, 1);
23424 
23425       // Duplicate to create 2 copies of byte index
23426       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23427       __ psllw($dst$$XMMRegister, 8);
23428       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23429 
23430       // Add one to get alternate byte index
23431       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23432       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23433     } else {
23434       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23435       int vlen_enc = vector_length_encoding(this);
23436       // Multiply each shuffle by two to get byte index
23437       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23438 
23439       // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23441       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23442 
23443       // Add one to get alternate byte index
23444       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23445     }
23446   %}
23447   ins_pipe( pipe_slow );
23448 %}
23449 
23450 instruct rearrangeS(vec dst, vec shuffle) %{
23451   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23452             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23453   match(Set dst (VectorRearrange dst shuffle));
23454   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23455   ins_encode %{
23456     assert(UseSSE >= 4, "required");
23457     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23458   %}
23459   ins_pipe( pipe_slow );
23460 %}
23461 
23462 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23463   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23464             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23465   match(Set dst (VectorRearrange src shuffle));
23466   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23467   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23468   ins_encode %{
23469     assert(UseAVX >= 2, "required");
23470     // Swap src into vtmp1
23471     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
23473     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
23475     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for shuffle entries that come from the other lane
23477     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23478     // Perform the blend
23479     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23480   %}
23481   ins_pipe( pipe_slow );
23482 %}
23483 
23484 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23485   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23486             VM_Version::supports_avx512bw());
23487   match(Set dst (VectorRearrange src shuffle));
23488   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23489   ins_encode %{
23490     int vlen_enc = vector_length_encoding(this);
23491     if (!VM_Version::supports_avx512vl()) {
23492       vlen_enc = Assembler::AVX_512bit;
23493     }
23494     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23495   %}
23496   ins_pipe( pipe_slow );
23497 %}
23498 
23499 // LoadShuffle/Rearrange for Integer and Float
23500 
23501 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23502   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23503             Matcher::vector_length(n) == 4 && UseAVX == 0);
23504   match(Set dst (VectorLoadShuffle src));
23505   effect(TEMP dst, TEMP vtmp);
23506   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23507   ins_encode %{
23508     assert(UseSSE >= 4, "required");
23509 
    // Create a byte shuffle mask from the int shuffle mask,
    // since only a byte shuffle instruction is available on these platforms.
23512 
23513     // Duplicate and multiply each shuffle by 4
23514     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23515     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23516     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23517     __ psllw($vtmp$$XMMRegister, 2);
23518 
23519     // Duplicate again to create 4 copies of byte index
23520     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23521     __ psllw($dst$$XMMRegister, 8);
23522     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23523 
23524     // Add 3,2,1,0 to get alternate byte index
23525     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23526     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23527   %}
23528   ins_pipe( pipe_slow );
23529 %}
23530 
23531 instruct rearrangeI(vec dst, vec shuffle) %{
23532   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23533             UseAVX == 0);
23534   match(Set dst (VectorRearrange dst shuffle));
23535   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23536   ins_encode %{
23537     assert(UseSSE >= 4, "required");
23538     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23539   %}
23540   ins_pipe( pipe_slow );
23541 %}
23542 
23543 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23544   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23545             UseAVX > 0);
23546   match(Set dst (VectorRearrange src shuffle));
23547   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23548   ins_encode %{
23549     int vlen_enc = vector_length_encoding(this);
23550     BasicType bt = Matcher::vector_element_basic_type(this);
23551     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23552   %}
23553   ins_pipe( pipe_slow );
23554 %}
23555 
23556 // LoadShuffle/Rearrange for Long and Double
23557 
23558 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23559   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23560             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23561   match(Set dst (VectorLoadShuffle src));
23562   effect(TEMP dst, TEMP vtmp);
23563   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23564   ins_encode %{
23565     assert(UseAVX >= 2, "required");
23566 
23567     int vlen_enc = vector_length_encoding(this);
    // Create a double-word shuffle mask from the long shuffle mask,
    // since only a double-word shuffle instruction is available on these platforms.
23570 
23571     // Multiply each shuffle by two to get double word index
23572     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23573 
23574     // Duplicate each double word shuffle
23575     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23576     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23577 
23578     // Add one to get alternate double word index
23579     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23580   %}
23581   ins_pipe( pipe_slow );
23582 %}
23583 
23584 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23585   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23586             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23587   match(Set dst (VectorRearrange src shuffle));
23588   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23589   ins_encode %{
23590     assert(UseAVX >= 2, "required");
23591 
23592     int vlen_enc = vector_length_encoding(this);
23593     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23594   %}
23595   ins_pipe( pipe_slow );
23596 %}
23597 
23598 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23599   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23600             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23601   match(Set dst (VectorRearrange src shuffle));
23602   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23603   ins_encode %{
23604     assert(UseAVX > 2, "required");
23605 
23606     int vlen_enc = vector_length_encoding(this);
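    // vpermq with a vector index is only encodable at 256/512-bit widths, so
    // widen a 128-bit encoding to 256-bit; the extra upper lanes do not
    // affect the logical 128-bit result.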
23607     if (vlen_enc == Assembler::AVX_128bit) {
23608       vlen_enc = Assembler::AVX_256bit;
23609     }
23610     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23611   %}
23612   ins_pipe( pipe_slow );
23613 %}
23614 
23615 // --------------------------------- FMA --------------------------------------
23616 // a * b + c
23617 
23618 instruct vfmaF_reg(vec a, vec b, vec c) %{
23619   match(Set c (FmaVF  c (Binary a b)));
23620   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23621   ins_cost(150);
23622   ins_encode %{
23623     assert(UseFMA, "not enabled");
23624     int vlen_enc = vector_length_encoding(this);
23625     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23626   %}
23627   ins_pipe( pipe_slow );
23628 %}
23629 
23630 instruct vfmaF_mem(vec a, memory b, vec c) %{
23631   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23632   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23633   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23634   ins_cost(150);
23635   ins_encode %{
23636     assert(UseFMA, "not enabled");
23637     int vlen_enc = vector_length_encoding(this);
23638     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23639   %}
23640   ins_pipe( pipe_slow );
23641 %}
23642 
23643 instruct vfmaD_reg(vec a, vec b, vec c) %{
23644   match(Set c (FmaVD  c (Binary a b)));
23645   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23646   ins_cost(150);
23647   ins_encode %{
23648     assert(UseFMA, "not enabled");
23649     int vlen_enc = vector_length_encoding(this);
23650     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23651   %}
23652   ins_pipe( pipe_slow );
23653 %}
23654 
23655 instruct vfmaD_mem(vec a, memory b, vec c) %{
23656   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23657   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23658   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23659   ins_cost(150);
23660   ins_encode %{
23661     assert(UseFMA, "not enabled");
23662     int vlen_enc = vector_length_encoding(this);
23663     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23664   %}
23665   ins_pipe( pipe_slow );
23666 %}
23667 
23668 // --------------------------------- Vector Multiply Add --------------------------------------
23669 
23670 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23671   predicate(UseAVX == 0);
23672   match(Set dst (MulAddVS2VI dst src1));
23673   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23674   ins_encode %{
23675     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23676   %}
23677   ins_pipe( pipe_slow );
23678 %}
23679 
23680 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23681   predicate(UseAVX > 0);
23682   match(Set dst (MulAddVS2VI src1 src2));
23683   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23684   ins_encode %{
23685     int vlen_enc = vector_length_encoding(this);
23686     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23687   %}
23688   ins_pipe( pipe_slow );
23689 %}
23690 
23691 // --------------------------------- Vector Multiply Add Add ----------------------------------
23692 
23693 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23694   predicate(VM_Version::supports_avx512_vnni());
23695   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23696   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23697   ins_encode %{
23698     assert(UseAVX > 2, "required");
23699     int vlen_enc = vector_length_encoding(this);
23700     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23701   %}
23702   ins_pipe( pipe_slow );
23703   ins_cost(10);
23704 %}
23705 
23706 // --------------------------------- PopCount --------------------------------------
23707 
23708 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23709   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23710   match(Set dst (PopCountVI src));
23711   match(Set dst (PopCountVL src));
23712   format %{ "vector_popcount_integral $dst, $src" %}
23713   ins_encode %{
23715     int vlen_enc = vector_length_encoding(this, $src);
23716     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23717     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23718   %}
23719   ins_pipe( pipe_slow );
23720 %}
23721 
23722 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23723   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23724   match(Set dst (PopCountVI src mask));
23725   match(Set dst (PopCountVL src mask));
23726   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23727   ins_encode %{
23728     int vlen_enc = vector_length_encoding(this, $src);
23729     BasicType bt = Matcher::vector_element_basic_type(this, $src);
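    // Merge-masking: lanes whose $mask bit is clear must keep the src value,
    // so pre-copy src into dst before the masked popcount.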
23730     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23731     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23732   %}
23733   ins_pipe( pipe_slow );
23734 %}
23735 
23736 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23737   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23738   match(Set dst (PopCountVI src));
23739   match(Set dst (PopCountVL src));
23740   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23741   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23742   ins_encode %{
23744     int vlen_enc = vector_length_encoding(this, $src);
23745     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23746     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23747                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23748   %}
23749   ins_pipe( pipe_slow );
23750 %}
23751 
23752 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23753 
23754 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23755   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23756                                               Matcher::vector_length_in_bytes(n->in(1))));
23757   match(Set dst (CountTrailingZerosV src));
23758   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23759   ins_cost(400);
23760   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
23761   ins_encode %{
23762     int vlen_enc = vector_length_encoding(this, $src);
23763     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23764     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23765                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23766   %}
23767   ins_pipe( pipe_slow );
23768 %}
23769 
23770 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23771   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23772             VM_Version::supports_avx512cd() &&
23773             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23774   match(Set dst (CountTrailingZerosV src));
23775   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23776   ins_cost(400);
23777   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23778   ins_encode %{
23779     int vlen_enc = vector_length_encoding(this, $src);
23780     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23781     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23782                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23783   %}
23784   ins_pipe( pipe_slow );
23785 %}
23786 
23787 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23788   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23789   match(Set dst (CountTrailingZerosV src));
23790   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23791   ins_cost(400);
23792   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23793   ins_encode %{
23794     int vlen_enc = vector_length_encoding(this, $src);
23795     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23796     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23797                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23798                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23799   %}
23800   ins_pipe( pipe_slow );
23801 %}
23802 
23803 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23804   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23805   match(Set dst (CountTrailingZerosV src));
23806   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23807   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23808   ins_encode %{
23809     int vlen_enc = vector_length_encoding(this, $src);
23810     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23811     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23812                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23813   %}
23814   ins_pipe( pipe_slow );
23815 %}
23816 
23817 
23818 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23819 
23820 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23821   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23822   effect(TEMP dst);
23823   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23824   ins_encode %{
23825     int vector_len = vector_length_encoding(this);
23826     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23827   %}
23828   ins_pipe( pipe_slow );
23829 %}
23830 
23831 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23832   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23833   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23834   effect(TEMP dst);
23835   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23836   ins_encode %{
23837     int vector_len = vector_length_encoding(this);
23838     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23839   %}
23840   ins_pipe( pipe_slow );
23841 %}
23842 
23843 // --------------------------------- Rotation Operations ----------------------------------
23844 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23845   match(Set dst (RotateLeftV src shift));
23846   match(Set dst (RotateRightV src shift));
23847   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23848   ins_encode %{
23849     int opcode      = this->ideal_Opcode();
23850     int vector_len  = vector_length_encoding(this);
23851     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23852     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23853   %}
23854   ins_pipe( pipe_slow );
23855 %}
23856 
23857 instruct vprorate(vec dst, vec src, vec shift) %{
23858   match(Set dst (RotateLeftV src shift));
23859   match(Set dst (RotateRightV src shift));
23860   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23861   ins_encode %{
23862     int opcode      = this->ideal_Opcode();
23863     int vector_len  = vector_length_encoding(this);
23864     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23865     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23866   %}
23867   ins_pipe( pipe_slow );
23868 %}
23869 
23870 // ---------------------------------- Masked Operations ------------------------------------
23871 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23872   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23873   match(Set dst (LoadVectorMasked mem mask));
23874   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23875   ins_encode %{
23876     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23877     int vlen_enc = vector_length_encoding(this);
23878     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23879   %}
23880   ins_pipe( pipe_slow );
23881 %}
23882 
23883 
23884 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23885   predicate(n->in(3)->bottom_type()->isa_vectmask());
23886   match(Set dst (LoadVectorMasked mem mask));
23887   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23888   ins_encode %{
23889     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
23890     int vector_len = vector_length_encoding(this);
23891     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23892   %}
23893   ins_pipe( pipe_slow );
23894 %}
23895 
23896 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23897   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23898   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23899   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23900   ins_encode %{
23901     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23902     int vlen_enc = vector_length_encoding(src_node);
23903     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23904     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23905   %}
23906   ins_pipe( pipe_slow );
23907 %}
23908 
23909 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23910   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23911   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23912   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23913   ins_encode %{
23914     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23915     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23916     int vlen_enc = vector_length_encoding(src_node);
23917     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23918   %}
23919   ins_pipe( pipe_slow );
23920 %}
23921 
23922 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23923   match(Set addr (VerifyVectorAlignment addr mask));
23924   effect(KILL cr);
23925   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23926   ins_encode %{
23927     Label Lskip;
23928     // check if masked bits of addr are zero
23929     __ testq($addr$$Register, $mask$$constant);
23930     __ jccb(Assembler::equal, Lskip);
23931     __ stop("verify_vector_alignment found a misaligned vector memory access");
23932     __ bind(Lskip);
23933   %}
23934   ins_pipe(pipe_slow);
23935 %}
23936 
23937 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23938   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23939   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23940   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23941   ins_encode %{
23942     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23943     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23944 
23945     Label DONE;
23946     int vlen_enc = vector_length_encoding(this, $src1);
23947     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23948 
23949     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23950     __ mov64($dst$$Register, -1L);
23951     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23952     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23953     __ jccb(Assembler::carrySet, DONE);
23954     __ kmovql($dst$$Register, $ktmp1$$KRegister);
23955     __ notq($dst$$Register);
23956     __ tzcntq($dst$$Register, $dst$$Register);
23957     __ bind(DONE);
23958   %}
23959   ins_pipe( pipe_slow );
23960 %}
23961 
23962 
23963 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23964   match(Set dst (VectorMaskGen len));
23965   effect(TEMP temp, KILL cr);
23966   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23967   ins_encode %{
23968     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23969   %}
23970   ins_pipe( pipe_slow );
23971 %}
23972 
23973 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23974   match(Set dst (VectorMaskGen len));
23975   format %{ "vector_mask_gen $len \t! vector mask generator" %}
23976   effect(TEMP temp);
23977   ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23979     __ kmovql($dst$$KRegister, $temp$$Register);
23980   %}
23981   ins_pipe( pipe_slow );
23982 %}
23983 
23984 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23985   predicate(n->in(1)->bottom_type()->isa_vectmask());
23986   match(Set dst (VectorMaskToLong mask));
23987   effect(TEMP dst, KILL cr);
23988   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23989   ins_encode %{
23990     int opcode = this->ideal_Opcode();
23991     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23992     int mask_len = Matcher::vector_length(this, $mask);
23993     int mask_size = mask_len * type2aelembytes(mbt);
23994     int vlen_enc = vector_length_encoding(this, $mask);
23995     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23996                              $dst$$Register, mask_len, mask_size, vlen_enc);
23997   %}
23998   ins_pipe( pipe_slow );
23999 %}
24000 
24001 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24002   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24003   match(Set dst (VectorMaskToLong mask));
24004   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24005   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24006   ins_encode %{
24007     int opcode = this->ideal_Opcode();
24008     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24009     int mask_len = Matcher::vector_length(this, $mask);
24010     int vlen_enc = vector_length_encoding(this, $mask);
24011     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24012                              $dst$$Register, mask_len, mbt, vlen_enc);
24013   %}
24014   ins_pipe( pipe_slow );
24015 %}
24016 
24017 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24018   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24019   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24020   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24021   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24022   ins_encode %{
24023     int opcode = this->ideal_Opcode();
24024     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24025     int mask_len = Matcher::vector_length(this, $mask);
24026     int vlen_enc = vector_length_encoding(this, $mask);
24027     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24028                              $dst$$Register, mask_len, mbt, vlen_enc);
24029   %}
24030   ins_pipe( pipe_slow );
24031 %}
24032 
24033 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24034   predicate(n->in(1)->bottom_type()->isa_vectmask());
24035   match(Set dst (VectorMaskTrueCount mask));
24036   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24037   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24038   ins_encode %{
24039     int opcode = this->ideal_Opcode();
24040     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24041     int mask_len = Matcher::vector_length(this, $mask);
24042     int mask_size = mask_len * type2aelembytes(mbt);
24043     int vlen_enc = vector_length_encoding(this, $mask);
24044     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24045                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24046   %}
24047   ins_pipe( pipe_slow );
24048 %}
24049 
24050 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24051   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24052   match(Set dst (VectorMaskTrueCount mask));
24053   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24054   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24055   ins_encode %{
24056     int opcode = this->ideal_Opcode();
24057     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24058     int mask_len = Matcher::vector_length(this, $mask);
24059     int vlen_enc = vector_length_encoding(this, $mask);
24060     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24061                              $tmp$$Register, mask_len, mbt, vlen_enc);
24062   %}
24063   ins_pipe( pipe_slow );
24064 %}
24065 
24066 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24067   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24068   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24069   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24070   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24071   ins_encode %{
24072     int opcode = this->ideal_Opcode();
24073     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24074     int mask_len = Matcher::vector_length(this, $mask);
24075     int vlen_enc = vector_length_encoding(this, $mask);
24076     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24077                              $tmp$$Register, mask_len, mbt, vlen_enc);
24078   %}
24079   ins_pipe( pipe_slow );
24080 %}
24081 
24082 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24083   predicate(n->in(1)->bottom_type()->isa_vectmask());
24084   match(Set dst (VectorMaskFirstTrue mask));
24085   match(Set dst (VectorMaskLastTrue mask));
24086   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24087   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24088   ins_encode %{
24089     int opcode = this->ideal_Opcode();
24090     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24091     int mask_len = Matcher::vector_length(this, $mask);
24092     int mask_size = mask_len * type2aelembytes(mbt);
24093     int vlen_enc = vector_length_encoding(this, $mask);
24094     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24095                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24096   %}
24097   ins_pipe( pipe_slow );
24098 %}
24099 
24100 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24101   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24102   match(Set dst (VectorMaskFirstTrue mask));
24103   match(Set dst (VectorMaskLastTrue mask));
24104   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24105   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24106   ins_encode %{
24107     int opcode = this->ideal_Opcode();
24108     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24109     int mask_len = Matcher::vector_length(this, $mask);
24110     int vlen_enc = vector_length_encoding(this, $mask);
24111     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24112                              $tmp$$Register, mask_len, mbt, vlen_enc);
24113   %}
24114   ins_pipe( pipe_slow );
24115 %}
24116 
24117 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24118   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24119   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24120   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24121   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24122   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24123   ins_encode %{
24124     int opcode = this->ideal_Opcode();
24125     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24126     int mask_len = Matcher::vector_length(this, $mask);
24127     int vlen_enc = vector_length_encoding(this, $mask);
24128     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24129                              $tmp$$Register, mask_len, mbt, vlen_enc);
24130   %}
24131   ins_pipe( pipe_slow );
24132 %}
24133 
24134 // --------------------------------- Compress/Expand Operations ---------------------------
24135 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24136   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24137   match(Set dst (CompressV src mask));
24138   match(Set dst (ExpandV src mask));
24139   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24140   format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24141   ins_encode %{
24142     int opcode = this->ideal_Opcode();
24143     int vlen_enc = vector_length_encoding(this);
24144     BasicType bt  = Matcher::vector_element_basic_type(this);
24145     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24146                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24147   %}
24148   ins_pipe( pipe_slow );
24149 %}
24150 
24151 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24152   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24153   match(Set dst (CompressV src mask));
24154   match(Set dst (ExpandV src mask));
24155   format %{ "vector_compress_expand $dst, $src, $mask" %}
24156   ins_encode %{
24157     int opcode = this->ideal_Opcode();
24158     int vector_len = vector_length_encoding(this);
24159     BasicType bt  = Matcher::vector_element_basic_type(this);
24160     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24161   %}
24162   ins_pipe( pipe_slow );
24163 %}
24164 
24165 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24166   match(Set dst (CompressM mask));
24167   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24168   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24169   ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "required");
24171     int mask_len = Matcher::vector_length(this);
24172     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24173   %}
24174   ins_pipe( pipe_slow );
24175 %}
24176 
24177 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24178 
24179 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24180   predicate(!VM_Version::supports_gfni());
24181   match(Set dst (ReverseV src));
24182   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24183   format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24184   ins_encode %{
24185     int vec_enc = vector_length_encoding(this);
24186     BasicType bt = Matcher::vector_element_basic_type(this);
24187     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24188                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24189   %}
24190   ins_pipe( pipe_slow );
24191 %}
24192 
24193 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24194   predicate(VM_Version::supports_gfni());
24195   match(Set dst (ReverseV src));
24196   effect(TEMP dst, TEMP xtmp);
24197   format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
24198   ins_encode %{
24199     int vec_enc = vector_length_encoding(this);
24200     BasicType bt  = Matcher::vector_element_basic_type(this);
24201     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24202     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24203                                $xtmp$$XMMRegister);
24204   %}
24205   ins_pipe( pipe_slow );
24206 %}
24207 
24208 instruct vreverse_byte_reg(vec dst, vec src) %{
24209   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24210   match(Set dst (ReverseBytesV src));
24211   effect(TEMP dst);
24212   format %{ "vector_reverse_byte $dst, $src" %}
24213   ins_encode %{
24214     int vec_enc = vector_length_encoding(this);
24215     BasicType bt = Matcher::vector_element_basic_type(this);
24216     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24217   %}
24218   ins_pipe( pipe_slow );
24219 %}
24220 
24221 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24222   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24223   match(Set dst (ReverseBytesV src));
24224   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24225   format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24226   ins_encode %{
24227     int vec_enc = vector_length_encoding(this);
24228     BasicType bt = Matcher::vector_element_basic_type(this);
24229     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24230                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24231   %}
24232   ins_pipe( pipe_slow );
24233 %}
24234 
24235 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24236 
24237 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24238   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24239                                               Matcher::vector_length_in_bytes(n->in(1))));
24240   match(Set dst (CountLeadingZerosV src));
24241   format %{ "vector_count_leading_zeros $dst, $src" %}
24242   ins_encode %{
24243      int vlen_enc = vector_length_encoding(this, $src);
24244      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24245      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24246                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24247   %}
24248   ins_pipe( pipe_slow );
24249 %}
24250 
24251 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24252   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24253                                               Matcher::vector_length_in_bytes(n->in(1))));
24254   match(Set dst (CountLeadingZerosV src mask));
24255   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24256   ins_encode %{
24257     int vlen_enc = vector_length_encoding(this, $src);
24258     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24259     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24260     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24261                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24262   %}
24263   ins_pipe( pipe_slow );
24264 %}
24265 
24266 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24267   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24268             VM_Version::supports_avx512cd() &&
24269             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24270   match(Set dst (CountLeadingZerosV src));
24271   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24272   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
24273   ins_encode %{
24274     int vlen_enc = vector_length_encoding(this, $src);
24275     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24276     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24277                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24278   %}
24279   ins_pipe( pipe_slow );
24280 %}
24281 
24282 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24283   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24284   match(Set dst (CountLeadingZerosV src));
24285   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24286   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24287   ins_encode %{
24288     int vlen_enc = vector_length_encoding(this, $src);
24289     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24290     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24291                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24292                                        $rtmp$$Register, true, vlen_enc);
24293   %}
24294   ins_pipe( pipe_slow );
24295 %}
24296 
24297 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24298   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24299             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24300   match(Set dst (CountLeadingZerosV src));
24301   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24302   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24303   ins_encode %{
24304     int vlen_enc = vector_length_encoding(this, $src);
24305     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24306     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24307                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24308   %}
24309   ins_pipe( pipe_slow );
24310 %}
24311 
24312 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24313   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24314             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24315   match(Set dst (CountLeadingZerosV src));
24316   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24317   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24318   ins_encode %{
24319     int vlen_enc = vector_length_encoding(this, $src);
24320     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24321     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24322                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24323   %}
24324   ins_pipe( pipe_slow );
24325 %}
24326 
24327 // ---------------------------------- Vector Masked Operations ------------------------------------
24328 
24329 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24330   match(Set dst (AddVB (Binary dst src2) mask));
24331   match(Set dst (AddVS (Binary dst src2) mask));
24332   match(Set dst (AddVI (Binary dst src2) mask));
24333   match(Set dst (AddVL (Binary dst src2) mask));
24334   match(Set dst (AddVF (Binary dst src2) mask));
24335   match(Set dst (AddVD (Binary dst src2) mask));
24336   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24337   ins_encode %{
24338     int vlen_enc = vector_length_encoding(this);
24339     BasicType bt = Matcher::vector_element_basic_type(this);
24340     int opc = this->ideal_Opcode();
24341     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24342                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24343   %}
24344   ins_pipe( pipe_slow );
24345 %}
24346 
24347 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24348   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24349   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24350   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24351   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24352   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24353   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24354   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24355   ins_encode %{
24356     int vlen_enc = vector_length_encoding(this);
24357     BasicType bt = Matcher::vector_element_basic_type(this);
24358     int opc = this->ideal_Opcode();
24359     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24360                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24361   %}
24362   ins_pipe( pipe_slow );
24363 %}
24364 
24365 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24366   match(Set dst (XorV (Binary dst src2) mask));
24367   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24368   ins_encode %{
24369     int vlen_enc = vector_length_encoding(this);
24370     BasicType bt = Matcher::vector_element_basic_type(this);
24371     int opc = this->ideal_Opcode();
24372     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24373                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24374   %}
24375   ins_pipe( pipe_slow );
24376 %}
24377 
24378 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24379   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24380   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24381   ins_encode %{
24382     int vlen_enc = vector_length_encoding(this);
24383     BasicType bt = Matcher::vector_element_basic_type(this);
24384     int opc = this->ideal_Opcode();
24385     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24386                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24387   %}
24388   ins_pipe( pipe_slow );
24389 %}
24390 
24391 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24392   match(Set dst (OrV (Binary dst src2) mask));
24393   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24394   ins_encode %{
24395     int vlen_enc = vector_length_encoding(this);
24396     BasicType bt = Matcher::vector_element_basic_type(this);
24397     int opc = this->ideal_Opcode();
24398     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24399                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24400   %}
24401   ins_pipe( pipe_slow );
24402 %}
24403 
24404 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24405   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24406   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24407   ins_encode %{
24408     int vlen_enc = vector_length_encoding(this);
24409     BasicType bt = Matcher::vector_element_basic_type(this);
24410     int opc = this->ideal_Opcode();
24411     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24412                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24413   %}
24414   ins_pipe( pipe_slow );
24415 %}
24416 
24417 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24418   match(Set dst (AndV (Binary dst src2) mask));
24419   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24420   ins_encode %{
24421     int vlen_enc = vector_length_encoding(this);
24422     BasicType bt = Matcher::vector_element_basic_type(this);
24423     int opc = this->ideal_Opcode();
24424     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24425                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24426   %}
24427   ins_pipe( pipe_slow );
24428 %}
24429 
24430 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24431   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24432   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24433   ins_encode %{
24434     int vlen_enc = vector_length_encoding(this);
24435     BasicType bt = Matcher::vector_element_basic_type(this);
24436     int opc = this->ideal_Opcode();
24437     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24438                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24439   %}
24440   ins_pipe( pipe_slow );
24441 %}
24442 
24443 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24444   match(Set dst (SubVB (Binary dst src2) mask));
24445   match(Set dst (SubVS (Binary dst src2) mask));
24446   match(Set dst (SubVI (Binary dst src2) mask));
24447   match(Set dst (SubVL (Binary dst src2) mask));
24448   match(Set dst (SubVF (Binary dst src2) mask));
24449   match(Set dst (SubVD (Binary dst src2) mask));
24450   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24451   ins_encode %{
24452     int vlen_enc = vector_length_encoding(this);
24453     BasicType bt = Matcher::vector_element_basic_type(this);
24454     int opc = this->ideal_Opcode();
24455     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24456                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24457   %}
24458   ins_pipe( pipe_slow );
24459 %}
24460 
24461 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24462   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24463   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24464   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24465   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24466   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24467   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24468   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24469   ins_encode %{
24470     int vlen_enc = vector_length_encoding(this);
24471     BasicType bt = Matcher::vector_element_basic_type(this);
24472     int opc = this->ideal_Opcode();
24473     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24474                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24475   %}
24476   ins_pipe( pipe_slow );
24477 %}
24478 
24479 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24480   match(Set dst (MulVS (Binary dst src2) mask));
24481   match(Set dst (MulVI (Binary dst src2) mask));
24482   match(Set dst (MulVL (Binary dst src2) mask));
24483   match(Set dst (MulVF (Binary dst src2) mask));
24484   match(Set dst (MulVD (Binary dst src2) mask));
24485   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24486   ins_encode %{
24487     int vlen_enc = vector_length_encoding(this);
24488     BasicType bt = Matcher::vector_element_basic_type(this);
24489     int opc = this->ideal_Opcode();
24490     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24491                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24492   %}
24493   ins_pipe( pipe_slow );
24494 %}
24495 
24496 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24497   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24498   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24499   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24500   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24501   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24502   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24503   ins_encode %{
24504     int vlen_enc = vector_length_encoding(this);
24505     BasicType bt = Matcher::vector_element_basic_type(this);
24506     int opc = this->ideal_Opcode();
24507     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24508                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24509   %}
24510   ins_pipe( pipe_slow );
24511 %}
24512 
24513 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24514   match(Set dst (SqrtVF dst mask));
24515   match(Set dst (SqrtVD dst mask));
24516   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24517   ins_encode %{
24518     int vlen_enc = vector_length_encoding(this);
24519     BasicType bt = Matcher::vector_element_basic_type(this);
24520     int opc = this->ideal_Opcode();
24521     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24522                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24523   %}
24524   ins_pipe( pipe_slow );
24525 %}
24526 
24527 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24528   match(Set dst (DivVF (Binary dst src2) mask));
24529   match(Set dst (DivVD (Binary dst src2) mask));
24530   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24531   ins_encode %{
24532     int vlen_enc = vector_length_encoding(this);
24533     BasicType bt = Matcher::vector_element_basic_type(this);
24534     int opc = this->ideal_Opcode();
24535     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24536                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24537   %}
24538   ins_pipe( pipe_slow );
24539 %}
24540 
24541 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24542   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24543   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24544   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24545   ins_encode %{
24546     int vlen_enc = vector_length_encoding(this);
24547     BasicType bt = Matcher::vector_element_basic_type(this);
24548     int opc = this->ideal_Opcode();
24549     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24550                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24551   %}
24552   ins_pipe( pipe_slow );
24553 %}
24554 
24555 
24556 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24557   match(Set dst (RotateLeftV (Binary dst shift) mask));
24558   match(Set dst (RotateRightV (Binary dst shift) mask));
24559   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24560   ins_encode %{
24561     int vlen_enc = vector_length_encoding(this);
24562     BasicType bt = Matcher::vector_element_basic_type(this);
24563     int opc = this->ideal_Opcode();
24564     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24565                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24566   %}
24567   ins_pipe( pipe_slow );
24568 %}
24569 
24570 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24571   match(Set dst (RotateLeftV (Binary dst src2) mask));
24572   match(Set dst (RotateRightV (Binary dst src2) mask));
24573   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24574   ins_encode %{
24575     int vlen_enc = vector_length_encoding(this);
24576     BasicType bt = Matcher::vector_element_basic_type(this);
24577     int opc = this->ideal_Opcode();
24578     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24579                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24580   %}
24581   ins_pipe( pipe_slow );
24582 %}
24583 
24584 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24585   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24586   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24587   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24588   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24589   ins_encode %{
24590     int vlen_enc = vector_length_encoding(this);
24591     BasicType bt = Matcher::vector_element_basic_type(this);
24592     int opc = this->ideal_Opcode();
24593     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24594                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24595   %}
24596   ins_pipe( pipe_slow );
24597 %}
24598 
24599 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24600   predicate(!n->as_ShiftV()->is_var_shift());
24601   match(Set dst (LShiftVS (Binary dst src2) mask));
24602   match(Set dst (LShiftVI (Binary dst src2) mask));
24603   match(Set dst (LShiftVL (Binary dst src2) mask));
24604   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24605   ins_encode %{
24606     int vlen_enc = vector_length_encoding(this);
24607     BasicType bt = Matcher::vector_element_basic_type(this);
24608     int opc = this->ideal_Opcode();
24609     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24610                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24611   %}
24612   ins_pipe( pipe_slow );
24613 %}
24614 
24615 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24616   predicate(n->as_ShiftV()->is_var_shift());
24617   match(Set dst (LShiftVS (Binary dst src2) mask));
24618   match(Set dst (LShiftVI (Binary dst src2) mask));
24619   match(Set dst (LShiftVL (Binary dst src2) mask));
24620   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24621   ins_encode %{
24622     int vlen_enc = vector_length_encoding(this);
24623     BasicType bt = Matcher::vector_element_basic_type(this);
24624     int opc = this->ideal_Opcode();
24625     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24626                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24627   %}
24628   ins_pipe( pipe_slow );
24629 %}
24630 
24631 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24632   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24633   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24634   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24635   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24636   ins_encode %{
24637     int vlen_enc = vector_length_encoding(this);
24638     BasicType bt = Matcher::vector_element_basic_type(this);
24639     int opc = this->ideal_Opcode();
24640     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24641                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24642   %}
24643   ins_pipe( pipe_slow );
24644 %}
24645 
24646 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24647   predicate(!n->as_ShiftV()->is_var_shift());
24648   match(Set dst (RShiftVS (Binary dst src2) mask));
24649   match(Set dst (RShiftVI (Binary dst src2) mask));
24650   match(Set dst (RShiftVL (Binary dst src2) mask));
24651   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24652   ins_encode %{
24653     int vlen_enc = vector_length_encoding(this);
24654     BasicType bt = Matcher::vector_element_basic_type(this);
24655     int opc = this->ideal_Opcode();
24656     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24657                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24658   %}
24659   ins_pipe( pipe_slow );
24660 %}
24661 
24662 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24663   predicate(n->as_ShiftV()->is_var_shift());
24664   match(Set dst (RShiftVS (Binary dst src2) mask));
24665   match(Set dst (RShiftVI (Binary dst src2) mask));
24666   match(Set dst (RShiftVL (Binary dst src2) mask));
24667   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24668   ins_encode %{
24669     int vlen_enc = vector_length_encoding(this);
24670     BasicType bt = Matcher::vector_element_basic_type(this);
24671     int opc = this->ideal_Opcode();
24672     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24673                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24674   %}
24675   ins_pipe( pipe_slow );
24676 %}
24677 
24678 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24679   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24680   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24681   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24682   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24683   ins_encode %{
24684     int vlen_enc = vector_length_encoding(this);
24685     BasicType bt = Matcher::vector_element_basic_type(this);
24686     int opc = this->ideal_Opcode();
24687     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24688                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24689   %}
24690   ins_pipe( pipe_slow );
24691 %}
24692 
24693 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24694   predicate(!n->as_ShiftV()->is_var_shift());
24695   match(Set dst (URShiftVS (Binary dst src2) mask));
24696   match(Set dst (URShiftVI (Binary dst src2) mask));
24697   match(Set dst (URShiftVL (Binary dst src2) mask));
24698   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24699   ins_encode %{
24700     int vlen_enc = vector_length_encoding(this);
24701     BasicType bt = Matcher::vector_element_basic_type(this);
24702     int opc = this->ideal_Opcode();
24703     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24704                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24705   %}
24706   ins_pipe( pipe_slow );
24707 %}
24708 
24709 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24710   predicate(n->as_ShiftV()->is_var_shift());
24711   match(Set dst (URShiftVS (Binary dst src2) mask));
24712   match(Set dst (URShiftVI (Binary dst src2) mask));
24713   match(Set dst (URShiftVL (Binary dst src2) mask));
24714   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24715   ins_encode %{
24716     int vlen_enc = vector_length_encoding(this);
24717     BasicType bt = Matcher::vector_element_basic_type(this);
24718     int opc = this->ideal_Opcode();
24719     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24720                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24721   %}
24722   ins_pipe( pipe_slow );
24723 %}
24724 
24725 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24726   match(Set dst (MaxV (Binary dst src2) mask));
24727   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24728   ins_encode %{
24729     int vlen_enc = vector_length_encoding(this);
24730     BasicType bt = Matcher::vector_element_basic_type(this);
24731     int opc = this->ideal_Opcode();
24732     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24733                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24734   %}
24735   ins_pipe( pipe_slow );
24736 %}
24737 
24738 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24739   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24740   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24741   ins_encode %{
24742     int vlen_enc = vector_length_encoding(this);
24743     BasicType bt = Matcher::vector_element_basic_type(this);
24744     int opc = this->ideal_Opcode();
24745     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24746                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24747   %}
24748   ins_pipe( pipe_slow );
24749 %}
24750 
24751 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24752   match(Set dst (MinV (Binary dst src2) mask));
24753   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24754   ins_encode %{
24755     int vlen_enc = vector_length_encoding(this);
24756     BasicType bt = Matcher::vector_element_basic_type(this);
24757     int opc = this->ideal_Opcode();
24758     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24759                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24760   %}
24761   ins_pipe( pipe_slow );
24762 %}
24763 
24764 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24765   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24766   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24767   ins_encode %{
24768     int vlen_enc = vector_length_encoding(this);
24769     BasicType bt = Matcher::vector_element_basic_type(this);
24770     int opc = this->ideal_Opcode();
24771     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24772                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24773   %}
24774   ins_pipe( pipe_slow );
24775 %}
24776 
24777 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24778   match(Set dst (VectorRearrange (Binary dst src2) mask));
24779   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24780   ins_encode %{
24781     int vlen_enc = vector_length_encoding(this);
24782     BasicType bt = Matcher::vector_element_basic_type(this);
24783     int opc = this->ideal_Opcode();
24784     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24785                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24786   %}
24787   ins_pipe( pipe_slow );
24788 %}
24789 
24790 instruct vabs_masked(vec dst, kReg mask) %{
24791   match(Set dst (AbsVB dst mask));
24792   match(Set dst (AbsVS dst mask));
24793   match(Set dst (AbsVI dst mask));
24794   match(Set dst (AbsVL dst mask));
24795   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24796   ins_encode %{
24797     int vlen_enc = vector_length_encoding(this);
24798     BasicType bt = Matcher::vector_element_basic_type(this);
24799     int opc = this->ideal_Opcode();
24800     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24801                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24802   %}
24803   ins_pipe( pipe_slow );
24804 %}
24805 
24806 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24807   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24808   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24809   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24810   ins_encode %{
24811     assert(UseFMA, "Needs FMA instructions support.");
24812     int vlen_enc = vector_length_encoding(this);
24813     BasicType bt = Matcher::vector_element_basic_type(this);
24814     int opc = this->ideal_Opcode();
24815     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24816                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24817   %}
24818   ins_pipe( pipe_slow );
24819 %}
24820 
24821 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24822   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24823   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24824   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24825   ins_encode %{
24826     assert(UseFMA, "Needs FMA instructions support.");
24827     int vlen_enc = vector_length_encoding(this);
24828     BasicType bt = Matcher::vector_element_basic_type(this);
24829     int opc = this->ideal_Opcode();
24830     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24831                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24832   %}
24833   ins_pipe( pipe_slow );
24834 %}
24835 
24836 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24837   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24838   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24839   ins_encode %{
24840     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24841     int vlen_enc = vector_length_encoding(this, $src1);
24842     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24843 
    // Dispatch the comparison on src1's element type: integral types map the
    // boolean-test predicate onto a signed or unsigned integer compare, while
    // T_FLOAT/T_DOUBLE use the FP comparison predicates.
24845     switch (src1_elem_bt) {
24846       case T_BYTE: {
24847         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24848         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24849         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24850         break;
24851       }
24852       case T_SHORT: {
24853         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24854         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24855         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24856         break;
24857       }
24858       case T_INT: {
24859         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24860         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24861         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24862         break;
24863       }
24864       case T_LONG: {
24865         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24866         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24867         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24868         break;
24869       }
24870       case T_FLOAT: {
24871         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24872         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24873         break;
24874       }
24875       case T_DOUBLE: {
24876         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24877         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24878         break;
24879       }
24880       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24881     }
24882   %}
24883   ins_pipe( pipe_slow );
24884 %}
24885 
24886 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24887   predicate(Matcher::vector_length(n) <= 32);
24888   match(Set dst (MaskAll src));
24889   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24890   ins_encode %{
24891     int mask_len = Matcher::vector_length(this);
24892     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24893   %}
24894   ins_pipe( pipe_slow );
24895 %}
24896 
24897 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24898   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24899   match(Set dst (XorVMask src (MaskAll cnt)));
24900   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24901   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24902   ins_encode %{
24903     uint masklen = Matcher::vector_length(this);
24904     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24905   %}
24906   ins_pipe( pipe_slow );
24907 %}
24908 
24909 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24910   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24911             (Matcher::vector_length(n) == 16) ||
24912             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24913   match(Set dst (XorVMask src (MaskAll cnt)));
24914   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24915   ins_encode %{
24916     uint masklen = Matcher::vector_length(this);
24917     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24918   %}
24919   ins_pipe( pipe_slow );
24920 %}
24921 
24922 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24923   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24924   match(Set dst (VectorLongToMask src));
24925   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24926   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24927   ins_encode %{
24928     int mask_len = Matcher::vector_length(this);
24929     int vec_enc  = vector_length_encoding(mask_len);
24930     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24931                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24932   %}
24933   ins_pipe( pipe_slow );
24934 %}
24935 
24936 
24937 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24938   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24939   match(Set dst (VectorLongToMask src));
24940   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24941   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24942   ins_encode %{
24943     int mask_len = Matcher::vector_length(this);
24944     assert(mask_len <= 32, "invalid mask length");
24945     int vec_enc  = vector_length_encoding(mask_len);
24946     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24947                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24948   %}
24949   ins_pipe( pipe_slow );
24950 %}
24951 
24952 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24953   predicate(n->bottom_type()->isa_vectmask());
24954   match(Set dst (VectorLongToMask src));
24955   format %{ "long_to_mask_evex $dst, $src\t!" %}
24956   ins_encode %{
24957     __ kmov($dst$$KRegister, $src$$Register);
24958   %}
24959   ins_pipe( pipe_slow );
24960 %}
24961 
24962 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24963   match(Set dst (AndVMask src1 src2));
24964   match(Set dst (OrVMask src1 src2));
24965   match(Set dst (XorVMask src1 src2));
24966   effect(TEMP kscratch);
24967   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24968   ins_encode %{
24969     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24970     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24971     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24972     uint masklen = Matcher::vector_length(this);
24973     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24974     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24975   %}
24976   ins_pipe( pipe_slow );
24977 %}
24978 
24979 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24980   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24981   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24982   ins_encode %{
24983     int vlen_enc = vector_length_encoding(this);
24984     BasicType bt = Matcher::vector_element_basic_type(this);
24985     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24986                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24987   %}
24988   ins_pipe( pipe_slow );
24989 %}
24990 
24991 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24992   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24993   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24994   ins_encode %{
24995     int vlen_enc = vector_length_encoding(this);
24996     BasicType bt = Matcher::vector_element_basic_type(this);
24997     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24998                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24999   %}
25000   ins_pipe( pipe_slow );
25001 %}
25002 
25003 instruct castMM(kReg dst)
25004 %{
25005   match(Set dst (CastVV dst));
25006 
25007   size(0);
25008   format %{ "# castVV of $dst" %}
25009   ins_encode(/* empty encoding */);
25010   ins_cost(0);
25011   ins_pipe(empty);
25012 %}
25013 
25014 instruct castVV(vec dst)
25015 %{
25016   match(Set dst (CastVV dst));
25017 
25018   size(0);
25019   format %{ "# castVV of $dst" %}
25020   ins_encode(/* empty encoding */);
25021   ins_cost(0);
25022   ins_pipe(empty);
25023 %}
25024 
25025 instruct castVVLeg(legVec dst)
25026 %{
25027   match(Set dst (CastVV dst));
25028 
25029   size(0);
25030   format %{ "# castVV of $dst" %}
25031   ins_encode(/* empty encoding */);
25032   ins_cost(0);
25033   ins_pipe(empty);
25034 %}
25035 
25036 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25037 %{
25038   match(Set dst (IsInfiniteF src));
25039   effect(TEMP ktmp, KILL cr);
25040   format %{ "float_class_check $dst, $src" %}
25041   ins_encode %{
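    // imm8 0x18 = positive infinity (0x08) | negative infinity (0x10).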
25042     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25043     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25044   %}
25045   ins_pipe(pipe_slow);
25046 %}
25047 
25048 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25049 %{
25050   match(Set dst (IsInfiniteD src));
25051   effect(TEMP ktmp, KILL cr);
25052   format %{ "double_class_check $dst, $src" %}
25053   ins_encode %{
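    // imm8 0x18 = positive infinity (0x08) | negative infinity (0x10).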
25054     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25055     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25056   %}
25057   ins_pipe(pipe_slow);
25058 %}
25059 
25060 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25061 %{
25062   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25063             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25064   match(Set dst (SaturatingAddV src1 src2));
25065   match(Set dst (SaturatingSubV src1 src2));
25066   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25067   ins_encode %{
25068     int vlen_enc = vector_length_encoding(this);
25069     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25070     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25071                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25072   %}
25073   ins_pipe(pipe_slow);
25074 %}
25075 
25076 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25077 %{
25078   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25079             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25080   match(Set dst (SaturatingAddV src1 src2));
25081   match(Set dst (SaturatingSubV src1 src2));
25082   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25083   ins_encode %{
25084     int vlen_enc = vector_length_encoding(this);
25085     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25086     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25087                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25088   %}
25089   ins_pipe(pipe_slow);
25090 %}
25091 
25092 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25093 %{
25094   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25095             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25096             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25097   match(Set dst (SaturatingAddV src1 src2));
25098   match(Set dst (SaturatingSubV src1 src2));
25099   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25100   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25101   ins_encode %{
25102     int vlen_enc = vector_length_encoding(this);
25103     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25104     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25105                                         $src1$$XMMRegister, $src2$$XMMRegister,
25106                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25107                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25108   %}
25109   ins_pipe(pipe_slow);
25110 %}
25111 
25112 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25113 %{
25114   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25115             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25116             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25117   match(Set dst (SaturatingAddV src1 src2));
25118   match(Set dst (SaturatingSubV src1 src2));
25119   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25120   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25121   ins_encode %{
25122     int vlen_enc = vector_length_encoding(this);
25123     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25124     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25125                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25126                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25127   %}
25128   ins_pipe(pipe_slow);
25129 %}
25130 
25131 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25132 %{
25133   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25134             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25135             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25136   match(Set dst (SaturatingAddV src1 src2));
25137   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25138   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25139   ins_encode %{
25140     int vlen_enc = vector_length_encoding(this);
25141     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25142     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25143                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25144   %}
25145   ins_pipe(pipe_slow);
25146 %}
25147 
25148 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25149 %{
25150   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25151             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25152             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25153   match(Set dst (SaturatingAddV src1 src2));
25154   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25155   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25156   ins_encode %{
25157     int vlen_enc = vector_length_encoding(this);
25158     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25159     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25160                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25161   %}
25162   ins_pipe(pipe_slow);
25163 %}
25164 
25165 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25166 %{
25167   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25168             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25169             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25170   match(Set dst (SaturatingSubV src1 src2));
25171   effect(TEMP ktmp);
25172   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25173   ins_encode %{
25174     int vlen_enc = vector_length_encoding(this);
25175     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25176     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25177                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25178   %}
25179   ins_pipe(pipe_slow);
25180 %}
25181 
25182 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25183 %{
25184   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25185             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25186             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25187   match(Set dst (SaturatingSubV src1 src2));
25188   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25189   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25190   ins_encode %{
25191     int vlen_enc = vector_length_encoding(this);
25192     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25193     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25194                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25195   %}
25196   ins_pipe(pipe_slow);
25197 %}
25198 
25199 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25200 %{
25201   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25202             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25203   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25204   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25205   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25206   ins_encode %{
25207     int vlen_enc = vector_length_encoding(this);
25208     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25209     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25210                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25211   %}
25212   ins_pipe(pipe_slow);
25213 %}
25214 
25215 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25216 %{
25217   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25218             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25219   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25220   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25221   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25222   ins_encode %{
25223     int vlen_enc = vector_length_encoding(this);
25224     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25225     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25226                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25227   %}
25228   ins_pipe(pipe_slow);
25229 %}
25230 
25231 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25232   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25233             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25234   match(Set dst (SaturatingAddV (Binary dst src) mask));
25235   match(Set dst (SaturatingSubV (Binary dst src) mask));
25236   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25237   ins_encode %{
25238     int vlen_enc = vector_length_encoding(this);
25239     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25240     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25241                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25242   %}
25243   ins_pipe( pipe_slow );
25244 %}
25245 
25246 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25247   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25248             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25249   match(Set dst (SaturatingAddV (Binary dst src) mask));
25250   match(Set dst (SaturatingSubV (Binary dst src) mask));
25251   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25252   ins_encode %{
25253     int vlen_enc = vector_length_encoding(this);
25254     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25255     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25256                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25257   %}
25258   ins_pipe( pipe_slow );
25259 %}
25260 
25261 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25262   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25263             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25264   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25265   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25266   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25267   ins_encode %{
25268     int vlen_enc = vector_length_encoding(this);
25269     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25270     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25271                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25272   %}
25273   ins_pipe( pipe_slow );
25274 %}
25275 
25276 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25277   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25278             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25279   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25280   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25281   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25282   ins_encode %{
25283     int vlen_enc = vector_length_encoding(this);
25284     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25285     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25286                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25287   %}
25288   ins_pipe( pipe_slow );
25289 %}
25290 
25291 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25292 %{
25293   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25294   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25295   ins_encode %{
25296     int vlen_enc = vector_length_encoding(this);
25297     BasicType bt = Matcher::vector_element_basic_type(this);
25298     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25299   %}
25300   ins_pipe(pipe_slow);
25301 %}
25302 
25303 instruct reinterpretS2HF(regF dst, rRegI src)
25304 %{
25305   match(Set dst (ReinterpretS2HF src));
25306   format %{ "vmovw $dst, $src" %}
25307   ins_encode %{
25308     __ vmovw($dst$$XMMRegister, $src$$Register);
25309   %}
25310   ins_pipe(pipe_slow);
25311 %}
25312 
25313 instruct reinterpretHF2S(rRegI dst, regF src)
25314 %{
25315   match(Set dst (ReinterpretHF2S src));
25316   format %{ "vmovw $dst, $src" %}
25317   ins_encode %{
25318     __ vmovw($dst$$Register, $src$$XMMRegister);
25319   %}
25320   ins_pipe(pipe_slow);
25321 %}
25322 
25323 instruct convF2HFAndS2HF(regF dst, regF src)
25324 %{
25325   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25326   format %{ "convF2HFAndS2HF $dst, $src" %}
25327   ins_encode %{
25328     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25329   %}
25330   ins_pipe(pipe_slow);
25331 %}
25332 
25333 instruct convHF2SAndHF2F(regF dst, regF src)
25334 %{
25335   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25336   format %{ "convHF2SAndHF2F $dst, $src" %}
25337   ins_encode %{
25338     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25339   %}
25340   ins_pipe(pipe_slow);
25341 %}
25342 
25343 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25344 %{
25345   match(Set dst (SqrtHF src));
25346   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25347   ins_encode %{
25348     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25349   %}
25350   ins_pipe(pipe_slow);
25351 %}
25352 
25353 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25354 %{
25355   match(Set dst (AddHF src1 src2));
25356   match(Set dst (DivHF src1 src2));
25357   match(Set dst (MulHF src1 src2));
25358   match(Set dst (SubHF src1 src2));
25359   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25360   ins_encode %{
25361     int opcode = this->ideal_Opcode();
25362     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25363   %}
25364   ins_pipe(pipe_slow);
25365 %}
25366 
25367 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25368 %{
25369   predicate(VM_Version::supports_avx10_2());
25370   match(Set dst (MaxHF src1 src2));
25371   match(Set dst (MinHF src1 src2));
25372   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25373   ins_encode %{
25374     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25375     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25376   %}
25377   ins_pipe( pipe_slow );
25378 %}
25379 
25380 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25381 %{
25382   predicate(!VM_Version::supports_avx10_2());
25383   match(Set dst (MaxHF src1 src2));
25384   match(Set dst (MinHF src1 src2));
25385   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25386   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25387   ins_encode %{
25388     int opcode = this->ideal_Opcode();
25389     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25390                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25391   %}
25392   ins_pipe( pipe_slow );
25393 %}
25394 
25395 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25396 %{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2, scalar fp16 fma" %}
25400   ins_encode %{
25401     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25402   %}
25403   ins_pipe( pipe_slow );
25404 %}
25405 
25406 
25407 instruct vector_sqrt_HF_reg(vec dst, vec src)
25408 %{
25409   match(Set dst (SqrtVHF src));
25410   format %{ "vector_sqrt_fp16 $dst, $src" %}
25411   ins_encode %{
25412     int vlen_enc = vector_length_encoding(this);
25413     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25414   %}
25415   ins_pipe(pipe_slow);
25416 %}
25417 
25418 instruct vector_sqrt_HF_mem(vec dst, memory src)
25419 %{
25420   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25421   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25422   ins_encode %{
25423     int vlen_enc = vector_length_encoding(this);
25424     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25425   %}
25426   ins_pipe(pipe_slow);
25427 %}
25428 
25429 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25430 %{
25431   match(Set dst (AddVHF src1 src2));
25432   match(Set dst (DivVHF src1 src2));
25433   match(Set dst (MulVHF src1 src2));
25434   match(Set dst (SubVHF src1 src2));
25435   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25436   ins_encode %{
25437     int vlen_enc = vector_length_encoding(this);
25438     int opcode = this->ideal_Opcode();
25439     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25440   %}
25441   ins_pipe(pipe_slow);
25442 %}
25443 
25444 
25445 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25446 %{
25447   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25448   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25449   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25450   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25451   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25452   ins_encode %{
25453     int vlen_enc = vector_length_encoding(this);
25454     int opcode = this->ideal_Opcode();
25455     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25456   %}
25457   ins_pipe(pipe_slow);
25458 %}
25459 
25460 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25461 %{
25462   match(Set dst (FmaVHF src2 (Binary dst src1)));
25463   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25464   ins_encode %{
25465     int vlen_enc = vector_length_encoding(this);
25466     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25467   %}
25468   ins_pipe( pipe_slow );
25469 %}
25470 
25471 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25472 %{
25473   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25474   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25475   ins_encode %{
25476     int vlen_enc = vector_length_encoding(this);
25477     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25478   %}
25479   ins_pipe( pipe_slow );
25480 %}
25481 
25482 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25483 %{
25484   predicate(VM_Version::supports_avx10_2());
25485   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25486   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25487   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25488   ins_encode %{
25489     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25491     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25492   %}
25493   ins_pipe( pipe_slow );
25494 %}
25495 
25496 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25497 %{
25498   predicate(VM_Version::supports_avx10_2());
25499   match(Set dst (MinVHF src1 src2));
25500   match(Set dst (MaxVHF src1 src2));
25501   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25502   ins_encode %{
25503     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25505     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25506   %}
25507   ins_pipe( pipe_slow );
25508 %}
25509 
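// Fallback FP16 min/max for targets without the AVX10.2 vminmaxph instruction.
// The mask and vector temporaries are used to handle the special cases of Java
// min/max semantics (NaN propagation and signed-zero ordering); see
// vector_max_min_fp16 in the macro assembler for the details.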
25510 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25511 %{
25512   predicate(!VM_Version::supports_avx10_2());
25513   match(Set dst (MinVHF src1 src2));
25514   match(Set dst (MaxVHF src1 src2));
25515   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25516   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25517   ins_encode %{
25518     int vlen_enc = vector_length_encoding(this);
25519     int opcode = this->ideal_Opcode();
25520     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25521                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25522   %}
25523   ins_pipe( pipe_slow );
25524 %}
25525 
25526 //----------PEEPHOLE RULES-----------------------------------------------------
25527 // These must follow all instruction definitions as they use the names
25528 // defined in the instructions definitions.
25529 //
// peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
25532 //
25533 // peepmatch ( root_instr_name [preceding_instruction]* );
25534 //
// peepprocedure ( procedure_name );
// // provides the name of a procedure that performs the optimization; the
// // procedure should reside in the architecture-dependent peephole file and
// // have the signature
// //   bool (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...)
// // The arguments are the basic block, the current node index inside the
// // block, the register allocator, a function that, when invoked, returns a
// // new node as defined in peepreplace, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch. The procedure returns true if
// // the transformation succeeded, and false otherwise.
25544 //
// peepconstraint ( instruction_number.operand_name relational_op
//                  instruction_number.operand_name [, ...] );
// // instruction numbers are zero-based, using left-to-right order in peepmatch
25549 //
25550 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25551 // // provide an instruction_number.operand_name for each operand that appears
25552 // // in the replacement instruction's match rule
25553 //
25554 // ---------VM FLAGS---------------------------------------------------------
25555 //
25556 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25557 //
25558 // Each peephole rule is given an identifying number starting with zero and
25559 // increasing by one in the order seen by the parser.  An individual peephole
25560 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25561 // on the command-line.
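//
// For example (the rule number below is arbitrary and purely illustrative):
//   -XX:-OptoPeephole       disables all peephole optimizations
//   -XX:OptoPeepholeAt=3    enables only peephole rule number 3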
25562 //
25563 // ---------CURRENT LIMITATIONS----------------------------------------------
25564 //
// Only transformations within a single basic block are supported (it is an
// open question whether peepholes need more)
25566 //
25567 // ---------EXAMPLE----------------------------------------------------------
25568 //
25569 // // pertinent parts of existing instructions in architecture description
25570 // instruct movI(rRegI dst, rRegI src)
25571 // %{
25572 //   match(Set dst (CopyI src));
25573 // %}
25574 //
25575 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25576 // %{
25577 //   match(Set dst (AddI dst src));
25578 //   effect(KILL cr);
25579 // %}
25580 //
25581 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25582 // %{
25583 //   match(Set dst (AddI dst src));
25584 // %}
25585 //
25586 // 1. Simple replacement
25587 // - Only match adjacent instructions in same basic block
25588 // - Only equality constraints
25589 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25590 // - Only one replacement instruction
25591 //
25592 // // Change (inc mov) to lea
25593 // peephole %{
25594 //   // lea should only be emitted when beneficial
25595 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25596 //   // increment preceded by register-register move
25597 //   peepmatch ( incI_rReg movI );
25598 //   // require that the destination register of the increment
25599 //   // match the destination register of the move
25600 //   peepconstraint ( 0.dst == 1.dst );
25601 //   // construct a replacement instruction that sets
25602 //   // the destination to ( move's source register + one )
25603 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25604 // %}
25605 //
25606 // 2. Procedural replacement
// - More flexible in finding relevant nodes
25608 // - More flexible constraints
25609 // - More flexible transformations
// - May utilise the architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing limitations
25612 //
25613 // // Change (inc mov) to lea
25614 // peephole %{
25615 //   // lea should only be emitted when beneficial
25616 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25617 //   // the rule numbers of these nodes inside are passed into the function below
25618 //   peepmatch ( incI_rReg movI );
25619 //   // the method that takes the responsibility of transformation
25620 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a function that, when invoked,
//   // creates this node is passed into the procedure above
25623 //   peepreplace ( leaI_rReg_immI() );
25624 // %}
25625 
// These instructions are not matched by the matcher but are used by the peephole rules
25627 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25628 %{
25629   predicate(false);
25630   match(Set dst (AddI src1 src2));
25631   format %{ "leal    $dst, [$src1 + $src2]" %}
25632   ins_encode %{
25633     Register dst = $dst$$Register;
25634     Register src1 = $src1$$Register;
25635     Register src2 = $src2$$Register;
25636     if (src1 != rbp && src1 != r13) {
25637       __ leal(dst, Address(src1, src2, Address::times_1));
25638     } else {
25639       assert(src2 != rbp && src2 != r13, "");
25640       __ leal(dst, Address(src2, src1, Address::times_1));
25641     }
25642   %}
25643   ins_pipe(ialu_reg_reg);
25644 %}
25645 
25646 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25647 %{
25648   predicate(false);
25649   match(Set dst (AddI src1 src2));
25650   format %{ "leal    $dst, [$src1 + $src2]" %}
25651   ins_encode %{
25652     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25653   %}
25654   ins_pipe(ialu_reg_reg);
25655 %}
25656 
25657 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25658 %{
25659   predicate(false);
25660   match(Set dst (LShiftI src shift));
25661   format %{ "leal    $dst, [$src << $shift]" %}
25662   ins_encode %{
25663     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25664     Register src = $src$$Register;
25665     if (scale == Address::times_2 && src != rbp && src != r13) {
25666       __ leal($dst$$Register, Address(src, src, Address::times_1));
25667     } else {
25668       __ leal($dst$$Register, Address(noreg, src, scale));
25669     }
25670   %}
25671   ins_pipe(ialu_reg_reg);
25672 %}
25673 
25674 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25675 %{
25676   predicate(false);
25677   match(Set dst (AddL src1 src2));
25678   format %{ "leaq    $dst, [$src1 + $src2]" %}
25679   ins_encode %{
25680     Register dst = $dst$$Register;
25681     Register src1 = $src1$$Register;
25682     Register src2 = $src2$$Register;
25683     if (src1 != rbp && src1 != r13) {
25684       __ leaq(dst, Address(src1, src2, Address::times_1));
25685     } else {
25686       assert(src2 != rbp && src2 != r13, "");
25687       __ leaq(dst, Address(src2, src1, Address::times_1));
25688     }
25689   %}
25690   ins_pipe(ialu_reg_reg);
25691 %}
25692 
25693 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25694 %{
25695   predicate(false);
25696   match(Set dst (AddL src1 src2));
25697   format %{ "leaq    $dst, [$src1 + $src2]" %}
25698   ins_encode %{
25699     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25700   %}
25701   ins_pipe(ialu_reg_reg);
25702 %}
25703 
25704 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25705 %{
25706   predicate(false);
25707   match(Set dst (LShiftL src shift));
25708   format %{ "leaq    $dst, [$src << $shift]" %}
25709   ins_encode %{
25710     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25711     Register src = $src$$Register;
25712     if (scale == Address::times_2 && src != rbp && src != r13) {
25713       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25714     } else {
25715       __ leaq($dst$$Register, Address(noreg, src, scale));
25716     }
25717   %}
25718   ins_pipe(ialu_reg_reg);
25719 %}
25720 
25721 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25722 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25723 // processors with at least partial ALU support for lea
25724 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25725 // beneficial for processors with full ALU support
25726 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
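//
// For example, a sketch of the intended rewrite (registers chosen purely for
// illustration):
//   movl rbx, rax          ; movI
//   addl rbx, rcx          ; addI_rReg
// is coalesced into
//   leal rbx, [rax + rcx]  ; leaI_rReg_rReg_peep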
25727 
25728 peephole
25729 %{
25730   peeppredicate(VM_Version::supports_fast_2op_lea());
25731   peepmatch (addI_rReg);
25732   peepprocedure (lea_coalesce_reg);
25733   peepreplace (leaI_rReg_rReg_peep());
25734 %}
25735 
25736 peephole
25737 %{
25738   peeppredicate(VM_Version::supports_fast_2op_lea());
25739   peepmatch (addI_rReg_imm);
25740   peepprocedure (lea_coalesce_imm);
25741   peepreplace (leaI_rReg_immI_peep());
25742 %}
25743 
25744 peephole
25745 %{
25746   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25747                 VM_Version::is_intel_cascade_lake());
25748   peepmatch (incI_rReg);
25749   peepprocedure (lea_coalesce_imm);
25750   peepreplace (leaI_rReg_immI_peep());
25751 %}
25752 
25753 peephole
25754 %{
25755   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25756                 VM_Version::is_intel_cascade_lake());
25757   peepmatch (decI_rReg);
25758   peepprocedure (lea_coalesce_imm);
25759   peepreplace (leaI_rReg_immI_peep());
25760 %}
25761 
25762 peephole
25763 %{
25764   peeppredicate(VM_Version::supports_fast_2op_lea());
25765   peepmatch (salI_rReg_immI2);
25766   peepprocedure (lea_coalesce_imm);
25767   peepreplace (leaI_rReg_immI2_peep());
25768 %}
25769 
25770 peephole
25771 %{
25772   peeppredicate(VM_Version::supports_fast_2op_lea());
25773   peepmatch (addL_rReg);
25774   peepprocedure (lea_coalesce_reg);
25775   peepreplace (leaL_rReg_rReg_peep());
25776 %}
25777 
25778 peephole
25779 %{
25780   peeppredicate(VM_Version::supports_fast_2op_lea());
25781   peepmatch (addL_rReg_imm);
25782   peepprocedure (lea_coalesce_imm);
25783   peepreplace (leaL_rReg_immL32_peep());
25784 %}
25785 
25786 peephole
25787 %{
25788   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25789                 VM_Version::is_intel_cascade_lake());
25790   peepmatch (incL_rReg);
25791   peepprocedure (lea_coalesce_imm);
25792   peepreplace (leaL_rReg_immL32_peep());
25793 %}
25794 
25795 peephole
25796 %{
25797   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25798                 VM_Version::is_intel_cascade_lake());
25799   peepmatch (decL_rReg);
25800   peepprocedure (lea_coalesce_imm);
25801   peepreplace (leaL_rReg_immL32_peep());
25802 %}
25803 
25804 peephole
25805 %{
25806   peeppredicate(VM_Version::supports_fast_2op_lea());
25807   peepmatch (salL_rReg_immI2);
25808   peepprocedure (lea_coalesce_imm);
25809   peepreplace (leaL_rReg_immI2_peep());
25810 %}
25811 
25812 peephole
25813 %{
25814   peepmatch (leaPCompressedOopOffset);
25815   peepprocedure (lea_remove_redundant);
25816 %}
25817 
25818 peephole
25819 %{
25820   peepmatch (leaP8Narrow);
25821   peepprocedure (lea_remove_redundant);
25822 %}
25823 
25824 peephole
25825 %{
25826   peepmatch (leaP32Narrow);
25827   peepprocedure (lea_remove_redundant);
25828 %}
25829 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only use
// flags that are already set by the preceding instruction.
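//
// For example (a sketch; registers and labels are illustrative):
//   andl  rax, rbx     ; already sets ZF/SF from the result
//   testl rax, rax     ; redundant: recomputes the same flags
//   je    done
// allows the testl to be removed when the branch only consumes flags the
// preceding andl has already set.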
25832 
// int variant
25834 peephole
25835 %{
25836   peepmatch (testI_reg);
25837   peepprocedure (test_may_remove);
25838 %}
25839 
// long variant
25841 peephole
25842 %{
25843   peepmatch (testL_reg);
25844   peepprocedure (test_may_remove);
25845 %}
25846 
25848 //----------SMARTSPILL RULES---------------------------------------------------
25849 // These must follow all instruction definitions as they use the names
25850 // defined in the instructions definitions.