1 //
    2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding, VMReg );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
   66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
   67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   69 
   70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());          // fields: register save type, C convention save type, ideal type, encoding, VMReg
   71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());  // companion slot: VMReg following rax's, same encoding (presumably the upper half -- TODO confirm)
   72 
   73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());          // save-on-call in both columns
   74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
   75 
   76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());          // save-on-call in both columns
   77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
   78 
   79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());          // save-on-call register save type, save-on-entry under the C convention
   80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
   81 
   82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());          // no-save in both columns
   83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
   84 
   85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
   86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());           // no-save register save type, save-on-entry under the C convention
   87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
   88 
   89 #ifdef _WIN64
   90 
   91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());          // _WIN64 build: RSI is save-on-entry under the C convention
   92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
   93 
   94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());          // _WIN64 build: RDI is save-on-entry under the C convention
   95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
   96 
   97 #else
   98 
   99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());          // other builds: RSI is save-on-call under the C convention
  100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
  101 
  102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());          // other builds: RDI is save-on-call under the C convention
  103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
  104 
  105 #endif
  106 
  107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());           // R8-R11: save-on-call in both columns
  108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());   // *_H: VMReg following the base register's slot, same encoding
  109 
  110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
  111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
  112 
  113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
  114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  115 
  116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
  117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
  118 
  119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());          // R12-R15: save-on-call register save type, save-on-entry under the C convention
  120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
  121 
  122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
  123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
  124 
  125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
  126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
  127 
  128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
  129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  131 reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());          // R16-R31: all save-on-call in both columns, encodings 16-31
  132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());  // *_H: VMReg following the base register's slot, same encoding
  133 
  134 reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
  135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
  136 
  137 reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
  138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
  139 
  140 reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
  141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
  142 
  143 reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
  144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
  145 
  146 reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
  147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
  148 
  149 reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
  150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
  151 
  152 reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
  153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
  154 
  155 reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
  156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
  157 
  158 reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
  159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
  160 
  161 reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
  162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
  163 
  164 reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
  165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
  166 
  167 reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
  168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
  169 
  170 reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
  171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
  172 
  173 reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
  174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
  175 
  176 reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
  177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
  178 
  179 // Floating Point Registers
  180 
  181 // Specify priority of register selection within phases of register
  182 // allocation.  Highest priority is first.  A useful heuristic is to
  183 // give registers a low priority when they are required by machine
  184 // instructions, like EAX and EDX on I486, and choose no-save registers
  185 // before save-on-call, & save-on-call before save-on-entry.  Registers
  186 // which participate in fixed calling sequences should come last.
  187 // Registers which are used as pairs must fall on an even boundary.
  188 
  189 alloc_class chunk0(R10,         R10_H,   // selection order for the general registers; earlier entries are listed first
  190                    R11,         R11_H,   // each register is listed together with its *_H companion slot
  191                    R8,          R8_H,
  192                    R9,          R9_H,
  193                    R12,         R12_H,
  194                    RCX,         RCX_H,
  195                    RBX,         RBX_H,
  196                    RDI,         RDI_H,
  197                    RDX,         RDX_H,
  198                    RSI,         RSI_H,
  199                    RAX,         RAX_H,
  200                    RBP,         RBP_H,
  201                    R13,         R13_H,
  202                    R14,         R14_H,
  203                    R15,         R15_H,
  204                    R16,         R16_H,   // R16-R31 follow in numeric order
  205                    R17,         R17_H,
  206                    R18,         R18_H,
  207                    R19,         R19_H,
  208                    R20,         R20_H,
  209                    R21,         R21_H,
  210                    R22,         R22_H,
  211                    R23,         R23_H,
  212                    R24,         R24_H,
  213                    R25,         R25_H,
  214                    R26,         R26_H,
  215                    R27,         R27_H,
  216                    R28,         R28_H,
  217                    R29,         R29_H,
  218                    R30,         R30_H,
  219                    R31,         R31_H,
  220                    RSP,         RSP_H);  // RSP (declared NS/NS) is listed last
  221 
  222 // XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
  223 // Word a in each register holds a Float, words ab hold a Double.
  224 // The whole registers are used in SSE4.2 version intrinsics,
  225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  226 // UseXMMForArrayCopy and UseSuperword flags).
  227 // For pre EVEX enabled architectures:
  228 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  229 // For EVEX enabled architectures:
  230 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  231 //
  232 // Linux ABI:   No register preserved across function calls
  233 //              XMM0-XMM7 might hold parameters
  234 // Windows ABI: XMM6-XMM15 preserved across function calls
  235 //              XMM0-XMM3 might hold parameters
  236 
  237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());           // XMM0 word a: base VMReg slot, encoding 0, save-on-call in both columns
  238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  253 
  254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());           // XMM1 word a: base VMReg slot, encoding 1, save-on-call in both columns
  255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
  260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
  261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
  262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
  263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
  264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
  265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
  266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
  267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
  268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
  269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
  270 
  271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());           // XMM2 word a: base VMReg slot, encoding 2, save-on-call in both columns
  272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
  274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
  275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
  276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
  277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
  278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
  279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
  280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
  281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
  282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
  283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
  284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
  285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
  286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
  287 
  288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());           // XMM3 word a: base VMReg slot, encoding 3, save-on-call in both columns
  289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
  291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
  292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
  293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
  294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
  295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
  296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
  297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
  298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
  299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
  300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
  301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
  302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
  303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
  304 
  305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());           // XMM4 word a: base VMReg slot, encoding 4, save-on-call in both columns
  306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
  308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
  309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
  310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
  311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
  312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
  313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
  314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
  315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
  316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
  317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
  318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
  319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
  320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
  321 
  322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());           // XMM5 word a: base VMReg slot, encoding 5, save-on-call in both columns
  323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
  325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
  326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
  327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
  328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
  329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
  330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
  331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
  332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
  333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
  334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
  335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
  336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
  337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
  338 
  339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());           // XMM6 word a: base VMReg slot, encoding 6, save-on-call in both columns
  340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));  // NOTE(review): the ABI comment in this file says Windows preserves XMM6-XMM15, yet the C save type here is SOC -- confirm this is intended
  342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
  343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
  344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
  345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
  346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
  347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
  348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
  349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
  350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
  351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
  352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
  353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
  354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
  355 
  356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());           // XMM7 word a: base VMReg slot, encoding 7, save-on-call in both columns
  357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
  359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
  360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
  361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
  362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
  363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
  364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
  365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
  366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
  367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
  368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
  369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
  370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
  371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
  372 
  373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());           // XMM8 word a: base VMReg slot, encoding 8, save-on-call in both columns
  374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
  376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
  377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
  378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
  379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
  380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
  381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
  382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
  383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
  384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
  385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
  386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
  387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
  388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
  389 
  390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());           // XMM9 word a: base VMReg slot, encoding 9, save-on-call in both columns
  391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
  393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
  394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
  395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
  396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
  397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
  398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
  399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
  400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
  401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
  402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
  403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
  404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
  405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
  406 
  407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());           // XMM10 word a: base VMReg slot, encoding 10, save-on-call in both columns
  408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
  410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
  411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
  412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
  413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
  414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
  415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
  416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
  417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
  418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
  419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
  420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
  421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
  422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
  423 
  424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());           // XMM11 word a: base VMReg slot, encoding 11, save-on-call in both columns
  425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
  427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
  428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
  429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
  430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
  431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
  432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
  433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
  434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
  435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
  436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
  437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
  438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
  439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
  440 
  441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());           // XMM12 word a: base VMReg slot, encoding 12, save-on-call in both columns
  442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
  444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
  445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
  446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
  447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
  448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
  449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
  450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
  451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
  452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
  453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
  454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
  455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
  456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
  457 
  458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());           // XMM13 word a: base VMReg slot, encoding 13, save-on-call in both columns
  459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
  461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
  462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
  463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
  464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
  465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
  466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
  467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
  468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
  469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
  470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
  471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
  472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
  473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
  474 
  475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());           // XMM14 word a: base VMReg slot, encoding 14, save-on-call in both columns
  476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
  478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
  479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
  480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
  481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
  482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
  483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
  484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
  485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
  486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
  487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
  488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
  489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
  490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
  491 
  492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());           // XMM15 word a: base VMReg slot, encoding 15, save-on-call in both columns
  493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
  495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
  496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
  497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
  498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
  499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
  500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
  501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
  502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
  503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
  504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
  505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
  506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
  507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
  508 
  509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());           // XMM16 word a: base VMReg slot, encoding 16, save-on-call in both columns
  510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
  512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
  513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
  514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
  515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
  516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
  517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
  518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
  519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
  520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
  521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
  522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
  523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
  524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
  525 
  526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());           // XMM17 word a: base VMReg slot, encoding 17, save-on-call in both columns
  527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));  // words b-p: slots next(1)..next(15) of the same register
  528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
  529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
  530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
  531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
  532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
  533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
  534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
  535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
  536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
  537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
  538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
  539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
  540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
  541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
  542 
  543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); // XMM18 base slot; XMM18b..XMM18p below are the following VMReg slots next(1)..next(15), all with encoding 18.
  544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
  545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
  546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
  547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
  548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
  549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
  550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
  551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
  552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
  553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
  554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
  555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
  556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
  557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
  558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
  559 
  560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); // XMM19 base slot; XMM19b..XMM19p below are the following VMReg slots next(1)..next(15), all with encoding 19.
  561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
  562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
  563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
  564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
  565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
  566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
  567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
  568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
  569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
  570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
  571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
  572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
  573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
  574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
  575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
  576 
  577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); // XMM20 base slot; XMM20b..XMM20p below are the following VMReg slots next(1)..next(15), all with encoding 20.
  578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
  579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
  580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
  581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
  582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
  583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
  584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
  585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
  586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
  587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
  588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
  589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
  590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
  591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
  592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
  593 
  594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); // XMM21 base slot; XMM21b..XMM21p below are the following VMReg slots next(1)..next(15), all with encoding 21.
  595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
  596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
  597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
  598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
  599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
  600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
  601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
  602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
  603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
  604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
  605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
  606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
  607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
  608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
  609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
  610 
  611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); // XMM22 base slot; XMM22b..XMM22p below are the following VMReg slots next(1)..next(15), all with encoding 22.
  612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
  613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
  614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
  615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
  616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
  617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
  618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
  619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
  620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
  621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
  622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
  623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
  624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
  625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
  626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
  627 
  628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); // XMM23 base slot; XMM23b..XMM23p below are the following VMReg slots next(1)..next(15), all with encoding 23.
  629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
  630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
  631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
  632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
  633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
  634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
  635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
  636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
  637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
  638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
  639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
  640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
  641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
  642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
  643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
  644 
  645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); // XMM24 base slot; XMM24b..XMM24p below are the following VMReg slots next(1)..next(15), all with encoding 24.
  646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
  647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
  648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
  649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
  650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
  651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
  652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
  653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
  654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
  655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
  656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
  657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
  658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
  659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
  660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
  661 
  662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); // XMM25 base slot; XMM25b..XMM25p below are the following VMReg slots next(1)..next(15), all with encoding 25.
  663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
  664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
  665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
  666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
  667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
  668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
  669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
  670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
  671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
  672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
  673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
  674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
  675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
  676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
  677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
  678 
  679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); // XMM26 base slot; XMM26b..XMM26p below are the following VMReg slots next(1)..next(15), all with encoding 26.
  680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
  681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
  682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
  683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
  684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
  685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
  686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
  687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
  688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
  689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
  690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
  691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
  692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
  693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
  694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
  695 
  696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); // XMM27 base slot; XMM27b..XMM27p below are the following VMReg slots next(1)..next(15), all with encoding 27.
  697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
  698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
  699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
  700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
  701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
  702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
  703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
  704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
  705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
  706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
  707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
  708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
  709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
  710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
  711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
  712 
  713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); // XMM28 base slot; XMM28b..XMM28p below are the following VMReg slots next(1)..next(15), all with encoding 28.
  714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
  715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
  716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
  717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
  718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
  719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
  720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
  721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
  722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
  723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
  724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
  725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
  726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
  727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
  728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
  729 
  730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); // XMM29 base slot; XMM29b..XMM29p below are the following VMReg slots next(1)..next(15), all with encoding 29.
  731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
  732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
  733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
  734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
  735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
  736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
  737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
  738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
  739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
  740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
  741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
  742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
  743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
  744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
  745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
  746 
  747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); // XMM30 base slot; XMM30b..XMM30p below are the following VMReg slots next(1)..next(15), all with encoding 30.
  748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
  749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
  750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
  751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
  752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
  753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
  754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
  755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
  756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
  757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
  758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
  759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
  760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
  761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
  762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
  763 
  764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); // XMM31 base slot; XMM31b..XMM31p below are the following VMReg slots next(1)..next(15), all with encoding 31.
  765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
  766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
  767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
  768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
  769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
  770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
  771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
  772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
  773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
  774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
  775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
  776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
  777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
  778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
  779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
  780 
  781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); // Flags register: ideal register type 0, encoding 16, and no concrete VMReg (VMRegImpl::Bad()).
  782 
  783 // AVX3 Mask Registers K1..K7. Each is defined as a pair sharing one encoding: Kn maps to kn->as_VMReg() and Kn_H to the slot after it (->next()).
  784 reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
  785 reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());
  786 
  787 reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
  788 reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());
  789 
  790 reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
  791 reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());
  792 
  793 reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
  794 reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());
  795 
  796 reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
  797 reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());
  798 
  799 reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
  800 reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());
  801 
  802 reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
  803 reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
  804 
  805 
  806 //----------Architecture Description Register Classes--------------------------
  807 // Several register classes are automatically defined based upon information in
  808 // this architecture description.
  809 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  811 //
  812 
  813 // Empty register class: declared with an empty register list, so it has no members.
  814 reg_class no_reg();
  815 
  816 // Class for all pointer/long registers including APX extended GPRs: RAX..R15 (RSP and R15 included) and R16..R31, each listed with its _H companion.
  817 reg_class all_reg(RAX, RAX_H,
  818                   RDX, RDX_H,
  819                   RBP, RBP_H,
  820                   RDI, RDI_H,
  821                   RSI, RSI_H,
  822                   RCX, RCX_H,
  823                   RBX, RBX_H,
  824                   RSP, RSP_H,
  825                   R8,  R8_H,
  826                   R9,  R9_H,
  827                   R10, R10_H,
  828                   R11, R11_H,
  829                   R12, R12_H,
  830                   R13, R13_H,
  831                   R14, R14_H,
  832                   R15, R15_H,
  833                   R16, R16_H,
  834                   R17, R17_H,
  835                   R18, R18_H,
  836                   R19, R19_H,
  837                   R20, R20_H,
  838                   R21, R21_H,
  839                   R22, R22_H,
  840                   R23, R23_H,
  841                   R24, R24_H,
  842                   R25, R25_H,
  843                   R26, R26_H,
  844                   R27, R27_H,
  845                   R28, R28_H,
  846                   R29, R29_H,
  847                   R30, R30_H,
  848                   R31, R31_H);
  849 
  850 // Class for all int registers including APX extended GPRs. Only the base entries are listed (no _H companions); RSP and R15, which all_reg does list, are not members.
  851 reg_class all_int_reg(RAX,
  852                       RDX,
  853                       RBP,
  854                       RDI,
  855                       RSI,
  856                       RCX,
  857                       RBX,
  858                       R8,
  859                       R9,
  860                       R10,
  861                       R11,
  862                       R12,
  863                       R13,
  864                       R14,
  865                       R16,
  866                       R17,
  867                       R18,
  868                       R19,
  869                       R20,
  870                       R21,
  871                       R22,
  872                       R23,
  873                       R24,
  874                       R25,
  875                       R26,
  876                       R27,
  877                       R28,
  878                       R29,
  879                       R30,
  880                       R31);
  881 
  882 // Class for all pointer registers. Like the pointer classes that follow, it is defined by a %{ %} body returning a register mask (here _ANY_REG_mask) instead of an explicit register list.
  883 reg_class any_reg %{
  884   return _ANY_REG_mask;
  885 %}
  886 
  887 // Class for all pointer registers (excluding RSP)
  888 reg_class ptr_reg %{
  889   return _PTR_REG_mask;
  890 %}
  891 
  892 // Class for all pointer registers (excluding RSP and RBP)
  893 reg_class ptr_reg_no_rbp %{
  894   return _PTR_REG_NO_RBP_mask;
  895 %}
  896 
  897 // Class for all pointer registers (excluding RAX and RSP)
  898 reg_class ptr_no_rax_reg %{
  899   return _PTR_NO_RAX_REG_mask;
  900 %}
  901 
  902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
  903 reg_class ptr_no_rax_rbx_reg %{
  904   return _PTR_NO_RAX_RBX_REG_mask;
  905 %}
  906 
  907 // Class for all long registers (excluding RSP). Like the long classes that follow, it is defined by a %{ %} body returning a register mask (here _LONG_REG_mask) instead of an explicit register list.
  908 reg_class long_reg %{
  909   return _LONG_REG_mask;
  910 %}
  911 
  912 // Class for all long registers (excluding RAX, RDX and RSP)
  913 reg_class long_no_rax_rdx_reg %{
  914   return _LONG_NO_RAX_RDX_REG_mask;
  915 %}
  916 
  917 // Class for all long registers (excluding RCX and RSP)
  918 reg_class long_no_rcx_reg %{
  919   return _LONG_NO_RCX_REG_mask;
  920 %}
  921 
  922 // Class for all long registers (excluding RBP and R13)
  923 reg_class long_no_rbp_r13_reg %{
  924   return _LONG_NO_RBP_R13_REG_mask;
  925 %}
  926 
  927 // Class for all int registers (excluding RSP). Like the int classes that follow, it is defined by a %{ %} body returning a register mask (here _INT_REG_mask) instead of an explicit register list.
  928 reg_class int_reg %{
  929   return _INT_REG_mask;
  930 %}
  931 
  932 // Class for all int registers (excluding RAX, RDX, and RSP)
  933 reg_class int_no_rax_rdx_reg %{
  934   return _INT_NO_RAX_RDX_REG_mask;
  935 %}
  936 
  937 // Class for all int registers (excluding RCX and RSP)
  938 reg_class int_no_rcx_reg %{
  939   return _INT_NO_RCX_REG_mask;
  940 %}
  941 
  942 // Class for all int registers (excluding RBP and R13)
  943 reg_class int_no_rbp_r13_reg %{
  944   return _INT_NO_RBP_R13_REG_mask;
  945 %}
  946 
  947 // Singleton class for RAX pointer register (RAX together with its RAX_H companion, as in every pointer singleton below)
  948 reg_class ptr_rax_reg(RAX, RAX_H);
  949 
  950 // Singleton class for RBX pointer register
  951 reg_class ptr_rbx_reg(RBX, RBX_H);
  952 
  953 // Singleton class for RSI pointer register
  954 reg_class ptr_rsi_reg(RSI, RSI_H);
  955 
  956 // Singleton class for RBP pointer register
  957 reg_class ptr_rbp_reg(RBP, RBP_H);
  958 
  959 // Singleton class for RDI pointer register
  960 reg_class ptr_rdi_reg(RDI, RDI_H);
  961 
  962 // Singleton class for stack pointer (RSP)
  963 reg_class ptr_rsp_reg(RSP, RSP_H);
  964 
  965 // Singleton class for TLS pointer (R15)
  966 reg_class ptr_r15_reg(R15, R15_H);
  967 
  968 // Singleton class for RAX long register (RAX together with its RAX_H companion, as in every long singleton below)
  969 reg_class long_rax_reg(RAX, RAX_H);
  970 
  971 // Singleton class for RCX long register
  972 reg_class long_rcx_reg(RCX, RCX_H);
  973 
  974 // Singleton class for RDX long register
  975 reg_class long_rdx_reg(RDX, RDX_H);
  976 
  977 // Singleton class for R11 long register
  978 reg_class long_r11_reg(R11, R11_H);
  979 
  980 // Singleton class for RAX int register (base entry only, no _H companion, as in every int singleton below)
  981 reg_class int_rax_reg(RAX);
  982 
  983 // Singleton class for RBX int register
  984 reg_class int_rbx_reg(RBX);
  985 
  986 // Singleton class for RCX int register
  987 reg_class int_rcx_reg(RCX);
  988 
  989 // Singleton class for RDX int register
  990 reg_class int_rdx_reg(RDX);
  991 
  992 // Singleton class for RDI int register
  993 reg_class int_rdi_reg(RDI);
  994 
  995 // Singleton class for instruction pointer
  996 // reg_class ip_reg(RIP);
  997 
  998 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
  999                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1000                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1001                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1002                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1003                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1004                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1005                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1006                    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1007                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1008                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1009                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1010                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1011                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1012                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1013                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1014                    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1015                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1016                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1017                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1018                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1019                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1020                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1021                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1022                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1023                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1024                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1025                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1026                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1027                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); // chunk1 lists every XMM slot: XMM0..XMM31 in ascending order, each base entry followed by its b..p entries.
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H); // chunk2 holds the mask registers, listed from K7 down to K1 (the reverse of the vectmask_reg class order).
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H); // Class of all mask registers defined in this file section: K1..K7, each with its _H companion.
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H); // Singleton mask classes follow, one per K register (Kn plus Kn_H).
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // Flags allocation class (contains only RFLAGS); it should be the last alloc_class.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes (its only member is RFLAGS)
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Float registers for the pre-EVEX case: the base entry of XMM0 through XMM15.
 1062 reg_class float_reg_legacy(XMM0,
 1063                            XMM1,
 1064                            XMM2,
 1065                            XMM3,
 1066                            XMM4,
 1067                            XMM5,
 1068                            XMM6,
 1069                            XMM7,
 1070                            XMM8,
 1071                            XMM9,
 1072                            XMM10,
 1073                            XMM11,
 1074                            XMM12,
 1075                            XMM13,
 1076                            XMM14,
 1077                            XMM15);
 1078 
 1079 // Float registers for the EVEX case: the base entry of XMM0 through XMM31.
 1080 reg_class float_reg_evex(XMM0,
 1081                          XMM1,
 1082                          XMM2,
 1083                          XMM3,
 1084                          XMM4,
 1085                          XMM5,
 1086                          XMM6,
 1087                          XMM7,
 1088                          XMM8,
 1089                          XMM9,
 1090                          XMM10,
 1091                          XMM11,
 1092                          XMM12,
 1093                          XMM13,
 1094                          XMM14,
 1095                          XMM15,
 1096                          XMM16,
 1097                          XMM17,
 1098                          XMM18,
 1099                          XMM19,
 1100                          XMM20,
 1101                          XMM21,
 1102                          XMM22,
 1103                          XMM23,
 1104                          XMM24,
 1105                          XMM25,
 1106                          XMM26,
 1107                          XMM27,
 1108                          XMM28,
 1109                          XMM29,
 1110                          XMM30,
 1111                          XMM31);
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); // Runtime-selected float class; presumably float_reg_evex when the predicate holds, else float_reg_legacy - confirm against ADLC reg_class_dynamic semantics.
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); // Same pair of classes, but the predicate additionally requires supports_avx512vl().
 1115 
 1116 // Double registers for the pre-EVEX case: the first two entries (base and b) of XMM0 through XMM15.
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                             XMM1,  XMM1b,
 1119                             XMM2,  XMM2b,
 1120                             XMM3,  XMM3b,
 1121                             XMM4,  XMM4b,
 1122                             XMM5,  XMM5b,
 1123                             XMM6,  XMM6b,
 1124                             XMM7,  XMM7b,
 1125                             XMM8,  XMM8b,
 1126                             XMM9,  XMM9b,
 1127                             XMM10, XMM10b,
 1128                             XMM11, XMM11b,
 1129                             XMM12, XMM12b,
 1130                             XMM13, XMM13b,
 1131                             XMM14, XMM14b,
 1132                             XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
// double_reg resolves to double_reg_evex when VM_Version::supports_evex() holds
// and to double_reg_legacy otherwise; double_reg_vl additionally requires
// VM_Version::supports_avx512vl() before it picks the evex class.
reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
// vectors_reg resolves to vectors_reg_evex when VM_Version::supports_evex()
// holds and to vectors_reg_legacy otherwise; vectors_reg_vlbwdq makes the same
// choice based on VM_Version::supports_avx512vlbwdq() instead.
reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
// vectord_reg resolves to vectord_reg_evex when VM_Version::supports_evex()
// holds and to vectord_reg_legacy otherwise; vectord_reg_vlbwdq makes the same
// choice based on VM_Version::supports_avx512vlbwdq() instead.
reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
// vectorx_reg resolves to vectorx_reg_evex when VM_Version::supports_evex()
// holds and to vectorx_reg_legacy otherwise; vectorx_reg_vlbwdq makes the same
// choice based on VM_Version::supports_avx512vlbwdq() instead.
reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
// vectory_reg resolves to vectory_reg_evex when VM_Version::supports_evex()
// holds and to vectory_reg_legacy otherwise; vectory_reg_vlbwdq makes the same
// choice based on VM_Version::supports_avx512vlbwdq() instead.
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
 1391 // Class for all 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
 1425 // Class for restricted 512bit vector registers
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
// vectorz_reg resolves to vectorz_reg_evex (XMM0-XMM31) when
// VM_Version::supports_evex() holds and to vectorz_reg_legacy (XMM0-XMM15)
// otherwise; vectorz_reg_vl additionally requires
// VM_Version::supports_avx512vl() before it picks the evex class.
reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
// Class holding only XMM0, using the same four slots (XMM0..XMM0d) that the
// 128bit vector classes list for it.
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
// Returns true if, for the long type of CastLL node n, the low bound is either
// min_jlong or a signed 32-bit immediate and the high bound is either
// max_jlong or a signed 32-bit immediate.
bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
// Register masks that are filled in by reg_mask_init().

// General purpose / pointer masks.
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
// Long masks.
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
// Int masks.
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
// Float mask (evex or legacy variant, chosen in reg_mask_init()).
extern RegMask _FLOAT_REG_mask;

// Each of these is the matching register mask or'ed with
// STACK_OR_STACK_SLOTS_mask().
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
// Short aliases for the Assembler operand kinds imm_operand and disp32_operand.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Emission shorthand: "__ foo(...)" expands to "masm->foo(...)", so it can only
// be used where a variable named masm is in scope.
#define __ masm->
 1506 
// Definitions of the register masks declared extern in the source_hpp block.
// All of them are populated by reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
 1524 
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
 1627   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
 1631   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
// The 4-byte operand that follows the call opcode byte needs to be 4-byte
// aligned to ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
// The 4-byte operand that follows the call opcode byte needs to be 4-byte
// aligned to ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1702   // If any floating point comparison instruction is used, unordered case always triggers jump
 1703   // for below condition, CF=1 is true when at least one input is NaN
 1704   Label done;
 1705   __ movl(dst, -1);
 1706   __ jcc(Assembler::below, done);
 1707   __ setcc(Assembler::notEqual, dst);
 1708   __ bind(done);
 1709 }
 1710 
 1711 enum FP_PREC {
 1712   fp_prec_hlf,
 1713   fp_prec_flt,
 1714   fp_prec_dbl
 1715 };
 1716 
 1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
 1718                                 XMMRegister p, XMMRegister q) {
 1719   if (pt == fp_prec_hlf) {
 1720     __ evucomish(p, q);
 1721   } else if (pt == fp_prec_flt) {
 1722     __ ucomiss(p, q);
 1723   } else {
 1724     __ ucomisd(p, q);
 1725   }
 1726 }
 1727 
 1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
 1729                          XMMRegister dst, XMMRegister src, Register scratch) {
 1730   if (pt == fp_prec_hlf) {
 1731     __ movhlf(dst, src, scratch);
 1732   } else if (pt == fp_prec_flt) {
 1733     __ movflt(dst, src);
 1734   } else {
 1735     __ movdbl(dst, src);
 1736   }
 1737 }
 1738 
 1739 // Math.min()          # Math.max()
 1740 // -----------------------------
 1741 // (v)ucomis[h/s/d]    #
 1742 // ja   -> b           # a
 1743 // jp   -> NaN         # NaN
 1744 // jb   -> a           # b
 1745 // je   -> a | b       # a & b
 1746 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1747                             XMMRegister a, XMMRegister b, Register rt,
 1748                             bool min, enum FP_PREC pt) {
 1749   Label nan, zero, below, above, done;
 1750 
 1751   emit_fp_ucom(masm, pt, a, b);
 1752 
 1753   if (dst->encoding() != (min ? b : a)->encoding()) {
 1754     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1755   } else {
 1756     __ jccb(Assembler::above, done);
 1757   }
 1758   __ jccb(Assembler::parity, nan);  // PF=1
 1759   __ jccb(Assembler::below, below); // CF=1
 1760 
 1761   // equal
 1762   // Using bitwise operations is a low cost way to compute the correct result
 1763   // for zero and non-zero inputs in this scenario except for NaN, which is
 1764   // handled separately. The mantissa and exponent are valid with either
 1765   // bitwise operation. For zero inputs, the sign bit is chosen according to
 1766   // whether a minimum or maximum value is required.
 1767   if (min) {
 1768     // Negative sign preserved when available (e.g., min(+0, -0) -> -0)
 1769     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1770   } else {
 1771     // Positive sign preserved when available (e.g., max(+0, -0) -> +0)
 1772     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1773   }
 1774   __ jmp(done);
 1775 
 1776   __ bind(above);
 1777   movfp(masm, pt, dst, min ? b : a, rt);
 1778   __ jmp(done);
 1779 
 1780   __ bind(nan);
 1781   if (pt == fp_prec_hlf) {
 1782     __ movl(rt, 0x00007e00); // Float16.NaN
 1783     __ evmovw(dst, rt);
 1784   } else if (pt == fp_prec_flt) {
 1785     __ movl(rt, 0x7fc00000); // Float.NaN
 1786     __ movdl(dst, rt);
 1787   } else {
 1788     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1789     __ movdq(dst, rt);
 1790   }
 1791   __ jmp(done);
 1792 
 1793   __ bind(below);
 1794   movfp(masm, pt, dst, min ? a : b, rt);
 1795 
 1796   __ bind(done);
 1797 }
 1798 
 1799 //=============================================================================
// Output register mask of MachConstantBaseNode: bound to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1801 
 1802 int ConstantTable::calculate_table_base_offset() const {
 1803   return 0;  // absolute addressing, no offset
 1804 }
 1805 
 1806 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1807 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1808   ShouldNotReachHere();
 1809 }
 1810 
 1811 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1812   // Empty encoding
 1813 }
 1814 
 1815 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1816   return 0;
 1817 }
 1818 
 1819 #ifndef PRODUCT
 1820 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1821   st->print("# MachConstantBaseNode (empty encoding)");
 1822 }
 1823 #endif
 1824 
 1825 
 1826 //=============================================================================
 1827 #ifndef PRODUCT
 1828 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1829   Compile* C = ra_->C;
 1830 
 1831   int framesize = C->output()->frame_size_in_bytes();
 1832   int bangsize = C->output()->bang_size_in_bytes();
 1833   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1834   // Remove wordSize for return addr which is already pushed.
 1835   framesize -= wordSize;
 1836 
 1837   if (C->output()->need_stack_bang(bangsize)) {
 1838     framesize -= wordSize;
 1839     st->print("# stack bang (%d bytes)", bangsize);
 1840     st->print("\n\t");
 1841     st->print("pushq   rbp\t# Save rbp");
 1842     if (PreserveFramePointer) {
 1843         st->print("\n\t");
 1844         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1845     }
 1846     if (framesize) {
 1847       st->print("\n\t");
 1848       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1849     }
 1850   } else {
 1851     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1852     st->print("\n\t");
 1853     framesize -= wordSize;
 1854     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1855     if (PreserveFramePointer) {
 1856       st->print("\n\t");
 1857       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1858       if (framesize > 0) {
 1859         st->print("\n\t");
 1860         st->print("addq    rbp, #%d", framesize);
 1861       }
 1862     }
 1863   }
 1864 
 1865   if (VerifyStackAtCalls) {
 1866     st->print("\n\t");
 1867     framesize -= wordSize;
 1868     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1869 #ifdef ASSERT
 1870     st->print("\n\t");
 1871     st->print("# stack alignment check");
 1872 #endif
 1873   }
 1874   if (C->stub_function() != nullptr) {
 1875     st->print("\n\t");
 1876     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1877     st->print("\n\t");
 1878     st->print("je      fast_entry\t");
 1879     st->print("\n\t");
 1880     st->print("call    #nmethod_entry_barrier_stub\t");
 1881     st->print("\n\tfast_entry:");
 1882   }
 1883   st->cr();
 1884 }
 1885 #endif
 1886 
 1887 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1888   Compile* C = ra_->C;
 1889 
 1890   int framesize = C->output()->frame_size_in_bytes();
 1891   int bangsize = C->output()->bang_size_in_bytes();
 1892 
 1893   if (C->clinit_barrier_on_entry()) {
 1894     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1895     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1896 
 1897     Label L_skip_barrier;
 1898     Register klass = rscratch1;
 1899 
 1900     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1901     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1902 
 1903     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1904 
 1905     __ bind(L_skip_barrier);
 1906   }
 1907 
 1908   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
 1909 
 1910   C->output()->set_frame_complete(__ offset());
 1911 
 1912   if (C->has_mach_constant_base_node()) {
 1913     // NOTE: We set the table base offset here because users might be
 1914     // emitted before MachConstantBaseNode.
 1915     ConstantTable& constant_table = C->output()->constant_table();
 1916     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1917   }
 1918 }
 1919 
 1920 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1921 {
 1922   return MachNode::size(ra_); // too many variables; just compute it
 1923                               // the hard way
 1924 }
 1925 
 1926 int MachPrologNode::reloc() const
 1927 {
 1928   return 0; // a large enough number
 1929 }
 1930 
 1931 //=============================================================================
 1932 #ifndef PRODUCT
 1933 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1934 {
 1935   Compile* C = ra_->C;
 1936   if (generate_vzeroupper(C)) {
 1937     st->print("vzeroupper");
 1938     st->cr(); st->print("\t");
 1939   }
 1940 
 1941   int framesize = C->output()->frame_size_in_bytes();
 1942   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1943   // Remove word for return adr already pushed
 1944   // and RBP
 1945   framesize -= 2*wordSize;
 1946 
 1947   if (framesize) {
 1948     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1949     st->print("\t");
 1950   }
 1951 
 1952   st->print_cr("popq    rbp");
 1953   if (do_polling() && C->is_method_compilation()) {
 1954     st->print("\t");
 1955     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1956                  "ja      #safepoint_stub\t"
 1957                  "# Safepoint: poll for GC");
 1958   }
 1959 }
 1960 #endif
 1961 
 1962 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1963 {
 1964   Compile* C = ra_->C;
 1965 
 1966   if (generate_vzeroupper(C)) {
 1967     // Clear upper bits of YMM registers when current compiled code uses
 1968     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1969     __ vzeroupper();
 1970   }
 1971 
 1972   int framesize = C->output()->frame_size_in_bytes();
 1973   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1974   // Remove word for return adr already pushed
 1975   // and RBP
 1976   framesize -= 2*wordSize;
 1977 
 1978   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1979 
 1980   if (framesize) {
 1981     __ addq(rsp, framesize);
 1982   }
 1983 
 1984   __ popq(rbp);
 1985 
 1986   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1987     __ reserved_stack_check();
 1988   }
 1989 
 1990   if (do_polling() && C->is_method_compilation()) {
 1991     Label dummy_label;
 1992     Label* code_stub = &dummy_label;
 1993     if (!C->output()->in_scratch_emit_size()) {
 1994       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1995       C->output()->add_stub(stub);
 1996       code_stub = &stub->entry();
 1997     }
 1998     __ relocate(relocInfo::poll_return_type);
 1999     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 2000   }
 2001 }
 2002 
 2003 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 2004 {
 2005   return MachNode::size(ra_); // too many variables; just compute it
 2006                               // the hard way
 2007 }
 2008 
 2009 int MachEpilogNode::reloc() const
 2010 {
 2011   return 2; // a large enough number
 2012 }
 2013 
 2014 const Pipeline* MachEpilogNode::pipeline() const
 2015 {
 2016   return MachNode::pipeline_class();
 2017 }
 2018 
 2019 //=============================================================================
 2020 
 2021 enum RC {
 2022   rc_bad,
 2023   rc_int,
 2024   rc_kreg,
 2025   rc_float,
 2026   rc_stack
 2027 };
 2028 
 2029 static enum RC rc_class(OptoReg::Name reg)
 2030 {
 2031   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 2032 
 2033   if (OptoReg::is_stack(reg)) return rc_stack;
 2034 
 2035   VMReg r = OptoReg::as_VMReg(reg);
 2036 
 2037   if (r->is_Register()) return rc_int;
 2038 
 2039   if (r->is_KRegister()) return rc_kreg;
 2040 
 2041   assert(r->is_XMMRegister(), "must be");
 2042   return rc_float;
 2043 }
 2044 
// Forward declarations of the next two helpers. They are shared by the 32- and
// 64-bit VM and are defined in x86.ad; MachSpillCopyNode::implementation()
// below uses them for vector register-to-register moves (vec_mov_helper) and
// for vector moves between a register and a stack slot (vec_spill_helper,
// where is_load == true selects the stack -> register direction).
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);
 2051 
 2052 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2053                                       int dst_offset, uint ireg, outputStream* st) {
 2054   if (masm) {
 2055     switch (ireg) {
 2056     case Op_VecS:
 2057       __ movq(Address(rsp, -8), rax);
 2058       __ movl(rax, Address(rsp, src_offset));
 2059       __ movl(Address(rsp, dst_offset), rax);
 2060       __ movq(rax, Address(rsp, -8));
 2061       break;
 2062     case Op_VecD:
 2063       __ pushq(Address(rsp, src_offset));
 2064       __ popq (Address(rsp, dst_offset));
 2065       break;
 2066     case Op_VecX:
 2067       __ pushq(Address(rsp, src_offset));
 2068       __ popq (Address(rsp, dst_offset));
 2069       __ pushq(Address(rsp, src_offset+8));
 2070       __ popq (Address(rsp, dst_offset+8));
 2071       break;
 2072     case Op_VecY:
 2073       __ vmovdqu(Address(rsp, -32), xmm0);
 2074       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2075       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2076       __ vmovdqu(xmm0, Address(rsp, -32));
 2077       break;
 2078     case Op_VecZ:
 2079       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2080       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2081       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2082       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2083       break;
 2084     default:
 2085       ShouldNotReachHere();
 2086     }
 2087 #ifndef PRODUCT
 2088   } else {
 2089     switch (ireg) {
 2090     case Op_VecS:
 2091       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2092                 "movl    rax, [rsp + #%d]\n\t"
 2093                 "movl    [rsp + #%d], rax\n\t"
 2094                 "movq    rax, [rsp - #8]",
 2095                 src_offset, dst_offset);
 2096       break;
 2097     case Op_VecD:
 2098       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2099                 "popq    [rsp + #%d]",
 2100                 src_offset, dst_offset);
 2101       break;
 2102      case Op_VecX:
 2103       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2104                 "popq    [rsp + #%d]\n\t"
 2105                 "pushq   [rsp + #%d]\n\t"
 2106                 "popq    [rsp + #%d]",
 2107                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2108       break;
 2109     case Op_VecY:
 2110       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2111                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2112                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2113                 "vmovdqu xmm0, [rsp - #32]",
 2114                 src_offset, dst_offset);
 2115       break;
 2116     case Op_VecZ:
 2117       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2118                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2119                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2120                 "vmovdqu xmm0, [rsp - #64]",
 2121                 src_offset, dst_offset);
 2122       break;
 2123     default:
 2124       ShouldNotReachHere();
 2125     }
 2126 #endif
 2127   }
 2128 }
 2129 
 2130 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2131                                        PhaseRegAlloc* ra_,
 2132                                        bool do_size,
 2133                                        outputStream* st) const {
 2134   assert(masm != nullptr || st  != nullptr, "sanity");
 2135   // Get registers to move
 2136   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2137   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2138   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2139   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2140 
 2141   enum RC src_second_rc = rc_class(src_second);
 2142   enum RC src_first_rc = rc_class(src_first);
 2143   enum RC dst_second_rc = rc_class(dst_second);
 2144   enum RC dst_first_rc = rc_class(dst_first);
 2145 
 2146   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2147          "must move at least 1 register" );
 2148 
 2149   if (src_first == dst_first && src_second == dst_second) {
 2150     // Self copy, no move
 2151     return 0;
 2152   }
 2153   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
 2154     uint ireg = ideal_reg();
 2155     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2156     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 2157     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 2158       // mem -> mem
 2159       int src_offset = ra_->reg2offset(src_first);
 2160       int dst_offset = ra_->reg2offset(dst_first);
 2161       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
 2162     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
 2163       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
 2164     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 2165       int stack_offset = ra_->reg2offset(dst_first);
 2166       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 2167     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 2168       int stack_offset = ra_->reg2offset(src_first);
 2169       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2170     } else {
 2171       ShouldNotReachHere();
 2172     }
 2173     return 0;
 2174   }
 2175   if (src_first_rc == rc_stack) {
 2176     // mem ->
 2177     if (dst_first_rc == rc_stack) {
 2178       // mem -> mem
 2179       assert(src_second != dst_first, "overlap");
 2180       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2181           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2182         // 64-bit
 2183         int src_offset = ra_->reg2offset(src_first);
 2184         int dst_offset = ra_->reg2offset(dst_first);
 2185         if (masm) {
 2186           __ pushq(Address(rsp, src_offset));
 2187           __ popq (Address(rsp, dst_offset));
 2188 #ifndef PRODUCT
 2189         } else {
 2190           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2191                     "popq    [rsp + #%d]",
 2192                      src_offset, dst_offset);
 2193 #endif
 2194         }
 2195       } else {
 2196         // 32-bit
 2197         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2198         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2199         // No pushl/popl, so:
 2200         int src_offset = ra_->reg2offset(src_first);
 2201         int dst_offset = ra_->reg2offset(dst_first);
 2202         if (masm) {
 2203           __ movq(Address(rsp, -8), rax);
 2204           __ movl(rax, Address(rsp, src_offset));
 2205           __ movl(Address(rsp, dst_offset), rax);
 2206           __ movq(rax, Address(rsp, -8));
 2207 #ifndef PRODUCT
 2208         } else {
 2209           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2210                     "movl    rax, [rsp + #%d]\n\t"
 2211                     "movl    [rsp + #%d], rax\n\t"
 2212                     "movq    rax, [rsp - #8]",
 2213                      src_offset, dst_offset);
 2214 #endif
 2215         }
 2216       }
 2217       return 0;
 2218     } else if (dst_first_rc == rc_int) {
 2219       // mem -> gpr
 2220       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2221           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2222         // 64-bit
 2223         int offset = ra_->reg2offset(src_first);
 2224         if (masm) {
 2225           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2226 #ifndef PRODUCT
 2227         } else {
 2228           st->print("movq    %s, [rsp + #%d]\t# spill",
 2229                      Matcher::regName[dst_first],
 2230                      offset);
 2231 #endif
 2232         }
 2233       } else {
 2234         // 32-bit
 2235         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2236         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2237         int offset = ra_->reg2offset(src_first);
 2238         if (masm) {
 2239           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2240 #ifndef PRODUCT
 2241         } else {
 2242           st->print("movl    %s, [rsp + #%d]\t# spill",
 2243                      Matcher::regName[dst_first],
 2244                      offset);
 2245 #endif
 2246         }
 2247       }
 2248       return 0;
 2249     } else if (dst_first_rc == rc_float) {
 2250       // mem-> xmm
 2251       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2252           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2253         // 64-bit
 2254         int offset = ra_->reg2offset(src_first);
 2255         if (masm) {
 2256           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2257 #ifndef PRODUCT
 2258         } else {
 2259           st->print("%s  %s, [rsp + #%d]\t# spill",
 2260                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2261                      Matcher::regName[dst_first],
 2262                      offset);
 2263 #endif
 2264         }
 2265       } else {
 2266         // 32-bit
 2267         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2268         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2269         int offset = ra_->reg2offset(src_first);
 2270         if (masm) {
 2271           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2272 #ifndef PRODUCT
 2273         } else {
 2274           st->print("movss   %s, [rsp + #%d]\t# spill",
 2275                      Matcher::regName[dst_first],
 2276                      offset);
 2277 #endif
 2278         }
 2279       }
 2280       return 0;
 2281     } else if (dst_first_rc == rc_kreg) {
 2282       // mem -> kreg
 2283       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2284           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2285         // 64-bit
 2286         int offset = ra_->reg2offset(src_first);
 2287         if (masm) {
 2288           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2289 #ifndef PRODUCT
 2290         } else {
 2291           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2292                      Matcher::regName[dst_first],
 2293                      offset);
 2294 #endif
 2295         }
 2296       }
 2297       return 0;
 2298     }
 2299   } else if (src_first_rc == rc_int) {
 2300     // gpr ->
 2301     if (dst_first_rc == rc_stack) {
 2302       // gpr -> mem
 2303       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2304           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2305         // 64-bit
 2306         int offset = ra_->reg2offset(dst_first);
 2307         if (masm) {
 2308           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2309 #ifndef PRODUCT
 2310         } else {
 2311           st->print("movq    [rsp + #%d], %s\t# spill",
 2312                      offset,
 2313                      Matcher::regName[src_first]);
 2314 #endif
 2315         }
 2316       } else {
 2317         // 32-bit
 2318         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2319         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2320         int offset = ra_->reg2offset(dst_first);
 2321         if (masm) {
 2322           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2323 #ifndef PRODUCT
 2324         } else {
 2325           st->print("movl    [rsp + #%d], %s\t# spill",
 2326                      offset,
 2327                      Matcher::regName[src_first]);
 2328 #endif
 2329         }
 2330       }
 2331       return 0;
 2332     } else if (dst_first_rc == rc_int) {
 2333       // gpr -> gpr
 2334       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2335           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2336         // 64-bit
 2337         if (masm) {
 2338           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2339                   as_Register(Matcher::_regEncode[src_first]));
 2340 #ifndef PRODUCT
 2341         } else {
 2342           st->print("movq    %s, %s\t# spill",
 2343                      Matcher::regName[dst_first],
 2344                      Matcher::regName[src_first]);
 2345 #endif
 2346         }
 2347         return 0;
 2348       } else {
 2349         // 32-bit
 2350         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2351         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2352         if (masm) {
 2353           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2354                   as_Register(Matcher::_regEncode[src_first]));
 2355 #ifndef PRODUCT
 2356         } else {
 2357           st->print("movl    %s, %s\t# spill",
 2358                      Matcher::regName[dst_first],
 2359                      Matcher::regName[src_first]);
 2360 #endif
 2361         }
 2362         return 0;
 2363       }
 2364     } else if (dst_first_rc == rc_float) {
 2365       // gpr -> xmm
 2366       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2367           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2368         // 64-bit
 2369         if (masm) {
 2370           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2371 #ifndef PRODUCT
 2372         } else {
 2373           st->print("movdq   %s, %s\t# spill",
 2374                      Matcher::regName[dst_first],
 2375                      Matcher::regName[src_first]);
 2376 #endif
 2377         }
 2378       } else {
 2379         // 32-bit
 2380         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2381         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2382         if (masm) {
 2383           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2384 #ifndef PRODUCT
 2385         } else {
 2386           st->print("movdl   %s, %s\t# spill",
 2387                      Matcher::regName[dst_first],
 2388                      Matcher::regName[src_first]);
 2389 #endif
 2390         }
 2391       }
 2392       return 0;
 2393     } else if (dst_first_rc == rc_kreg) {
 2394       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2395           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2396         // 64-bit
 2397         if (masm) {
 2398           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2399   #ifndef PRODUCT
 2400         } else {
 2401            st->print("kmovq   %s, %s\t# spill",
 2402                        Matcher::regName[dst_first],
 2403                        Matcher::regName[src_first]);
 2404   #endif
 2405         }
 2406       }
 2407       Unimplemented();
 2408       return 0;
 2409     }
 2410   } else if (src_first_rc == rc_float) {
 2411     // xmm ->
 2412     if (dst_first_rc == rc_stack) {
 2413       // xmm -> mem
 2414       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2415           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2416         // 64-bit
 2417         int offset = ra_->reg2offset(dst_first);
 2418         if (masm) {
 2419           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2420 #ifndef PRODUCT
 2421         } else {
 2422           st->print("movsd   [rsp + #%d], %s\t# spill",
 2423                      offset,
 2424                      Matcher::regName[src_first]);
 2425 #endif
 2426         }
 2427       } else {
 2428         // 32-bit
 2429         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2430         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2431         int offset = ra_->reg2offset(dst_first);
 2432         if (masm) {
 2433           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2434 #ifndef PRODUCT
 2435         } else {
 2436           st->print("movss   [rsp + #%d], %s\t# spill",
 2437                      offset,
 2438                      Matcher::regName[src_first]);
 2439 #endif
 2440         }
 2441       }
 2442       return 0;
 2443     } else if (dst_first_rc == rc_int) {
 2444       // xmm -> gpr
 2445       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2446           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2447         // 64-bit
 2448         if (masm) {
 2449           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2450 #ifndef PRODUCT
 2451         } else {
 2452           st->print("movdq   %s, %s\t# spill",
 2453                      Matcher::regName[dst_first],
 2454                      Matcher::regName[src_first]);
 2455 #endif
 2456         }
 2457       } else {
 2458         // 32-bit
 2459         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2460         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2461         if (masm) {
 2462           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2463 #ifndef PRODUCT
 2464         } else {
 2465           st->print("movdl   %s, %s\t# spill",
 2466                      Matcher::regName[dst_first],
 2467                      Matcher::regName[src_first]);
 2468 #endif
 2469         }
 2470       }
 2471       return 0;
 2472     } else if (dst_first_rc == rc_float) {
 2473       // xmm -> xmm
 2474       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2475           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2476         // 64-bit
 2477         if (masm) {
 2478           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2479 #ifndef PRODUCT
 2480         } else {
 2481           st->print("%s  %s, %s\t# spill",
 2482                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2483                      Matcher::regName[dst_first],
 2484                      Matcher::regName[src_first]);
 2485 #endif
 2486         }
 2487       } else {
 2488         // 32-bit
 2489         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2490         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2491         if (masm) {
 2492           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2493 #ifndef PRODUCT
 2494         } else {
 2495           st->print("%s  %s, %s\t# spill",
 2496                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2497                      Matcher::regName[dst_first],
 2498                      Matcher::regName[src_first]);
 2499 #endif
 2500         }
 2501       }
 2502       return 0;
 2503     } else if (dst_first_rc == rc_kreg) {
 2504       assert(false, "Illegal spilling");
 2505       return 0;
 2506     }
 2507   } else if (src_first_rc == rc_kreg) {
 2508     if (dst_first_rc == rc_stack) {
 2509       // mem -> kreg
 2510       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2511           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2512         // 64-bit
 2513         int offset = ra_->reg2offset(dst_first);
 2514         if (masm) {
 2515           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2516 #ifndef PRODUCT
 2517         } else {
 2518           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2519                      offset,
 2520                      Matcher::regName[src_first]);
 2521 #endif
 2522         }
 2523       }
 2524       return 0;
 2525     } else if (dst_first_rc == rc_int) {
 2526       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2527           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2528         // 64-bit
 2529         if (masm) {
 2530           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2531 #ifndef PRODUCT
 2532         } else {
 2533          st->print("kmovq   %s, %s\t# spill",
 2534                      Matcher::regName[dst_first],
 2535                      Matcher::regName[src_first]);
 2536 #endif
 2537         }
 2538       }
 2539       Unimplemented();
 2540       return 0;
 2541     } else if (dst_first_rc == rc_kreg) {
 2542       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2543           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2544         // 64-bit
 2545         if (masm) {
 2546           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2547 #ifndef PRODUCT
 2548         } else {
 2549          st->print("kmovq   %s, %s\t# spill",
 2550                      Matcher::regName[dst_first],
 2551                      Matcher::regName[src_first]);
 2552 #endif
 2553         }
 2554       }
 2555       return 0;
 2556     } else if (dst_first_rc == rc_float) {
 2557       assert(false, "Illegal spill");
 2558       return 0;
 2559     }
 2560   }
 2561 
 2562   assert(0," foo ");
 2563   Unimplemented();
 2564   return 0;
 2565 }
 2566 
 2567 #ifndef PRODUCT
// Debug-only (non-PRODUCT) printer for a spill copy. Runs implementation()
// with a null assembler so that the copy is described on 'st' instead of
// being emitted as machine code.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation(nullptr, ra_, false, st);
}
 2571 #endif
 2572 
// Emits the machine code of this spill copy through 'masm'. Runs
// implementation() with the assembler and without an output stream.
void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  implementation(masm, ra_, false, nullptr);
}
 2576 
// Size of this spill copy; no platform shortcut is taken here, the generic
// MachNode::size() computation is used.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
 2580 
 2581 //=============================================================================
 2582 #ifndef PRODUCT
 2583 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2584 {
 2585   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2586   int reg = ra_->get_reg_first(this);
 2587   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2588             Matcher::regName[reg], offset);
 2589 }
 2590 #endif
 2591 
 2592 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2593 {
 2594   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2595   int reg = ra_->get_encode(this);
 2596 
 2597   __ lea(as_Register(reg), Address(rsp, offset));
 2598 }
 2599 
 2600 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2601 {
 2602   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2603   if (ra_->get_encode(this) > 15) {
 2604     return (offset < 0x80) ? 6 : 9; // REX2
 2605   } else {
 2606     return (offset < 0x80) ? 5 : 8; // REX
 2607   }
 2608 }
 2609 
 2610 //=============================================================================
 2611 #ifndef PRODUCT
 2612 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2613 {
 2614   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2615   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2616   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2617 }
 2618 #endif
 2619 
// Emits the unverified entry point check by delegating to the macro
// assembler's ic_check(), passing InteriorEntryAlignment as its argument.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
 2624 
// Size of the unverified entry point code; no closed-form value is provided
// here, the generic MachNode::size() computation is used instead.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 2630 
 2631 
 2632 //=============================================================================
 2633 
// The vector calling convention is reported as supported exactly when the
// EnableVectorSupport flag is set.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
 2637 
 2638 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2639   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2640 }
 2641 
 2642 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2643   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2644 }
 2645 
 2646 #ifdef ASSERT
 2647 static bool is_ndd_demotable(const MachNode* mdef) {
 2648   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2649 }
 2650 #endif
 2651 
 2652 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2653                                             int oper_index) {
 2654   if (mdef == nullptr) {
 2655     return false;
 2656   }
 2657 
 2658   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2659       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2660     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2661     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2662     return false;
 2663   }
 2664 
 2665   // Complex memory operand covers multiple incoming edges needed for
 2666   // address computation. Biasing def towards any address component will not
 2667   // result in NDD demotion by assembler.
 2668   if (mdef->operand_num_edges(oper_index) != 1) {
 2669     return false;
 2670   }
 2671 
 2672   // Demotion candidate must be register mask compatible with definition.
 2673   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2674   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2675     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2676     return false;
 2677   }
 2678 
 2679   switch (oper_index) {
 2680   // First operand of MachNode corresponding to Intel APX NDD selection
 2681   // pattern can share its assigned register with definition operand if
 2682   // their live ranges do not overlap. In such a scenario we can demote
 2683   // it to legacy map0/map1 instruction by replacing its 4-byte extended
 2684   // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
 2685   // are decorated with a special flag by instruction selector.
 2686   case 1:
 2687     return is_ndd_demotable_opr1(mdef);
 2688 
 2689   // Definition operand of commutative operation can be biased towards second
 2690   // operand.
 2691   case 2:
 2692     return is_ndd_demotable_opr2(mdef);
 2693 
 2694   // Current scheme only selects up to two biasing candidates
 2695   default:
 2696     assert(false, "unhandled operand index: %s", mdef->Name());
 2697     break;
 2698   }
 2699 
 2700   return false;
 2701 }
 2702 
 2703 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2704   assert(EnableVectorSupport, "sanity");
 2705   int lo = XMM0_num;
 2706   int hi = XMM0b_num;
 2707   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2708   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2709   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2710   return OptoRegPair(hi, lo);
 2711 }
 2712 
 2713 // Is this branch offset short enough that a short branch can be used?
 2714 //
 2715 // NOTE: If the platform does not provide any short branch variants, then
 2716 //       this method should return false for offset 0.
 2717 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2718   // The passed offset is relative to address of the branch.
 2719   // On 86 a branch displacement is calculated relative to address
 2720   // of a next instruction.
 2721   offset -= br_size;
 2722 
 2723   // the short version of jmpConUCF2 contains multiple branches,
 2724   // making the reach slightly less
 2725   if (rule == jmpConUCF2_rule)
 2726     return (-126 <= offset && offset <= 125);
 2727   return (-128 <= offset && offset <= 127);
 2728 }
 2729 
 2730 #ifdef ASSERT
 2731 // Return whether or not this register is ever used as an argument.
 2732 bool Matcher::can_be_java_arg(int reg)
 2733 {
 2734   return
 2735     reg ==  RDI_num || reg == RDI_H_num ||
 2736     reg ==  RSI_num || reg == RSI_H_num ||
 2737     reg ==  RDX_num || reg == RDX_H_num ||
 2738     reg ==  RCX_num || reg == RCX_H_num ||
 2739     reg ==   R8_num || reg ==  R8_H_num ||
 2740     reg ==   R9_num || reg ==  R9_H_num ||
 2741     reg ==  R12_num || reg == R12_H_num ||
 2742     reg == XMM0_num || reg == XMM0b_num ||
 2743     reg == XMM1_num || reg == XMM1b_num ||
 2744     reg == XMM2_num || reg == XMM2b_num ||
 2745     reg == XMM3_num || reg == XMM3b_num ||
 2746     reg == XMM4_num || reg == XMM4b_num ||
 2747     reg == XMM5_num || reg == XMM5b_num ||
 2748     reg == XMM6_num || reg == XMM6b_num ||
 2749     reg == XMM7_num || reg == XMM7b_num;
 2750 }
 2751 #endif
 2752 
 2753 uint Matcher::int_pressure_limit()
 2754 {
 2755   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2756 }
 2757 
 2758 uint Matcher::float_pressure_limit()
 2759 {
 2760   // After experiment around with different values, the following default threshold
 2761   // works best for LCM's register pressure scheduling on x64.
 2762   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2763   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2764   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2765 }
 2766 
// Register for DIVI projection of divmodI.
// Returns the INT_RAX_REG register mask.
const RegMask& Matcher::divI_proj_mask() {
  return INT_RAX_REG_mask();
}
 2771 
// Register for MODI projection of divmodI.
// Returns the INT_RDX_REG register mask.
const RegMask& Matcher::modI_proj_mask() {
  return INT_RDX_REG_mask();
}
 2776 
// Register for DIVL projection of divmodL.
// Returns the LONG_RAX_REG register mask.
const RegMask& Matcher::divL_proj_mask() {
  return LONG_RAX_REG_mask();
}
 2781 
// Register for MODL projection of divmodL.
// Returns the LONG_RDX_REG register mask.
const RegMask& Matcher::modL_proj_mask() {
  return LONG_RDX_REG_mask();
}
 2786 
 2787 %}
 2788 
 2789 source_hpp %{
 2790 // Header information of the source block.
 2791 // Method declarations/definitions which are used outside
 2792 // the ad-scope can conveniently be defined here.
 2793 //
 2794 // To keep related declarations/definitions/uses close together,
 2795 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2796 
 2797 #include "runtime/vm_version.hpp"
 2798 
 2799 class NativeJump;
 2800 
// Platform answers about call trampoline stubs. This platform has no call
// trampolines, so both the stub size and its relocation count are zero.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
 2818 
// Deoptimization handler stub support: emit_deopt_handler() generates the
// stub and size_deopt_handler() gives the number of bytes reserved for it.
class HandlerImpl {

 public:

  // Emits the deopt handler stub through 'masm'; defined in the source block.
  static int emit_deopt_handler(C2_MacroAssembler* masm);

  // Size in bytes reserved for the deopt handler stub.
  static uint size_deopt_handler() {
    // one call and one jmp.
    return 7;
  }
};
 2830 
 2831 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2832   switch(bytes) {
 2833     case  4: // fall-through
 2834     case  8: // fall-through
 2835     case 16: return Assembler::AVX_128bit;
 2836     case 32: return Assembler::AVX_256bit;
 2837     case 64: return Assembler::AVX_512bit;
 2838 
 2839     default: {
 2840       ShouldNotReachHere();
 2841       return Assembler::AVX_NoVec;
 2842     }
 2843   }
 2844 }
 2845 
 2846 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2847   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2848 }
 2849 
 2850 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2851   uint def_idx = use->operand_index(opnd);
 2852   Node* def = use->in(def_idx);
 2853   return vector_length_encoding(def);
 2854 }
 2855 
 2856 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2857   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2858          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2859 }
 2860 
 2861 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2862   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2863            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2864 }
 2865 
// Platform-dependent node flags for x86. Every flag is Node::_last_flag
// shifted left by a distinct amount (1 through 13), so the values sit above
// the shared flags (assuming Node::_last_flag is a single bit - confirm
// against the shared Node flag definition). PD::_last_flag names the highest
// flag defined here.
class Node::PD {
public:
  enum NodeFlags : uint64_t {
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    // Flags describing which condition flags a node sets ...
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    // ... and which ones it clears.
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    // NDD demotion candidates, per operand.
    Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
    Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
    _last_flag                = Flag_ndd_demotable_opr2
  };
};
 2885 
 2886 %} // end source_hpp
 2887 
 2888 source %{
 2889 
 2890 #include "opto/addnode.hpp"
 2891 #include "c2_intelJccErratum_x86.hpp"
 2892 
 2893 void PhaseOutput::pd_perform_mach_node_analysis() {
 2894   if (VM_Version::has_intel_jcc_erratum()) {
 2895     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2896     _buf_sizes._code += extra_padding;
 2897   }
 2898 }
 2899 
 2900 int MachNode::pd_alignment_required() const {
 2901   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2902     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2903     return IntelJccErratum::largest_jcc_size() + 1;
 2904   } else {
 2905     return 1;
 2906   }
 2907 }
 2908 
 2909 int MachNode::compute_padding(int current_offset) const {
 2910   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2911     Compile* C = Compile::current();
 2912     PhaseOutput* output = C->output();
 2913     Block* block = output->block();
 2914     int index = output->index();
 2915     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2916   } else {
 2917     return 0;
 2918   }
 2919 }
 2920 
// Emit deopt handler code.
// The stub consists of a call to the deopt blob's unpack entry followed by a
// jmp back to that call. Returns the offset of the jmp (the position right
// after the call), or 0 when the stub could not be started, in which case a
// "CodeCache is full" failure is recorded on the current ciEnv.
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  address base = __ start_a_stub(size_deopt_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  // Offset at which the stub begins; used for the size check below.
  int offset = __ offset();

  Label start;
  __ bind(start);

  __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  // The returned entry offset is the position immediately after the call.
  int entry_offset = __ offset();

  // Jump back to the call bound at 'start'.
  __ jmp(start);

  // The stub must fit in the reserved size, and enough bytes must follow the
  // entry offset for the post-call NOP check.
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
         "out of bounds read in post-call NOP check");
  __ end_a_stub();
  return entry_offset;
}
 2948 
 2949 static Assembler::Width widthForType(BasicType bt) {
 2950   if (bt == T_BYTE) {
 2951     return Assembler::B;
 2952   } else if (bt == T_SHORT) {
 2953     return Assembler::W;
 2954   } else if (bt == T_INT) {
 2955     return Assembler::D;
 2956   } else {
 2957     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2958     return Assembler::Q;
 2959   }
 2960 }
 2961 
 2962 //=============================================================================
 2963 
  // Float masks come from different places depending on platform.
  // Each helper below simply forwards to the corresponding accessor in
  // StubRoutines::x86 and returns the address it yields.

  // Scalar float/double sign mask and sign flip constants.
  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
  // Vector mask and shuffle constants.
  static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
  static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
  static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
  static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
  static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
  static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
  static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
  static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
  static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
  static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
  static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
  static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
  static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
  // Vector float/double sign flip constants.
  static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
  static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2984 
 2985 //=============================================================================
 2986 bool Matcher::match_rule_supported(int opcode) {
 2987   if (!has_match_rule(opcode)) {
 2988     return false; // no match rule present
 2989   }
 2990   switch (opcode) {
 2991     case Op_AbsVL:
 2992     case Op_StoreVectorScatter:
 2993       if (UseAVX < 3) {
 2994         return false;
 2995       }
 2996       break;
 2997     case Op_PopCountI:
 2998     case Op_PopCountL:
 2999       if (!UsePopCountInstruction) {
 3000         return false;
 3001       }
 3002       break;
 3003     case Op_PopCountVI:
 3004       if (UseAVX < 2) {
 3005         return false;
 3006       }
 3007       break;
 3008     case Op_CompressV:
 3009     case Op_ExpandV:
 3010     case Op_PopCountVL:
 3011       if (UseAVX < 2) {
 3012         return false;
 3013       }
 3014       break;
 3015     case Op_MulVI:
 3016       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3017         return false;
 3018       }
 3019       break;
 3020     case Op_MulVL:
 3021       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3022         return false;
 3023       }
 3024       break;
 3025     case Op_MulReductionVL:
 3026       if (VM_Version::supports_avx512dq() == false) {
 3027         return false;
 3028       }
 3029       break;
 3030     case Op_AbsVB:
 3031     case Op_AbsVS:
 3032     case Op_AbsVI:
 3033     case Op_AddReductionVI:
 3034     case Op_AndReductionV:
 3035     case Op_OrReductionV:
 3036     case Op_XorReductionV:
 3037       if (UseSSE < 3) { // requires at least SSSE3
 3038         return false;
 3039       }
 3040       break;
 3041     case Op_MaxHF:
 3042     case Op_MinHF:
 3043       if (!VM_Version::supports_avx512vlbw()) {
 3044         return false;
 3045       }  // fallthrough
 3046     case Op_AddHF:
 3047     case Op_DivHF:
 3048     case Op_FmaHF:
 3049     case Op_MulHF:
 3050     case Op_ReinterpretS2HF:
 3051     case Op_ReinterpretHF2S:
 3052     case Op_SubHF:
 3053     case Op_SqrtHF:
 3054       if (!VM_Version::supports_avx512_fp16()) {
 3055         return false;
 3056       }
 3057       break;
 3058     case Op_VectorLoadShuffle:
 3059     case Op_VectorRearrange:
 3060     case Op_MulReductionVI:
 3061       if (UseSSE < 4) { // requires at least SSE4
 3062         return false;
 3063       }
 3064       break;
 3065     case Op_IsInfiniteF:
 3066     case Op_IsInfiniteD:
 3067       if (!VM_Version::supports_avx512dq()) {
 3068         return false;
 3069       }
 3070       break;
 3071     case Op_SqrtVD:
 3072     case Op_SqrtVF:
 3073     case Op_VectorMaskCmp:
 3074     case Op_VectorCastB2X:
 3075     case Op_VectorCastS2X:
 3076     case Op_VectorCastI2X:
 3077     case Op_VectorCastL2X:
 3078     case Op_VectorCastF2X:
 3079     case Op_VectorCastD2X:
 3080     case Op_VectorUCastB2X:
 3081     case Op_VectorUCastS2X:
 3082     case Op_VectorUCastI2X:
 3083     case Op_VectorMaskCast:
 3084       if (UseAVX < 1) { // enabled for AVX only
 3085         return false;
 3086       }
 3087       break;
 3088     case Op_PopulateIndex:
 3089       if (UseAVX < 2) {
 3090         return false;
 3091       }
 3092       break;
 3093     case Op_RoundVF:
 3094       if (UseAVX < 2) { // enabled for AVX2 only
 3095         return false;
 3096       }
 3097       break;
 3098     case Op_RoundVD:
 3099       if (UseAVX < 3) {
 3100         return false;  // enabled for AVX3 only
 3101       }
 3102       break;
 3103     case Op_CompareAndSwapL:
 3104     case Op_CompareAndSwapP:
 3105       break;
 3106     case Op_StrIndexOf:
 3107       if (!UseSSE42Intrinsics) {
 3108         return false;
 3109       }
 3110       break;
 3111     case Op_StrIndexOfChar:
 3112       if (!UseSSE42Intrinsics) {
 3113         return false;
 3114       }
 3115       break;
 3116     case Op_OnSpinWait:
 3117       if (VM_Version::supports_on_spin_wait() == false) {
 3118         return false;
 3119       }
 3120       break;
 3121     case Op_MulVB:
 3122     case Op_LShiftVB:
 3123     case Op_RShiftVB:
 3124     case Op_URShiftVB:
 3125     case Op_VectorInsert:
 3126     case Op_VectorLoadMask:
 3127     case Op_VectorStoreMask:
 3128     case Op_VectorBlend:
 3129       if (UseSSE < 4) {
 3130         return false;
 3131       }
 3132       break;
 3133     case Op_MaxD:
 3134     case Op_MaxF:
 3135     case Op_MinD:
 3136     case Op_MinF:
 3137       if (UseAVX < 1) { // enabled for AVX only
 3138         return false;
 3139       }
 3140       break;
 3141     case Op_CacheWB:
 3142     case Op_CacheWBPreSync:
 3143     case Op_CacheWBPostSync:
 3144       if (!VM_Version::supports_data_cache_line_flush()) {
 3145         return false;
 3146       }
 3147       break;
 3148     case Op_ExtractB:
 3149     case Op_ExtractL:
 3150     case Op_ExtractI:
 3151     case Op_RoundDoubleMode:
 3152       if (UseSSE < 4) {
 3153         return false;
 3154       }
 3155       break;
 3156     case Op_RoundDoubleModeV:
 3157       if (VM_Version::supports_avx() == false) {
 3158         return false; // 128bit vroundpd is not available
 3159       }
 3160       break;
 3161     case Op_LoadVectorGather:
 3162     case Op_LoadVectorGatherMasked:
 3163       if (UseAVX < 2) {
 3164         return false;
 3165       }
 3166       break;
 3167     case Op_FmaF:
 3168     case Op_FmaD:
 3169     case Op_FmaVD:
 3170     case Op_FmaVF:
 3171       if (!UseFMA) {
 3172         return false;
 3173       }
 3174       break;
 3175     case Op_MacroLogicV:
 3176       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3177         return false;
 3178       }
 3179       break;
 3180 
 3181     case Op_VectorCmpMasked:
 3182     case Op_VectorMaskGen:
 3183       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3184         return false;
 3185       }
 3186       break;
 3187     case Op_VectorMaskFirstTrue:
 3188     case Op_VectorMaskLastTrue:
 3189     case Op_VectorMaskTrueCount:
 3190     case Op_VectorMaskToLong:
 3191       if (UseAVX < 1) {
 3192          return false;
 3193       }
 3194       break;
 3195     case Op_RoundF:
 3196     case Op_RoundD:
 3197       break;
 3198     case Op_CopySignD:
 3199     case Op_CopySignF:
 3200       if (UseAVX < 3)  {
 3201         return false;
 3202       }
 3203       if (!VM_Version::supports_avx512vl()) {
 3204         return false;
 3205       }
 3206       break;
 3207     case Op_CompressBits:
 3208     case Op_ExpandBits:
 3209       if (!VM_Version::supports_bmi2()) {
 3210         return false;
 3211       }
 3212       break;
 3213     case Op_CompressM:
 3214       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3215         return false;
 3216       }
 3217       break;
 3218     case Op_ConvF2HF:
 3219     case Op_ConvHF2F:
 3220       if (!VM_Version::supports_float16()) {
 3221         return false;
 3222       }
 3223       break;
 3224     case Op_VectorCastF2HF:
 3225     case Op_VectorCastHF2F:
 3226       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3227         return false;
 3228       }
 3229       break;
 3230   }
 3231   return true;  // Match rules are supported by default.
 3232 }
 3233 
 3234 //------------------------------------------------------------------------
 3235 
 3236 static inline bool is_pop_count_instr_target(BasicType bt) {
 3237   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3238          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3239 }
 3240 
// Auto-vectorization uses the same support check as explicit vector nodes:
// the query is forwarded unchanged to match_rule_supported_vector().
bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
  return match_rule_supported_vector(opcode, vlen, bt);
}
 3244 
 3245 // Identify extra cases that we might want to provide match rules for vector nodes and
 3246 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3247 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3248   if (!match_rule_supported(opcode)) {
 3249     return false;
 3250   }
 3251   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3252   //   * SSE2 supports 128bit vectors for all types;
 3253   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3254   //   * AVX2 supports 256bit vectors for all types;
 3255   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3256   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3257   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3258   // And MaxVectorSize is taken into account as well.
 3259   if (!vector_size_supported(bt, vlen)) {
 3260     return false;
 3261   }
 3262   // Special cases which require vector length follow:
 3263   //   * implementation limitations
 3264   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3265   //   * 128bit vroundpd instruction is present only in AVX1
 3266   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3267   switch (opcode) {
 3268     case Op_MaxVHF:
 3269     case Op_MinVHF:
 3270       if (!VM_Version::supports_avx512bw()) {
 3271         return false;
 3272       }
 3273     case Op_AddVHF:
 3274     case Op_DivVHF:
 3275     case Op_FmaVHF:
 3276     case Op_MulVHF:
 3277     case Op_SubVHF:
 3278     case Op_SqrtVHF:
 3279       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3280         return false;
 3281       }
 3282       if (!VM_Version::supports_avx512_fp16()) {
 3283         return false;
 3284       }
 3285       break;
 3286     case Op_AbsVF:
 3287     case Op_NegVF:
 3288       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3289         return false; // 512bit vandps and vxorps are not available
 3290       }
 3291       break;
 3292     case Op_AbsVD:
 3293     case Op_NegVD:
 3294       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3295         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3296       }
 3297       break;
 3298     case Op_RotateRightV:
 3299     case Op_RotateLeftV:
 3300       if (bt != T_INT && bt != T_LONG) {
 3301         return false;
 3302       } // fallthrough
 3303     case Op_MacroLogicV:
 3304       if (!VM_Version::supports_evex() ||
 3305           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3306         return false;
 3307       }
 3308       break;
 3309     case Op_ClearArray:
 3310     case Op_VectorMaskGen:
 3311     case Op_VectorCmpMasked:
 3312       if (!VM_Version::supports_avx512bw()) {
 3313         return false;
 3314       }
 3315       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3316         return false;
 3317       }
 3318       break;
 3319     case Op_LoadVectorMasked:
 3320     case Op_StoreVectorMasked:
 3321       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3322         return false;
 3323       }
 3324       break;
 3325     case Op_UMinV:
 3326     case Op_UMaxV:
 3327       if (UseAVX == 0) {
 3328         return false;
 3329       }
 3330       break;
 3331     case Op_UMinReductionV:
 3332     case Op_UMaxReductionV:
 3333       if (UseAVX == 0) {
 3334         return false;
 3335       }
 3336       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3337         return false;
 3338       }
 3339       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3340         return false;
 3341       }
 3342       break;
 3343     case Op_MaxV:
 3344     case Op_MinV:
 3345       if (UseSSE < 4 && is_integral_type(bt)) {
 3346         return false;
 3347       }
 3348       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3349           // Float/Double intrinsics are enabled for AVX family currently.
 3350           if (UseAVX == 0) {
 3351             return false;
 3352           }
 3353           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3354             return false;
 3355           }
 3356       }
 3357       break;
 3358     case Op_CallLeafVector:
 3359       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3360         return false;
 3361       }
 3362       break;
 3363     case Op_AddReductionVI:
 3364       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3365         return false;
 3366       }
 3367       // fallthrough
 3368     case Op_AndReductionV:
 3369     case Op_OrReductionV:
 3370     case Op_XorReductionV:
 3371       if (is_subword_type(bt) && (UseSSE < 4)) {
 3372         return false;
 3373       }
 3374       break;
 3375     case Op_MinReductionV:
 3376     case Op_MaxReductionV:
 3377       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3378         return false;
 3379       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3380         return false;
 3381       }
 3382       // Float/Double intrinsics enabled for AVX family.
 3383       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3384         return false;
 3385       }
 3386       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3387         return false;
 3388       }
 3389       break;
 3390     case Op_VectorBlend:
 3391       if (UseAVX == 0 && size_in_bits < 128) {
 3392         return false;
 3393       }
 3394       break;
 3395     case Op_VectorTest:
 3396       if (UseSSE < 4) {
 3397         return false; // Implementation limitation
 3398       } else if (size_in_bits < 32) {
 3399         return false; // Implementation limitation
 3400       }
 3401       break;
 3402     case Op_VectorLoadShuffle:
 3403     case Op_VectorRearrange:
 3404       if(vlen == 2) {
 3405         return false; // Implementation limitation due to how shuffle is loaded
 3406       } else if (size_in_bits == 256 && UseAVX < 2) {
 3407         return false; // Implementation limitation
 3408       }
 3409       break;
 3410     case Op_VectorLoadMask:
 3411     case Op_VectorMaskCast:
 3412       if (size_in_bits == 256 && UseAVX < 2) {
 3413         return false; // Implementation limitation
 3414       }
 3415       // fallthrough
 3416     case Op_VectorStoreMask:
 3417       if (vlen == 2) {
 3418         return false; // Implementation limitation
 3419       }
 3420       break;
 3421     case Op_PopulateIndex:
 3422       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3423         return false;
 3424       }
 3425       break;
 3426     case Op_VectorCastB2X:
 3427     case Op_VectorCastS2X:
 3428     case Op_VectorCastI2X:
 3429       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3430         return false;
 3431       }
 3432       break;
 3433     case Op_VectorCastL2X:
 3434       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3435         return false;
 3436       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3437         return false;
 3438       }
 3439       break;
 3440     case Op_VectorCastF2X: {
 3441         // As per JLS section 5.1.3 narrowing conversion to sub-word types
 3442         // happen after intermediate conversion to integer and special handling
 3443         // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
 3444         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3445         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3446           return false;
 3447         }
 3448       }
 3449       // fallthrough
 3450     case Op_VectorCastD2X:
 3451       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3452         return false;
 3453       }
 3454       break;
 3455     case Op_VectorCastF2HF:
 3456     case Op_VectorCastHF2F:
 3457       if (!VM_Version::supports_f16c() &&
 3458          ((!VM_Version::supports_evex() ||
 3459          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3460         return false;
 3461       }
 3462       break;
 3463     case Op_RoundVD:
 3464       if (!VM_Version::supports_avx512dq()) {
 3465         return false;
 3466       }
 3467       break;
 3468     case Op_MulReductionVI:
 3469       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3470         return false;
 3471       }
 3472       break;
 3473     case Op_LoadVectorGatherMasked:
 3474       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3475         return false;
 3476       }
 3477       if (is_subword_type(bt) &&
 3478          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3479           (size_in_bits < 64)                                      ||
 3480           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3481         return false;
 3482       }
 3483       break;
 3484     case Op_StoreVectorScatterMasked:
 3485     case Op_StoreVectorScatter:
 3486       if (is_subword_type(bt)) {
 3487         return false;
 3488       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3489         return false;
 3490       }
 3491       // fallthrough
 3492     case Op_LoadVectorGather:
 3493       if (!is_subword_type(bt) && size_in_bits == 64) {
 3494         return false;
 3495       }
 3496       if (is_subword_type(bt) && size_in_bits < 64) {
 3497         return false;
 3498       }
 3499       break;
 3500     case Op_SaturatingAddV:
 3501     case Op_SaturatingSubV:
 3502       if (UseAVX < 1) {
 3503         return false; // Implementation limitation
 3504       }
 3505       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3506         return false;
 3507       }
 3508       break;
 3509     case Op_SelectFromTwoVector:
 3510        if (size_in_bits < 128) {
 3511          return false;
 3512        }
 3513        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3514          return false;
 3515        }
 3516        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3517          return false;
 3518        }
 3519        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3520          return false;
 3521        }
 3522        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3523          return false;
 3524        }
 3525        break;
 3526     case Op_MaskAll:
 3527       if (!VM_Version::supports_evex()) {
 3528         return false;
 3529       }
 3530       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3531         return false;
 3532       }
 3533       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3534         return false;
 3535       }
 3536       break;
 3537     case Op_VectorMaskCmp:
 3538       if (vlen < 2 || size_in_bits < 32) {
 3539         return false;
 3540       }
 3541       break;
 3542     case Op_CompressM:
 3543       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3544         return false;
 3545       }
 3546       break;
 3547     case Op_CompressV:
 3548     case Op_ExpandV:
 3549       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3550         return false;
 3551       }
 3552       if (size_in_bits < 128 ) {
 3553         return false;
 3554       }
 3555     case Op_VectorLongToMask:
 3556       if (UseAVX < 1) {
 3557         return false;
 3558       }
 3559       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3560         return false;
 3561       }
 3562       break;
 3563     case Op_SignumVD:
 3564     case Op_SignumVF:
 3565       if (UseAVX < 1) {
 3566         return false;
 3567       }
 3568       break;
 3569     case Op_PopCountVI:
 3570     case Op_PopCountVL: {
 3571         if (!is_pop_count_instr_target(bt) &&
 3572             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3573           return false;
 3574         }
 3575       }
 3576       break;
 3577     case Op_ReverseV:
 3578     case Op_ReverseBytesV:
 3579       if (UseAVX < 2) {
 3580         return false;
 3581       }
 3582       break;
 3583     case Op_CountTrailingZerosV:
 3584     case Op_CountLeadingZerosV:
 3585       if (UseAVX < 2) {
 3586         return false;
 3587       }
 3588       break;
 3589   }
 3590   return true;  // Per default match rules are supported.
 3591 }
 3592 
 3593 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 3594   // ADLC based match_rule_supported routine checks for the existence of pattern based
 3595   // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
 3596   // of their non-masked counterpart with mask edge being the differentiator.
 3597   // This routine does a strict check on the existence of masked operation patterns
 3598   // by returning a default false value for all the other opcodes apart from the
 3599   // ones whose masked instruction patterns are defined in this file.
        //
        // opcode - ideal opcode of the vector operation
        // vlen   - number of vector elements
        // bt     - element type
        // An opcode that fails the non-masked check below is never reported as
        // supported in its masked form.
 3600   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3601     return false;
 3602   }
 3603 
        // Vector width in bits. Anything other than a 512-bit vector is rejected
        // for every opcode unless AVX512VL is available.
 3604   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3605   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3606     return false;
 3607   }
 3608   switch(opcode) {
 3609     // Unary masked operations
 3610     case Op_AbsVB:
 3611     case Op_AbsVS:
 3612       if(!VM_Version::supports_avx512bw()) {
 3613         return false;  // Implementation limitation
 3614       }
            // fallthrough
 3615     case Op_AbsVI:
 3616     case Op_AbsVL:
 3617       return true;
 3618 
 3619     // Ternary masked operations
 3620     case Op_FmaVF:
 3621     case Op_FmaVD:
 3622       return true;
 3623 
 3624     case Op_MacroLogicV:
 3625       if(bt != T_INT && bt != T_LONG) {
 3626         return false;
 3627       }
 3628       return true;
 3629 
 3630     // Binary masked operations
 3631     case Op_AddVB:
 3632     case Op_AddVS:
 3633     case Op_SubVB:
 3634     case Op_SubVS:
 3635     case Op_MulVS:
 3636     case Op_LShiftVS:
 3637     case Op_RShiftVS:
 3638     case Op_URShiftVS:
            // The assert restates the size/AVX512VL check made before the switch.
 3639       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3640       if (!VM_Version::supports_avx512bw()) {
 3641         return false;  // Implementation limitation
 3642       }
 3643       return true;
 3644 
 3645     case Op_MulVL:
 3646       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3647       if (!VM_Version::supports_avx512dq()) {
 3648         return false;  // Implementation limitation
 3649       }
 3650       return true;
 3651 
 3652     case Op_AndV:
 3653     case Op_OrV:
 3654     case Op_XorV:
 3655     case Op_RotateRightV:
 3656     case Op_RotateLeftV:
 3657       if (bt != T_INT && bt != T_LONG) {
 3658         return false; // Implementation limitation
 3659       }
 3660       return true;
 3661 
 3662     case Op_VectorLoadMask:
 3663       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3664       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3665         return false;
 3666       }
 3667       return true;
 3668 
          // Operations with no requirement beyond the checks made before the switch.
 3669     case Op_AddVI:
 3670     case Op_AddVL:
 3671     case Op_AddVF:
 3672     case Op_AddVD:
 3673     case Op_SubVI:
 3674     case Op_SubVL:
 3675     case Op_SubVF:
 3676     case Op_SubVD:
 3677     case Op_MulVI:
 3678     case Op_MulVF:
 3679     case Op_MulVD:
 3680     case Op_DivVF:
 3681     case Op_DivVD:
 3682     case Op_SqrtVF:
 3683     case Op_SqrtVD:
 3684     case Op_LShiftVI:
 3685     case Op_LShiftVL:
 3686     case Op_RShiftVI:
 3687     case Op_RShiftVL:
 3688     case Op_URShiftVI:
 3689     case Op_URShiftVL:
 3690     case Op_LoadVectorMasked:
 3691     case Op_StoreVectorMasked:
 3692     case Op_LoadVectorGatherMasked:
 3693     case Op_StoreVectorScatterMasked:
 3694       return true;
 3695 
 3696     case Op_UMinV:
 3697     case Op_UMaxV:
            // NOTE(review): this test looks redundant with the size/AVX512VL check
            // made before the switch, which already rejects every non-512-bit vector
            // without AVX512VL.
 3698       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3699         return false;
 3700       } // fallthrough
 3701     case Op_MaxV:
 3702     case Op_MinV:
 3703       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3704         return false; // Implementation limitation
 3705       }
 3706       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3707         return false; // Implementation limitation
 3708       }
 3709       return true;
 3710     case Op_SaturatingAddV:
 3711     case Op_SaturatingSubV:
            // Only subword element types are accepted here, and only for vectors of
            // at least 128 bits with AVX512BW.
 3712       if (!is_subword_type(bt)) {
 3713         return false;
 3714       }
 3715       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3716         return false; // Implementation limitation
 3717       }
 3718       return true;
 3719 
 3720     case Op_VectorMaskCmp:
 3721       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3722         return false; // Implementation limitation
 3723       }
 3724       return true;
 3725 
 3726     case Op_VectorRearrange:
 3727       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3728         return false; // Implementation limitation
 3729       }
 3730       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3731         return false; // Implementation limitation
 3732       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3733         return false; // Implementation limitation
 3734       }
 3735       return true;
 3736 
 3737     // Binary Logical operations
 3738     case Op_AndVMask:
 3739     case Op_OrVMask:
 3740     case Op_XorVMask:
 3741       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3742         return false; // Implementation limitation
 3743       }
 3744       return true;
 3745 
 3746     case Op_PopCountVI:
 3747     case Op_PopCountVL:
 3748       if (!is_pop_count_instr_target(bt)) {
 3749         return false;
 3750       }
 3751       return true;
 3752 
 3753     case Op_MaskAll:
 3754       return true;
 3755 
 3756     case Op_CountLeadingZerosV:
 3757       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3758         return true;
 3759       }
            // fallthrough: otherwise unsupported, like every opcode not listed above
 3760     default:
 3761       return false;
 3762   }
 3763 }
 3764 
 3765 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
        // Unconditionally answers false; neither 'node' nor 'vt' is inspected.
 3766   return false;
 3767 }
 3768 
 3769 // Reports whether the shuffle argument of Vector::rearrange has to be prepared
      // before use, for the given shuffle element type and vector length.
      // Only T_BYTE, T_SHORT, T_INT and T_LONG are expected as 'elem_bt'.
 3770 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3771   if (elem_bt == T_BYTE)  { return false; }
 3772   if (elem_bt == T_SHORT) { return !VM_Version::supports_avx512bw(); }
 3773   if (elem_bt == T_INT)   { return !VM_Version::supports_avx(); }
 3774   if (elem_bt == T_LONG)  { return vlen < 8 && !VM_Version::supports_avx512vl(); }
        // Any other element type is a caller error.
 3775   ShouldNotReachHere();
 3776   return false;
 3777 }
 3781 
 3782 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3783   // Prefer predicate if the mask type is "TypePVectMask".
        // The decision depends on 'vt' alone; 'opcode' is not consulted.
 3784   return vt->isa_pvectmask() != nullptr;
 3785 }
 3786 
 3787 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
        // Replaces a generic vector operand (VEC or LEGVEC) by a freshly allocated
        // concrete operand selected by 'ideal_reg' (Op_VecS ... Op_VecZ).
        // LEGVEC operands map to the legVec*Oper classes, VEC operands to vec*Oper.
 3788   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3789   const bool is_legacy    = (generic_opnd->opcode() == LEGVEC);
 3790   const bool lacks_vlbwdq = !VM_Version::supports_avx512vlbwdq(); // KNL
 3791   if (lacks_vlbwdq && is_temp && !is_legacy && (ideal_reg == Op_VecZ)) {
 3792     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3793     return new legVecZOper();
 3794   }
 3795   switch (ideal_reg) {
 3796     case Op_VecS:
 3797       if (is_legacy) { return new legVecSOper(); }
 3798       return new vecSOper();
 3799     case Op_VecD:
 3800       if (is_legacy) { return new legVecDOper(); }
 3801       return new vecDOper();
 3802     case Op_VecX:
 3803       if (is_legacy) { return new legVecXOper(); }
 3804       return new vecXOper();
 3805     case Op_VecY:
 3806       if (is_legacy) { return new legVecYOper(); }
 3807       return new vecYOper();
 3808     case Op_VecZ:
 3809       if (is_legacy) { return new legVecZOper(); }
 3810       return new vecZOper();
 3811   }
        // 'ideal_reg' was not one of the five vector register kinds.
 3812   ShouldNotReachHere();
 3813   return nullptr;
 3814 }
 3815 
 3816 bool Matcher::is_reg2reg_move(MachNode* m) {
        // True exactly when the rule of 'm' is one of the move rules listed below.
 3817   bool is_move = false;
 3818   switch (m->rule()) {
 3819     case MoveVec2Leg_rule: case MoveLeg2Vec_rule:
 3820     case MoveF2VL_rule:    case MoveVL2F_rule:
 3821     case MoveF2LEG_rule:   case MoveLEG2F_rule:
 3822     case MoveD2VL_rule:    case MoveVL2D_rule:
 3823     case MoveD2LEG_rule:   case MoveLEG2D_rule:
 3824       is_move = true;
 3825       break;
 3826     default:
 3827       break;
 3828   }
 3829   return is_move;
 3830 }
 3833 
 3834 bool Matcher::is_generic_vector(MachOper* opnd) {
        // An operand counts as generic when its opcode is VEC or LEGVEC.
 3835   const uint opc = opnd->opcode();
 3836   return (opc == VEC) || (opc == LEGVEC);
 3837 }
 3843 
 3844 //------------------------------------------------------------------------
 3845 
 3846 const RegMask* Matcher::predicate_reg_mask(void) {
        // Hands out the address of the _VECTMASK_REG_mask register mask.
 3847   return &_VECTMASK_REG_mask;
 3848 }
 3849 
 3850 // Max vector size in bytes for element type 'bt'. 0 if not supported.
 3851 int Matcher::vector_width_in_bytes(BasicType bt) {
 3852   assert(is_java_primitive(bt), "only primitive type vectors");
 3853   const bool is_fp              = (bt == T_FLOAT || bt == T_DOUBLE);
 3854   const bool is_byte_short_char = (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR);
        // Baseline is 16 bytes (128 bit). With UseAVX > 1 the width is
        // (1 << UseAVX) * 8 bytes: 32 bytes for UseAVX == 2, 64 bytes for UseAVX == 3.
 3855   int width = 16;
 3856   if (UseAVX > 1) {
 3857     width = (1 << UseAVX) * 8;
 3858   }
        // FLOAT and DOUBLE get 32 bytes as soon as UseAVX > 0, and 64 bytes for UseAVX > 2.
 3859   if (is_fp && UseAVX > 0) {
 3860     width = (UseAVX > 2) ? 64 : 32;
 3861   }
        // With UseAVX > 2, BYTE, SHORT and CHAR only get 64 bytes if AVX512BW is supported.
 3862   if (is_byte_short_char && UseAVX > 2) {
 3863     width = VM_Version::supports_avx512bw() ? 64 : 32;
 3864   }
        // Use flag to limit vector size.
 3865   width = MIN2(width, (int)MaxVectorSize);
        // Smallest acceptable width per element type: two elements for 8- and 4-byte
        // types, 4 bytes for the remaining types.
 3866   int min_width = 0;
 3867   switch (bt) {
 3868   case T_DOUBLE:
 3869   case T_LONG:
 3870     min_width = 16;
 3871     break;
 3872   case T_FLOAT:
 3873   case T_INT:
 3874     min_width = 8;
 3875     break;
 3876   case T_BOOLEAN:
 3877   case T_CHAR:
 3878   case T_BYTE:
 3879   case T_SHORT:
 3880     min_width = 4;
 3881     break;
 3882   default:
 3883     ShouldNotReachHere();
 3884   }
 3885   return (width < min_width) ? 0 : width;
 3886 }
 3891 
 3892 // Limits on vector size (number of elements) loaded into vector.
      // The maximum is the vector width in bytes for 'bt' divided by the byte size of
      // one 'bt' element; it is 0 whenever vector_width_in_bytes() reports 0.
 3893 int Matcher::max_vector_size(const BasicType bt) {
 3894   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3895 }
 3896 int Matcher::min_vector_size(const BasicType bt) {
        // Smallest element count for 'bt', never larger than max_vector_size(bt).
 3897   const int max_elems = max_vector_size(bt);
 3898   if (bt == T_DOUBLE) {
          // Support for calling svml double64 vectors
 3899     return MIN2(1, max_elems);
 3900   }
        // Min size which can be loaded into vector is 4 bytes.
 3901   const int min_elems = (type2aelembytes(bt) == 1) ? 4 : 2;
 3902   return MIN2(min_elems, max_elems);
 3903 }
 3906 
 3907 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
        // Same as max_vector_size(bt), except that by default on Cascade Lake the
        // result is capped at 256 bits (32 bytes) worth of 'bt' elements.
 3908   if (!VM_Version::is_default_intel_cascade_lake()) {
 3909     return Matcher::max_vector_size(bt);
 3910   }
 3911   const int elems_in_32_bytes = 32 / type2aelembytes(bt);
 3912   return MIN2(Matcher::max_vector_size(bt), elems_in_32_bytes);
 3913 }
 3915 
 3916 int Matcher::scalable_vector_reg_size(const BasicType bt) {
        // Always -1, independent of 'bt'.
        // NOTE(review): presumably signals that scalable vector registers are not
        // used on this platform - confirm against the callers.
 3917   return -1;
 3918 }
 3919 
 3920 // Maps a vector size in bytes (4, 8, 16, 32 or 64) to its ideal register kind.
      // 'size' must not exceed MaxVectorSize; any other size is a caller error.
 3921 uint Matcher::vector_ideal_reg(int size) {
 3922   assert(MaxVectorSize >= size, "");
 3923   if (size ==  4) { return Op_VecS; }
 3924   if (size ==  8) { return Op_VecD; }
 3925   if (size == 16) { return Op_VecX; }
 3926   if (size == 32) { return Op_VecY; }
 3927   if (size == 64) { return Op_VecZ; }
 3928   ShouldNotReachHere();
 3929   return 0;
 3930 }
 3933 
 3934 // Check for shift by small constant as well
      // Returns true when 'shift' is an LShiftX whose shift count is a constant not
      // greater than 3 and which the matcher has not visited yet. In that case the
      // shift is flagged in 'address_visited' and its inputs are pushed on 'mstack'.
      // Returns false, without touching 'mstack' or 'address_visited', otherwise.
 3935 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3936   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3937       shift->in(2)->get_int() <= 3 &&
 3938       // Are there other uses besides address expressions?
 3939       !matcher->is_visited(shift)) {
 3940     address_visited.set(shift->_idx); // Flag as address_visited
          // The constant shift count is pushed in the Visit state.
 3941     mstack.push(shift->in(2), Matcher::Visit);
 3942     Node *conv = shift->in(1);
 3943     // Allow the Matcher to match the rule which bypasses the
 3944     // ConvI2L operation for an array index on LP64
 3945     // if the index value is positive.
 3946     if (conv->Opcode() == Op_ConvI2L &&
 3947         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3948         // Are there other uses besides address expressions?
 3949         !matcher->is_visited(conv)) {
 3950       address_visited.set(conv->_idx); // Flag as address_visited
            // Skip the ConvI2L itself and continue with its input.
 3951       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3952     } else {
 3953       mstack.push(conv, Matcher::Pre_Visit);
 3954     }
 3955     return true;
 3956   }
 3957   return false;
 3958 }
 3959 
 3960 // This function identifies sub-graphs in which a 'load' node is
 3961 // input to two different nodes, and such that it can be matched
 3962 // with BMI instructions like blsi, blsr, etc.
 3963 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
 3964 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3965 // refers to the same node.
 3966 //
 3967 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3968 // This is a temporary solution until we make DAGs expressible in ADL.
 3969 template<typename ConType>
 3970 class FusedPatternMatcher {
        // Matches the shape (op1 (op2 Con mop) mop): 'mop' must feed both the op1 node
        // and the op2 node. ConType supplies the constant type (ConType::NativeType,
        // ConType::as_self) used to compare the constant's value.
 3971   Node* _op1_node;   // candidate root of the pattern
 3972   Node* _mop_node;   // operand expected on both op1 and op2
 3973   int _con_op;       // opcode the constant input of op2 must have
 3974 
        // Returns the input index (1 or 2) of 'n' whose opcode equals 'next_op', or -1
        // when there is none or when either of the two inputs is null.
        // With next_op_idx == -1 both inputs are tried, input 1 first; otherwise only
        // input 'next_op_idx' is examined.
 3975   static int match_next(Node* n, int next_op, int next_op_idx) {
 3976     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3977       return -1;
 3978     }
 3979 
 3980     if (next_op_idx == -1) { // n is commutative, try rotations
 3981       if (n->in(1)->Opcode() == next_op) {
 3982         return 1;
 3983       } else if (n->in(2)->Opcode() == next_op) {
 3984         return 2;
 3985       }
 3986     } else {
 3987       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3988       if (n->in(next_op_idx)->Opcode() == next_op) {
 3989         return next_op_idx;
 3990       }
 3991     }
 3992     return -1;
 3993   }
 3994 
 3995  public:
 3996   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 3997     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 3998 
        // Returns true when the stored nodes form (op1 (op2 Con mop) mop) with a
        // constant equal to 'con_value'; false in every other case.
 3999   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4000              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4001              typename ConType::NativeType con_value) {
 4002     if (_op1_node->Opcode() != op1) {
 4003       return false;
 4004     }
          // Give up when mop has more than two users.
 4005     if (_mop_node->outcnt() > 2) {
 4006       return false;
 4007     }
 4008     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4009     if (op1_op2_idx == -1) {
 4010       return false;
 4011     }
 4012     // Memory operation must be the other edge
          // (idx & 1) + 1 maps index 1 to 2 and index 2 to 1.
 4013     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4014 
 4015     // Check that the mop node is really what we want
 4016     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4017       Node* op2_node = _op1_node->in(op1_op2_idx);
            // The op2 node must have no user other than op1.
 4018       if (op2_node->outcnt() > 1) {
 4019         return false;
 4020       }
 4021       assert(op2_node->Opcode() == op2, "Should be");
 4022       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4023       if (op2_con_idx == -1) {
 4024         return false;
 4025       }
 4026       // Memory operation must be the other edge
 4027       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4028       // Check that the memory operation is the same node
 4029       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4030         // Now check the constant
              // A constant whose type is TOP never matches.
 4031         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4032         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4033           return true;
 4034         }
 4035       }
 4036     }
 4037     return false;
 4038   }
 4039 };
 4040 
 4041 static bool is_bmi_pattern(Node* n, Node* m) {
        // Checks whether 'n' and the load 'm' form one of the fused shapes
        //   (And (Sub 0 m) m), (And (Add m -1) m) or (Xor (Add m -1) m)
        // in their int (LoadI) or long (LoadL) flavour. Null nodes and any other
        // kind of 'm' yield false.
 4042   assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
 4043   if (n == nullptr || m == nullptr) return false;
 4044   if (m->Opcode() == Op_LoadI) {
 4045     FusedPatternMatcher<TypeInt> int_matcher(n, m, Op_ConI);
 4046     return int_matcher.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4047            int_matcher.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4048            int_matcher.match(Op_XorI, -1, Op_AddI, -1, -1);
 4049   }
 4050   if (m->Opcode() == Op_LoadL) {
 4051     FusedPatternMatcher<TypeLong> long_matcher(n, m, Op_ConL);
 4052     return long_matcher.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4053            long_matcher.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4054            long_matcher.match(Op_XorL, -1, Op_AddL, -1, -1);
 4055   }
 4056   return false;
 4057 }
 4058 
 4059 // Should the matcher clone input 'm' of node 'n'?
      // When the answer is yes, 'm' is also pushed on 'mstack' in the Visit state.
 4060 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4061   const bool clone_input =
            // 'n' and 'm' are part of a graph for a BMI instruction.
 4062       (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) ||
            // ShiftV src (ShiftCntV con), with m = ShiftCntV.
 4063       is_vshift_con_pattern(n, m) ||
 4064       is_encode_and_store_pattern(n, m);
 4065   if (clone_input) {
 4066     mstack.push(m, Visit);
 4067   }
 4068   return clone_input;
 4069 }
 4076 
 4077 // Should the Matcher clone shifts on addressing modes, expecting them
 4078 // to be subsumed into complex addressing expressions or compute them
 4079 // into registers?
      // Returns true, after flagging 'm' in 'address_visited' and pushing its inputs
      // on 'mstack', when the offset of 'm' is a constant or a shift accepted by
      // clone_shift(); returns false otherwise.
 4080 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4081   Node *off = m->in(AddPNode::Offset);
 4082   if (off->is_Con()) {
 4083     address_visited.test_set(m->_idx); // Flag as address_visited
 4084     Node *adr = m->in(AddPNode::Address);
 4085 
 4086     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4087     // AtomicAdd is not an addressing expression.
 4088     // Cheap to find it by looking for screwy base.
 4089     if (adr->is_AddP() &&
 4090         !adr->in(AddPNode::Base)->is_top() &&
 4091         !adr->in(AddPNode::Offset)->is_Con() &&
 4092         off->get_long() == (int) (off->get_long()) && // immL32
 4093         // Are there other uses besides address expressions?
 4094         !is_visited(adr)) {
 4095       address_visited.set(adr->_idx); // Flag as address_visited
            // Fold the inner AddP as well: its offset is either cloned as a small
            // constant shift or pushed unchanged.
 4096       Node *shift = adr->in(AddPNode::Offset);
 4097       if (!clone_shift(shift, this, mstack, address_visited)) {
 4098         mstack.push(shift, Pre_Visit);
 4099       }
 4100       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4101       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4102     } else {
 4103       mstack.push(adr, Pre_Visit);
 4104     }
 4105 
 4106     // Clone X+offset as it also folds into most addressing expressions
 4107     mstack.push(off, Visit);
 4108     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4109     return true;
 4110   } else if (clone_shift(off, this, mstack, address_visited)) {
          // Non-constant offset that is a small constant shift: clone_shift() has
          // already pushed the inputs of the shift.
 4111     address_visited.test_set(m->_idx); // Flag as address_visited
 4112     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4113     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4114     return true;
 4115   }
 4116   return false;
 4117 }
 4118 
 4119 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
        // Translates a BoolTest condition into an Assembler::ComparisonPredicate.
        // The signed and unsigned flavour of a test map to the same predicate.
 4120   switch (bt) {
 4121     case BoolTest::eq:  return Assembler::eq;
 4122     case BoolTest::ne:  return Assembler::neq;
 4123     case BoolTest::le:  // fallthrough
 4124     case BoolTest::ule: return Assembler::le;
 4125     case BoolTest::ge:  // fallthrough
 4126     case BoolTest::uge: return Assembler::nlt;
 4127     case BoolTest::lt:  // fallthrough
 4128     case BoolTest::ult: return Assembler::lt;
 4129     case BoolTest::gt:  // fallthrough
 4130     case BoolTest::ugt: return Assembler::nle;
 4131     default:            break;
 4132   }
        // Any other condition is a caller error.
 4133   ShouldNotReachHere();
 4134   return Assembler::_false;
 4135 }
 4140 
 4141 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
        // Translates a BoolTest condition into a floating point comparison predicate.
        // Every test uses an ordered non-signaling compare, except 'ne':
        // as per JLS 15.21.1, != of NaNs is true, thus it uses the unordered compare.
 4142   if (bt == BoolTest::eq) { return Assembler::EQ_OQ;  }
 4143   if (bt == BoolTest::ne) { return Assembler::NEQ_UQ; }
 4144   if (bt == BoolTest::le) { return Assembler::LE_OQ;  }
 4145   if (bt == BoolTest::ge) { return Assembler::GE_OQ;  }
 4146   if (bt == BoolTest::lt) { return Assembler::LT_OQ;  }
 4147   if (bt == BoolTest::gt) { return Assembler::GT_OQ;  }
        // Any other condition is a caller error.
 4148   ShouldNotReachHere();
 4149   return Assembler::FALSE_OS;
 4150 }
 4153 
 4154 // Helper methods for MachSpillCopyNode::implementation().
 4155 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4156                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
        // Vector register to vector register copy from 'src_lo' to 'dst_lo'.
        // With a non-null 'masm' the move is emitted; with a null 'masm' a textual
        // form is printed to 'st' (non-PRODUCT builds only, nothing happens in
        // PRODUCT builds). 'ireg' is the ideal vector register kind
        // (Op_VecS ... Op_VecZ). 'src_hi' and 'dst_hi' are only used by the assert:
        // except for Op_VecS, lo must be even and hi must be lo + 1.
 4157   assert(ireg == Op_VecS || // 32bit vector
 4158          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4159           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4160          "no non-adjacent vector moves" );
 4161   if (masm) {
 4162     switch (ireg) {
 4163     case Op_VecS: // copy whole register
 4164     case Op_VecD:
 4165     case Op_VecX:
            // With UseAVX >= 3 but no AVX512VL, a 128-bit extract with index 0 is
            // used in place of movdqu.
 4166       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4167         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4168       } else {
 4169         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4170      }
 4171       break;
 4172     case Op_VecY:
            // Same scheme for 256-bit vectors: vmovdqu, or a 256-bit extract with
            // index 0 when AVX512VL is missing.
 4173       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4174         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4175       } else {
 4176         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4177      }
 4178       break;
 4179     case Op_VecZ:
 4180       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4181       break;
 4182     default:
 4183       ShouldNotReachHere();
 4184     }
 4185 #ifndef PRODUCT
        // Printing branch: only compiled into non-PRODUCT builds.
 4186   } else {
 4187     switch (ireg) {
 4188     case Op_VecS:
 4189     case Op_VecD:
 4190     case Op_VecX:
 4191       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4192       break;
 4193     case Op_VecY:
 4194     case Op_VecZ:
 4195       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4196       break;
 4197     default:
 4198       ShouldNotReachHere();
 4199     }
 4200 #endif
 4201   }
 4202 }
 4203 
 4204 // Spill or reload one vector register through the stack slot [rsp + stack_offset].
 4205 // A non-null masm emits the instruction; a null masm prints it to st instead
 4206 // (non-PRODUCT builds only).  is_load selects slot -> register, otherwise the
 4207 // register is stored to the slot.  reg indexes Matcher::_regEncode/regName and
 4208 // ireg is the ideal vector register kind (Op_VecS, D, X, Y or Z).
 4209 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4210                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4211   if (masm != nullptr) {
 4212     const XMMRegister xmm = as_XMMRegister(Matcher::_regEncode[reg]);
 4213     const Address slot(rsp, stack_offset);
 4214     // Plain (v)movdqu forms are used unless UseAVX >= 3 comes without AVX512VL.
 4215     const bool plain_move = (UseAVX < 3) || VM_Version::supports_avx512vl();
 4216     switch (ireg) {
 4217     case Op_VecS:
 4218       if (is_load) {
 4219         __ movdl(xmm, slot);
 4220       } else {
 4221         __ movdl(slot, xmm);
 4222       }
 4223       break;
 4224     case Op_VecD:
 4225       if (is_load) {
 4226         __ movq(xmm, slot);
 4227       } else {
 4228         __ movq(slot, xmm);
 4229       }
 4230       break;
 4231     case Op_VecX:
 4232       if (plain_move) {
 4233         if (is_load) {
 4234           __ movdqu(xmm, slot);
 4235         } else {
 4236           __ movdqu(slot, xmm);
 4237         }
 4238       } else if (is_load) {
 4239         // Zero the register (xor with itself), then insert the loaded bits at position 0.
 4240         __ vpxor(xmm, xmm, xmm, 2);
 4241         __ vinsertf32x4(xmm, xmm, slot, 0x0);
 4242       } else {
 4243         __ vextractf32x4(slot, xmm, 0x0);
 4244       }
 4245       break;
 4246     case Op_VecY:
 4247       if (plain_move) {
 4248         if (is_load) {
 4249           __ vmovdqu(xmm, slot);
 4250         } else {
 4251           __ vmovdqu(slot, xmm);
 4252         }
 4253       } else if (is_load) {
 4254         // Same zero-then-insert sequence, with the 64x4 insert.
 4255         __ vpxor(xmm, xmm, xmm, 2);
 4256         __ vinsertf64x4(xmm, xmm, slot, 0x0);
 4257       } else {
 4258         __ vextractf64x4(slot, xmm, 0x0);
 4259       }
 4260       break;
 4261     case Op_VecZ:
 4262       if (is_load) {
 4263         __ evmovdquq(xmm, slot, 2);
 4264       } else {
 4265         __ evmovdquq(slot, xmm, 2);
 4266       }
 4267       break;
 4268     default:
 4269       ShouldNotReachHere();
 4270     }
 4271     return;
 4272   }
 4273 #ifndef PRODUCT
 4274   const char* name = Matcher::regName[reg];
 4275   switch (ireg) {
 4276   case Op_VecS:
 4277     if (is_load) {
 4278       st->print("movd    %s,[rsp + %d]\t# spill", name, stack_offset);
 4279     } else {
 4280       st->print("movd    [rsp + %d],%s\t# spill", stack_offset, name);
 4281     }
 4282     break;
 4283   case Op_VecD:
 4284     if (is_load) {
 4285       st->print("movq    %s,[rsp + %d]\t# spill", name, stack_offset);
 4286     } else {
 4287       st->print("movq    [rsp + %d],%s\t# spill", stack_offset, name);
 4288     }
 4289     break;
 4290   case Op_VecX:
 4291     if (is_load) {
 4292       st->print("movdqu  %s,[rsp + %d]\t# spill", name, stack_offset);
 4293     } else {
 4294       st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, name);
 4295     }
 4296     break;
 4297   case Op_VecY:
 4298   case Op_VecZ:
 4299     if (is_load) {
 4300       st->print("vmovdqu %s,[rsp + %d]\t# spill", name, stack_offset);
 4301     } else {
 4302       st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, name);
 4303     }
 4304     break;
 4305   default:
 4306     ShouldNotReachHere();
 4307   }
 4308 #endif
 4309 }
 4310 
 4311 // Build a byte array holding len copies of con, each stored as one element of type bt.
 4312 template <class T>
 4313 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4314   const int elem_bytes = type2aelembytes(bt);
 4315   GrowableArray<jbyte>* bytes = new GrowableArray<jbyte>(elem_bytes * len, elem_bytes * len, 0);
 4316   for (int lane = 0, pos = 0; lane < len; lane++, pos += elem_bytes) {
 4317     switch (bt) {
 4318       case T_BYTE: bytes->at(lane) = con; break;
 4319       case T_SHORT: {
 4320         const jshort elem = con;
 4321         memcpy(bytes->adr_at(pos), &elem, sizeof(elem));
 4322         break;
 4323       }
 4324       case T_INT: {
 4325         const jint elem = con;
 4326         memcpy(bytes->adr_at(pos), &elem, sizeof(elem));
 4327         break;
 4328       }
 4329       case T_LONG: {
 4330         const jlong elem = con;
 4331         memcpy(bytes->adr_at(pos), &elem, sizeof(elem));
 4332         break;
 4333       }
 4334       case T_FLOAT: {
 4335         const jfloat elem = con;
 4336         memcpy(bytes->adr_at(pos), &elem, sizeof(elem));
 4337         break;
 4338       }
 4339       case T_DOUBLE: {
 4340         const jdouble elem = con;
 4341         memcpy(bytes->adr_at(pos), &elem, sizeof(elem));
 4342         break;
 4343       }
 4344       default: assert(false, "%s", type2name(bt));
 4345     }
 4346   }
 4347   return bytes;
 4348 }
 4349 
 4350 // Returns a 64-bit constant in which the most significant bit of every
 4351 // bt-sized element (T_BYTE, T_SHORT, T_INT or T_LONG) is set.
 4352 static inline jlong high_bit_set(BasicType bt) {
 4353   if (bt == T_BYTE)  { return 0x8080808080808080; }
 4354   if (bt == T_SHORT) { return 0x8000800080008000; }
 4355   if (bt == T_INT)   { return 0x8000000080000000; }
 4356   if (bt == T_LONG)  { return 0x8000000000000000; }
 4357   // Any other type is a caller error.
 4358   ShouldNotReachHere();
 4359   return 0;
 4360 }
 4361 
 4362   // A MachNopNode emits _count bytes of nop padding, which is also its size.
 4363   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4364     __ nop(_count);
 4365   }
 4366 
 4367   uint MachNopNode::size(PhaseRegAlloc*) const {
 4368     return _count;
 4369   }
 4370 #ifndef PRODUCT
 4371   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4372     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4373   }
 4374 #endif
 4375 
 4376   // A breakpoint node emits a single int3; its size is left to MachNode::size().
 4377   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4378     __ int3();
 4379   }
 4380 
 4381   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4382     return MachNode::size(ra_);
 4383   }
 4384 #ifndef PRODUCT
 4385   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4386     st->print("# breakpoint");
 4387   }
 4388 #endif
 4389 
 4390 %}
 4391 
 4392 //----------ENCODING BLOCK-----------------------------------------------------
 4393 // This block specifies the encoding classes used by the compiler to
 4394 // output byte streams.  Encoding classes are parameterized macros
 4395 // used by Machine Instruction Nodes in order to generate the bit
 4396 // encoding of the instruction.  Operands specify their base encoding
 4397 // interface with the interface keyword.  Four interfaces are
 4398 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
 4399 // COND_INTER.  REG_INTER causes an operand to generate a function
 4400 // which returns its register number when queried.  CONST_INTER causes
 4401 // an operand to generate a function which returns the value of the
 4402 // constant when queried.  MEMORY_INTER causes an operand to generate
 4403 // four functions which return the Base Register, the Index Register,
 4404 // the Scale Value, and the Offset Value of the operand when queried.
 4405 // COND_INTER causes an operand to generate six functions which return
 4406 // the encoding code (ie - encoding bits for the instruction)
 4407 // associated with each basic boolean condition for a conditional
 4408 // instruction.
 4409 //
 4410 // Instructions specify two basic values for encoding.  Again, a
 4411 // function is available to check if the constant displacement is an
 4412 // oop. They use the ins_encode keyword to specify their encoding
 4413 // classes (which must be a sequence of enc_class names, and their
 4414 // parameters, specified in the encoding block), and they use the
 4415 // opcode keyword to specify, in order, their primary, secondary, and
 4416 // tertiary opcode.  Only the opcode sections which a particular
 4417 // instruction needs for encoding need to be specified.
 4418 encode %{
 4419   enc_class cdql_enc(no_rax_rdx_RegI div)
 4420   %{
 4421     // Full implementation of Java idiv and irem; checks for
 4422     // special case as described in JVM spec., p.243 & p.271.
        // In the special case (rax == min_int and divisor == -1) the idiv is
        // skipped: rax keeps min_int and rdx holds the zero written by the xor.
 4423     //
 4424     //         normal case                           special case
 4425     //
 4426     // input : rax: dividend                         min_int
 4427     //         reg: divisor                          -1
 4428     //
 4429     // output: rax: quotient  (= rax idiv reg)       min_int
 4430     //         rdx: remainder (= rax irem reg)       0
 4431     //
 4432     //  Code sequence:
 4433     //
 4434     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4435     //    5:   75 07/08                jne    e <normal>
 4436     //    7:   33 d2                   xor    %edx,%edx
 4437     //  [div >= 8 -> offset + 1]
 4438     //  [REX_B]
 4439     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4440     //    c:   74 03/04                je     11 <done>
 4441     // 000000000000000e <normal>:
 4442     //    e:   99                      cltd
 4443     //  [div >= 8 -> offset + 1]
 4444     //  [REX_B]
 4445     //    f:   f7 f9                   idiv   $div
 4446     // 0000000000000011 <done>:
 4447     Label normal;
 4448     Label done;
 4449 
 4450     // cmp    $0x80000000,%eax
 4451     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4452 
 4453     // jne    e <normal>
 4454     __ jccb(Assembler::notEqual, normal);
 4455 
 4456     // xor    %edx,%edx
 4457     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4458 
 4459     // cmp    $0xffffffffffffffff,$div
 4460     __ cmpl($div$$Register, -1);
 4461 
 4462     // je     11 <done>
 4463     __ jccb(Assembler::equal, done);
 4464 
 4465     // <normal>
 4466     // cltd
 4467     __ bind(normal);
 4468     __ cdql();
 4469 
 4470     // idivl
 4471     // <done>
 4472     __ idivl($div$$Register);
 4473     __ bind(done);
 4474   %}
 4475 
 4476   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4477   %{
 4478     // Full implementation of Java ldiv and lrem; checks for
 4479     // special case as described in JVM spec., p.243 & p.271.
        // In the special case (rax == min_long and divisor == -1) the idiv is
        // skipped: rax keeps min_long and rdx holds the zero written by the xor.
 4480     //
 4481     //         normal case                           special case
 4482     //
 4483     // input : rax: dividend                         min_long
 4484     //         reg: divisor                          -1
 4485     //
 4486     // output: rax: quotient  (= rax idiv reg)       min_long
 4487     //         rdx: remainder (= rax irem reg)       0
 4488     //
 4489     //  Code sequence:
 4490     //
 4491     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4492     //    7:   00 00 80
 4493     //    a:   48 39 d0                cmp    %rdx,%rax
 4494     //    d:   75 08                   jne    17 <normal>
 4495     //    f:   33 d2                   xor    %edx,%edx
 4496     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4497     //   15:   74 05                   je     1c <done>
 4498     // 0000000000000017 <normal>:
 4499     //   17:   48 99                   cqto
 4500     //   19:   48 f7 f9                idiv   $div
 4501     // 000000000000001c <done>:
 4502     Label normal;
 4503     Label done;
 4504 
 4505     // mov    $0x8000000000000000,%rdx
 4506     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4507 
 4508     // cmp    %rdx,%rax
 4509     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4510 
 4511     // jne    17 <normal>
 4512     __ jccb(Assembler::notEqual, normal);
 4513 
 4514     // xor    %edx,%edx
 4515     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4516 
 4517     // cmp    $0xffffffffffffffff,$div
 4518     __ cmpq($div$$Register, -1);
 4519 
 4520     // je     1c <done>
 4521     __ jccb(Assembler::equal, done);
 4522 
 4523     // <normal>
 4524     // cqto
 4525     __ bind(normal);
 4526     __ cdqq();
 4527 
 4528     // idivq (emitted here, as part of this encoding class)
 4529     // <done>
 4530     __ idivq($div$$Register);
 4531     __ bind(done);
 4532   %}
 4533 
 4534   enc_class clear_avx %{
 4535     DEBUG_ONLY(int off0 = __ offset());  // code offset before the optional vzeroupper
 4536     if (generate_vzeroupper(Compile::current())) {
 4537       // generate_vzeroupper() decides whether this compilation needs it.
 4538       // Clear upper bits of YMM registers when current compiled code uses
 4539       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 4540       __ vzeroupper();
 4541     }
 4542     DEBUG_ONLY(int off1 = __ offset());  // code offset after
        // The number of bytes emitted must match what clear_avx_size() predicts.
 4543     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4544   %}
 4545 
 4546   enc_class Java_To_Runtime(method meth) %{
        // Call a runtime entry point: load its address into r10, call through
        // r10, then emit the post-call nop.
 4547     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4548     __ call(r10);
 4549     __ post_call_nop();
 4550   %}
 4551 
 4552   enc_class Java_Static_Call(method meth)
 4553   %{
 4554     // JAVA STATIC CALL
 4555     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4556     // determine who we intended to call.
        //
        // Three cases follow: no _method (plain runtime call), the
        // _ensureMaterializedForStackWalk intrinsic (call replaced by nops),
        // and a real Java call with relocation info plus a to-interpreter stub.
 4557     if (!_method) {
 4558       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4559     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4560       // The NOP here is purely to ensure that eliding a call to
 4561       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4562       __ nop(5);
 4563       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4564     } else {
 4565       int method_index = resolved_method_index(masm);
          // Optimized virtual calls and static calls use different relocation types.
 4566       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4567                                                   : static_call_Relocation::spec(method_index);
          // Remember where the call starts (as pc and as offset) for the stub code below.
 4568       address mark = __ pc();
 4569       int call_offset = __ offset();
 4570       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4571       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4572         // Calls of the same statically bound method can share
 4573         // a stub to the interpreter.
 4574         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4575       } else {
 4576         // Emit stubs for static call.
 4577         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4578         __ clear_inst_mark();
 4579         if (stub == nullptr) {
              // No stub could be emitted: record the failure and stop here,
              // without the trailing post-call nop.
 4580           ciEnv::current()->record_failure("CodeCache is full");
 4581           return;
 4582         }
 4583       }
 4584     }
 4585     __ post_call_nop();
 4586   %}
 4587 
 4588   enc_class Java_Dynamic_Call(method meth) %{
        // Inline-cache call to the target, followed by the post-call nop.
 4589     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4590     __ post_call_nop();
 4591   %}
 4592 
 4593   enc_class call_epilog %{
        // Emits code only when VerifyStackAtCalls is set.
 4594     if (VerifyStackAtCalls) {
 4595       // Check that stack depth is unchanged: find magic cookie on stack
 4596       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4597       Label L;
          // The cookie 0xbadb100d is expected at [rsp + framesize].
 4598       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4599       __ jccb(Assembler::equal, L);
 4600       // Die if stack mismatch
 4601       __ int3();
 4602       __ bind(L);
 4603     }
 4604   %}
 4605 
 4606 %}
 4607 
 4608 //----------FRAME--------------------------------------------------------------
 4609 // Definition of frame structure and management information.
 4610 //
 4611 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4612 //                             |   (to get allocators register number
 4613 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4614 //  r   CALLER     |        |
 4615 //  o     |        +--------+      pad to even-align allocators stack-slot
 4616 //  w     V        |  pad0  |        numbers; owned by CALLER
 4617 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4618 //  h     ^        |   in   |  5
 4619 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4620 //  |     |        |        |  3
 4621 //  |     |        +--------+
 4622 //  V     |        | old out|      Empty on Intel, window on Sparc
 4623 //        |    old |preserve|      Must be even aligned.
 4624 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4625 //        |        |   in   |  3   area for Intel ret address
 4626 //     Owned by    |preserve|      Empty on Sparc.
 4627 //       SELF      +--------+
 4628 //        |        |  pad2  |  2   pad to align old SP
 4629 //        |        +--------+  1
 4630 //        |        | locks  |  0
 4631 //        |        +--------+----> OptoReg::stack0(), even aligned
 4632 //        |        |  pad1  | 11   pad to align new SP
 4633 //        |        +--------+
 4634 //        |        |        | 10
 4635 //        |        | spills |  9   spills
 4636 //        V        |        |  8   (pad0 slot for callee)
 4637 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4638 //        ^        |  out   |  7
 4639 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4640 //     Owned by    +--------+
 4641 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4642 //        |    new |preserve|      Must be even-aligned.
 4643 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4644 //        |        |        |
 4645 //
 4646 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4647 //         known from SELF's arguments and the Java calling convention.
 4648 //         Region 6-7 is determined per call site.
 4649 // Note 2: If the calling convention leaves holes in the incoming argument
 4650 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4651 //         are owned by the CALLEE.  Holes should not be necessary in the
 4652 //         incoming area, as the Java calling convention is completely under
 4653 //         the control of the AD file.  Doubles can be sorted and packed to
 4654 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4655 //         varargs C calling conventions.
 4656 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4657 //         even aligned with pad0 as needed.
 4658 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4659 //         region 6-11 is even aligned; it may be padded out more so that
 4660 //         the region from SP to FP meets the minimum stack alignment.
 4661 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4662 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4663 //         SP meets the minimum alignment.
 4664 
 4665 frame
 4666 %{
 4667   // This register is part of the calling convention
 4668   // between compiled code and the interpreter.
 4669   inline_cache_reg(RAX);                // Inline Cache Register
 4670 
 4671   // Optional: name the operand used by cisc-spilling to access
 4672   // [stack_pointer + offset]
 4673   cisc_spilling_operand_name(indOffset32);
 4674 
 4675   // Number of stack slots consumed by locking an object
 4676   sync_stack_slots(2);
 4677 
 4678   // Compiled code's Frame Pointer
 4679   frame_pointer(RSP);
 4680 
 4681   // Stack alignment requirement
 4682   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4683 
 4684   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4685   // for calls to C.  Supports the var-args backing area for register parms.
 4686   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4687 
 4688   // The after-PROLOG location of the return address.  Location of
 4689   // return address specifies a type (REG or STACK) and a number
 4690   // representing the register number (i.e. - use a register name) or
 4691   // stack slot.
 4692   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4693   // Otherwise, it is above the locks and verification slot and alignment word
 4694   return_addr(STACK - 2 +
 4695               align_up((Compile::current()->in_preserve_stack_slots() +
 4696                         Compile::current()->fixed_slots()),
 4697                        stack_alignment_in_slots()));
 4698 
 4699   // Location of compiled Java return values.  Same as C for now.
 4700   return_value
 4701   %{
 4702     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4703            "only return normal values");
 4704 
        // Both tables are indexed by ideal_reg; the per-entry comments name
        // the ideal register type each slot is for.  lo[] gives the low half.
 4705     static const int lo[Op_RegL + 1] = {
 4706       0,
 4707       0,
 4708       RAX_num,  // Op_RegN
 4709       RAX_num,  // Op_RegI
 4710       RAX_num,  // Op_RegP
 4711       XMM0_num, // Op_RegF
 4712       XMM0_num, // Op_RegD
 4713       RAX_num   // Op_RegL
 4714     };
        // hi[] gives the high half, or OptoReg::Bad where there is none.
 4715     static const int hi[Op_RegL + 1] = {
 4716       0,
 4717       0,
 4718       OptoReg::Bad, // Op_RegN
 4719       OptoReg::Bad, // Op_RegI
 4720       RAX_H_num,    // Op_RegP
 4721       OptoReg::Bad, // Op_RegF
 4722       XMM0b_num,    // Op_RegD
 4723       RAX_H_num     // Op_RegL
 4724     };
 4725     // Excluded flags and vector registers.
 4726     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4727     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4728   %}
 4729 %}
 4730 
 4731 //----------ATTRIBUTES---------------------------------------------------------
 4732 //----------Operand Attributes-------------------------------------------------
 4733 op_attrib op_cost(0);        // Required cost attribute (declared with value 0)
 4734 
 4735 //----------Instruction Attributes---------------------------------------------
 4736 ins_attrib ins_cost(100);       // Required cost attribute (declared with value 100)
 4737 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4738 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4739                                 // a non-matching short branch variant
 4740                                 // of some long branch?
 4741 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4742                                 // be a power of 2) specifies the
 4743                                 // alignment that some part of the
 4744                                 // instruction (not necessarily the
 4745                                 // start) requires.  If > 1, a
 4746                                 // compute_padding() function must be
 4747                                 // provided for the instruction
 4748 
 4749 // Whether this node is expanded during code emission into a sequence of
 4750 // instructions and the first instruction can perform an implicit null check.
 4751 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4752 
 4753 //----------OPERANDS-----------------------------------------------------------
 4754 // Operand definitions must precede instruction definitions for correct parsing
 4755 // in the ADLC because operands constitute user defined types which are used in
 4756 // instruction definitions.
 4757 
 4758 //----------Simple Operands----------------------------------------------------
 4759 // Immediate Operands
 4760 // Integer Immediate: any ConI, with no predicate restricting its value
 4761 operand immI()
 4762 %{
 4763   match(ConI);
 4764 
 4765   op_cost(10);
 4766   format %{ %}
 4767   interface(CONST_INTER);
 4768 %}
 4769 
 4770 // Constant for test vs zero: ConI whose value is 0
 4771 operand immI_0()
 4772 %{
 4773   predicate(n->get_int() == 0);
 4774   match(ConI);
 4775 
 4776   op_cost(0);
 4777   format %{ %}
 4778   interface(CONST_INTER);
 4779 %}
 4780 
 4781 // Constant for increment: ConI whose value is 1
 4782 operand immI_1()
 4783 %{
 4784   predicate(n->get_int() == 1);
 4785   match(ConI);
 4786 
 4787   op_cost(0);
 4788   format %{ %}
 4789   interface(CONST_INTER);
 4790 %}
 4791 
 4792 // Constant for decrement: ConI whose value is -1
 4793 operand immI_M1()
 4794 %{
 4795   predicate(n->get_int() == -1);
 4796   match(ConI);
 4797 
 4798   op_cost(0);
 4799   format %{ %}
 4800   interface(CONST_INTER);
 4801 %}
 4802 
 4803 operand immI_2()
 4804 %{
        // Matches only the integer constant 2.
 4805   predicate(n->get_int() == 2);
 4806   match(ConI);
 4807 
 4808   op_cost(0);
 4809   format %{ %}
 4810   interface(CONST_INTER);
 4811 %}
 4812 
 4813 operand immI_4()
 4814 %{
        // Matches only the integer constant 4.
 4815   predicate(n->get_int() == 4);
 4816   match(ConI);
 4817 
 4818   op_cost(0);
 4819   format %{ %}
 4820   interface(CONST_INTER);
 4821 %}
 4822 
 4823 operand immI_8()
 4824 %{
        // Matches only the integer constant 8.
 4825   predicate(n->get_int() == 8);
 4826   match(ConI);
 4827 
 4828   op_cost(0);
 4829   format %{ %}
 4830   interface(CONST_INTER);
 4831 %}
 4832 
 4833 // Valid scale values for addressing modes: ConI in the range 0..3
 4834 operand immI2()
 4835 %{
 4836   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4837   match(ConI);
 4838 
 4839   format %{ %}
 4840   interface(CONST_INTER);
 4841 %}
 4842 
 4843 operand immU7()
 4844 %{
        // Unsigned 7-bit integer constant: 0..0x7F.
 4845   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4846   match(ConI);
 4847 
 4848   op_cost(5);
 4849   format %{ %}
 4850   interface(CONST_INTER);
 4851 %}
 4852 
 4853 operand immI8()
 4854 %{
        // Signed 8-bit integer constant: -0x80..0x7F.
 4855   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4856   match(ConI);
 4857 
 4858   op_cost(5);
 4859   format %{ %}
 4860   interface(CONST_INTER);
 4861 %}
 4862 
 4863 operand immU8()
 4864 %{
        // Unsigned 8-bit integer constant: 0..255.
 4865   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4866   match(ConI);
 4867 
 4868   op_cost(5);
 4869   format %{ %}
 4870   interface(CONST_INTER);
 4871 %}
 4872 
 4873 operand immI16()
 4874 %{
        // Signed 16-bit integer constant: -32768..32767.
 4875   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4876   match(ConI);
 4877 
 4878   op_cost(10);
 4879   format %{ %}
 4880   interface(CONST_INTER);
 4881 %}
 4882 
 4883 // Int Immediate non-negative: ConI whose value is >= 0
 4884 operand immU31()
 4885 %{
 4886   predicate(n->get_int() >= 0);
 4887   match(ConI);
 4888 
 4889   op_cost(0);
 4890   format %{ %}
 4891   interface(CONST_INTER);
 4892 %}
 4893 
 4894 // Pointer Immediate: any ConP, with no predicate restricting its value
 4895 operand immP()
 4896 %{
 4897   match(ConP);
 4898 
 4899   op_cost(10);
 4900   format %{ %}
 4901   interface(CONST_INTER);
 4902 %}
 4903 
 4904 // Null Pointer Immediate: ConP whose pointer value is 0
 4905 operand immP0()
 4906 %{
 4907   predicate(n->get_ptr() == 0);
 4908   match(ConP);
 4909 
 4910   op_cost(5);
 4911   format %{ %}
 4912   interface(CONST_INTER);
 4913 %}
 4914 
 4915 // Narrow Pointer Immediate: any ConN, with no predicate restricting its value
 4916 operand immN() %{
 4917   match(ConN);
 4918 
 4919   op_cost(10);
 4920   format %{ %}
 4921   interface(CONST_INTER);
 4922 %}
 4923 
 4924 operand immNKlass() %{
        // Any ConNKlass constant; no predicate restricts its value.
 4925   match(ConNKlass);
 4926 
 4927   op_cost(10);
 4928   format %{ %}
 4929   interface(CONST_INTER);
 4930 %}
 4931 
 4932 // Narrow Null Pointer Immediate: ConN whose narrow constant is 0
 4933 operand immN0() %{
 4934   predicate(n->get_narrowcon() == 0);
 4935   match(ConN);
 4936 
 4937   op_cost(5);
 4938   format %{ %}
 4939   interface(CONST_INTER);
 4940 %}
 4941 
 4942 operand immP31()
 4943 %{
        // ConP that carries no relocation (relocInfo::none) and whose value
        // shifted right by 31 is zero, i.e. the value fits in 31 bits.
 4944   predicate(n->as_Type()->type()->is_ptr()->reloc() == relocInfo::none
 4945             && (n->get_ptr() >> 31) == 0);
 4946   match(ConP);
 4947 
 4948   op_cost(5);
 4949   format %{ %}
 4950   interface(CONST_INTER);
 4951 %}
 4952 
 4953 
 4954 // Long Immediate: any ConL, with no predicate restricting its value
 4955 operand immL()
 4956 %{
 4957   match(ConL);
 4958 
 4959   op_cost(20);
 4960   format %{ %}
 4961   interface(CONST_INTER);
 4962 %}
 4963 
 4964 // Long Immediate 8-bit: ConL in the signed 8-bit range -0x80..0x7F
 4965 operand immL8()
 4966 %{
 4967   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4968   match(ConL);
 4969 
 4970   op_cost(5);
 4971   format %{ %}
 4972   interface(CONST_INTER);
 4973 %}
 4974 
 4975 // Long Immediate 32-bit unsigned: ConL unchanged by a cast to unsigned int
 4976 operand immUL32()
 4977 %{
 4978   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4979   match(ConL);
 4980 
 4981   op_cost(10);
 4982   format %{ %}
 4983   interface(CONST_INTER);
 4984 %}
 4985 
 4986 // Long Immediate 32-bit signed: ConL unchanged by a cast to int
 4987 operand immL32()
 4988 %{
 4989   predicate(n->get_long() == (int) (n->get_long()));
 4990   match(ConL);
 4991 
 4992   op_cost(15);
 4993   format %{ %}
 4994   interface(CONST_INTER);
 4995 %}
 4996 
 4997 operand immL_Pow2()
 4998 %{
        // ConL whose value, taken as unsigned (julong), is a power of two.
 4999   predicate(is_power_of_2((julong)n->get_long()));
 5000   match(ConL);
 5001 
 5002   op_cost(15);
 5003   format %{ %}
 5004   interface(CONST_INTER);
 5005 %}
 5006 
 5007 operand immL_NotPow2()
 5008 %{
        // ConL whose bitwise complement, taken as unsigned (julong), is a
        // power of two.
 5009   predicate(is_power_of_2((julong)~n->get_long()));
 5010   match(ConL);
 5011 
 5012   op_cost(15);
 5013   format %{ %}
 5014   interface(CONST_INTER);
 5015 %}
 5016 
 5017 // Long Immediate zero: ConL whose value is 0
 5018 operand immL0()
 5019 %{
 5020   predicate(n->get_long() == 0L);
 5021   match(ConL);
 5022 
 5023   op_cost(10);
 5024   format %{ %}
 5025   interface(CONST_INTER);
 5026 %}
 5027 
 5028 // Constant for increment: ConL whose value is 1
 5029 operand immL1()
 5030 %{
 5031   predicate(n->get_long() == 1);
 5032   match(ConL);
 5033 
 5034   format %{ %}
 5035   interface(CONST_INTER);
 5036 %}
 5037 
 5038 // Constant for decrement: ConL whose value is -1
 5039 operand immL_M1()
 5040 %{
 5041   predicate(n->get_long() == -1);
 5042   match(ConL);
 5043 
 5044   format %{ %}
 5045   interface(CONST_INTER);
 5046 %}
 5047 
 5048 // Long Immediate: low 32-bit mask, i.e. ConL equal to 0xFFFFFFFF
 5049 operand immL_32bits()
 5050 %{
 5051   predicate(n->get_long() == 0xFFFFFFFFL);
 5052   match(ConL);
 5053   op_cost(20);
 5054 
 5055   format %{ %}
 5056   interface(CONST_INTER);
 5057 %}
 5058 
 5059 // Int Immediate: 2^n-1, positive (value > 0 and value + 1 is a power of two)
 5060 operand immI_Pow2M1()
 5061 %{
 5062   predicate((n->get_int() > 0)
 5063             && is_power_of_2((juint)n->get_int() + 1));
 5064   match(ConI);
 5065 
 5066   op_cost(20);
 5067   format %{ %}
 5068   interface(CONST_INTER);
 5069 %}
 5070 
 5071 // Float Immediate zero: ConF for which jint_cast of the value is 0
 5072 operand immF0()
 5073 %{
        // NOTE(review): jint_cast presumably yields the raw bit pattern, in
        // which case only +0.0f matches (not -0.0f) -- confirm.
 5074   predicate(jint_cast(n->getf()) == 0);
 5075   match(ConF);
 5076 
 5077   op_cost(5);
 5078   format %{ %}
 5079   interface(CONST_INTER);
 5080 %}
 5081 
 5082 // Float Immediate: any ConF, with no predicate restricting its value
 5083 operand immF()
 5084 %{
 5085   match(ConF);
 5086 
 5087   op_cost(15);
 5088   format %{ %}
 5089   interface(CONST_INTER);
 5090 %}
 5091 
 5092 // Half Float Immediate: any ConH, with no predicate restricting its value
 5093 operand immH()
 5094 %{
 5095   match(ConH);
 5096 
 5097   op_cost(15);
 5098   format %{ %}
 5099   interface(CONST_INTER);
 5100 %}
 5101 
 5102 // Double Immediate zero: ConD for which jlong_cast of the value is 0
 5103 operand immD0()
 5104 %{
        // NOTE(review): jlong_cast presumably yields the raw bit pattern, in
        // which case only +0.0 matches (not -0.0) -- confirm.
 5105   predicate(jlong_cast(n->getd()) == 0);
 5106   match(ConD);
 5107 
 5108   op_cost(5);
 5109   format %{ %}
 5110   interface(CONST_INTER);
 5111 %}
 5112 
 5113 // Double Immediate: any ConD, with no predicate restricting its value
 5114 operand immD()
 5115 %{
 5116   match(ConD);
 5117 
 5118   op_cost(15);
 5119   format %{ %}
 5120   interface(CONST_INTER);
 5121 %}
 5122 
 5123 // Immediates for special shifts (sign extend)
 5124
 5125 // Int immediate: the constant 16
 5126 operand immI_16()
 5127 %{
 5128   predicate(n->get_int() == 16);  // accept only the value 16
 5129   match(ConI);
 5130
 5131   format %{ %}
 5132   interface(CONST_INTER);
 5133 %}
 5134 
 5135 operand immI_24()
 5136 %{
 5137   predicate(n->get_int() == 24);  // int immediate: accept only the value 24
 5138   match(ConI);
 5139
 5140   format %{ %}
 5141   interface(CONST_INTER);
 5142 %}
 5143 
 5144 // Int constant 255 (0xFF), for byte-wide masking
 5145 operand immI_255()
 5146 %{
 5147   predicate(n->get_int() == 255);  // accept only the value 255
 5148   match(ConI);
 5149
 5150   format %{ %}
 5151   interface(CONST_INTER);
 5152 %}
 5153 
 5154 // Int constant 65535 (0xFFFF), for short-wide masking
 5155 operand immI_65535()
 5156 %{
 5157   predicate(n->get_int() == 65535);  // accept only the value 65535
 5158   match(ConI);
 5159
 5160   format %{ %}
 5161   interface(CONST_INTER);
 5162 %}
 5163 
 5164 // Long constant 255 (0xFF), for byte-wide masking
 5165 operand immL_255()
 5166 %{
 5167   predicate(n->get_long() == 255);  // accept only the value 255
 5168   match(ConL);
 5169
 5170   format %{ %}
 5171   interface(CONST_INTER);
 5172 %}
 5173 
 5174 // Long constant 65535 (0xFFFF), for short-wide masking
 5175 operand immL_65535()
 5176 %{
 5177   predicate(n->get_long() == 65535);  // accept only the value 65535
 5178   match(ConL);
 5179
 5180   format %{ %}
 5181   interface(CONST_INTER);
 5182 %}
 5183 
 5184 // AOT Runtime Constants Address: pointer constant (ConP) filtered by AOTRuntimeConstants::contains()
 5185 operand immAOTRuntimeConstantsAddress()
 5186 %{
 5187   // Accept the constant only if AOTRuntimeConstants::contains() returns true for its address
 5188   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5189   match(ConP);
 5190
 5191   op_cost(0);  // zero operand cost
 5192   format %{ %}
 5193   interface(CONST_INTER);
 5194 %}
 5195 
 5196 operand kReg()
 5197 %{
 5198   constraint(ALLOC_IN_RC(vectmask_reg));  // allocated from the vectmask_reg register class
 5199   match(RegVectMask);                     // matches RegVectMask values
 5200   format %{%}
 5201   interface(REG_INTER);
 5202 %}
 5203 
 5204 // Register Operands
 5205 // Integer Register: general int operand allocated from int_reg
 5206 operand rRegI()
 5207 %{
 5208   constraint(ALLOC_IN_RC(int_reg));
 5209   match(RegI);
 5210
 5211   match(rax_RegI);  // the register-specific int operands also match rRegI
 5212   match(rbx_RegI);
 5213   match(rcx_RegI);
 5214   match(rdx_RegI);
 5215   match(rdi_RegI);
 5216
 5217   format %{ %}
 5218   interface(REG_INTER);
 5219 %}
 5220 
 5221 // Special Registers: int operand restricted to the int_rax_reg class, formatted as "RAX"
 5222 operand rax_RegI()
 5223 %{
 5224   constraint(ALLOC_IN_RC(int_rax_reg));
 5225   match(RegI);
 5226   match(rRegI);  // a general int operand is also accepted by the matcher
 5227
 5228   format %{ "RAX" %}
 5229   interface(REG_INTER);
 5230 %}
 5231 
 5232 // Int operand restricted to the int_rbx_reg class, formatted as "RBX"
 5233 operand rbx_RegI()
 5234 %{
 5235   constraint(ALLOC_IN_RC(int_rbx_reg));
 5236   match(RegI);
 5237   match(rRegI);  // a general int operand is also accepted by the matcher
 5238
 5239   format %{ "RBX" %}
 5240   interface(REG_INTER);
 5241 %}
 5242 
 5243 operand rcx_RegI()
 5244 %{
 5245   constraint(ALLOC_IN_RC(int_rcx_reg));  // int operand restricted to the int_rcx_reg class
 5246   match(RegI);
 5247   match(rRegI);                          // a general int operand is also accepted by the matcher
 5248
 5249   format %{ "RCX" %}
 5250   interface(REG_INTER);
 5251 %}
 5252 
 5253 operand rdx_RegI()
 5254 %{
 5255   constraint(ALLOC_IN_RC(int_rdx_reg));  // int operand restricted to the int_rdx_reg class
 5256   match(RegI);
 5257   match(rRegI);                          // a general int operand is also accepted by the matcher
 5258
 5259   format %{ "RDX" %}
 5260   interface(REG_INTER);
 5261 %}
 5262 
 5263 operand rdi_RegI()
 5264 %{
 5265   constraint(ALLOC_IN_RC(int_rdi_reg));  // int operand restricted to the int_rdi_reg class
 5266   match(RegI);
 5267   match(rRegI);                          // a general int operand is also accepted by the matcher
 5268
 5269   format %{ "RDI" %}
 5270   interface(REG_INTER);
 5271 %}
 5272 
 5273 operand no_rax_rdx_RegI()
 5274 %{
 5275   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));  // class defined elsewhere; by its name it excludes RAX and RDX
 5276   match(RegI);
 5277   match(rbx_RegI);  // rax_RegI and rdx_RegI are deliberately absent from this list
 5278   match(rcx_RegI);
 5279   match(rdi_RegI);
 5280
 5281   format %{ %}
 5282   interface(REG_INTER);
 5283 %}
 5284 
 5285 operand no_rbp_r13_RegI()
 5286 %{
 5287   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));  // class defined elsewhere; by its name it excludes RBP and R13
 5288   match(RegI);
 5289   match(rRegI);     // the general int operand and all register-specific int operands match
 5290   match(rax_RegI);
 5291   match(rbx_RegI);
 5292   match(rcx_RegI);
 5293   match(rdx_RegI);
 5294   match(rdi_RegI);
 5295
 5296   format %{ %}
 5297   interface(REG_INTER);
 5298 %}
 5299 
 5300 // Pointer Register: pointer operand allocated from any_reg
 5301 operand any_RegP()
 5302 %{
 5303   constraint(ALLOC_IN_RC(any_reg));
 5304   match(RegP);
 5305   match(rax_RegP);  // register-specific pointer operands also match any_RegP
 5306   match(rbx_RegP);
 5307   match(rdi_RegP);
 5308   match(rsi_RegP);
 5309   match(rbp_RegP);
 5310   match(r15_RegP);
 5311   match(rRegP);     // as does the general pointer operand
 5312
 5313   format %{ %}
 5314   interface(REG_INTER);
 5315 %}
 5316 
 5317 operand rRegP()
 5318 %{
 5319   constraint(ALLOC_IN_RC(ptr_reg));  // general pointer operand, allocated from ptr_reg
 5320   match(RegP);
 5321   match(rax_RegP);  // register-specific pointer operands also match rRegP
 5322   match(rbx_RegP);
 5323   match(rdi_RegP);
 5324   match(rsi_RegP);
 5325   match(rbp_RegP);  // See Q&A below about
 5326   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5327
 5328   format %{ %}
 5329   interface(REG_INTER);
 5330 %}
 5331 
 5332 operand rRegN() %{
 5333   constraint(ALLOC_IN_RC(int_reg));  // RegN values are allocated from the same class as rRegI (int_reg)
 5334   match(RegN);
 5335
 5336   format %{ %}
 5337   interface(REG_INTER);
 5338 %}
 5339 
 5340 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5341 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5342 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 5343 // The output of an instruction is controlled by the allocator, which respects
 5344 // register class masks, not match rules.  Unless an instruction mentions
 5345 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 5346 // by the allocator as an input.
 5347 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 5348 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 5349 // result, RBP is not included in the output of the instruction either.
 5350 
 5351 // Pointer operand that is not allowed to use RBP, even if
 5352 // RBP is not used to hold the frame pointer.
 5353 operand no_rbp_RegP()
 5354 %{
 5355   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5356   match(RegP);
 5357   match(rbx_RegP);  // only the rbx/rsi/rdi pointer operands are listed as matches
 5358   match(rsi_RegP);
 5359   match(rdi_RegP);
 5360
 5361   format %{ %}
 5362   interface(REG_INTER);
 5363 %}
 5364 
 5365 // Special Registers
 5366 // Return a pointer value: pointer operand restricted to ptr_rax_reg
 5367 operand rax_RegP()
 5368 %{
 5369   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5370   match(RegP);
 5371   match(rRegP);  // a general pointer operand is also accepted by the matcher
 5372
 5373   format %{ %}
 5374   interface(REG_INTER);
 5375 %}
 5376 
 5377 // Special Registers
 5378 // Return a compressed pointer value: RegN operand restricted to int_rax_reg
 5379 operand rax_RegN()
 5380 %{
 5381   constraint(ALLOC_IN_RC(int_rax_reg));  // same register class as rax_RegI
 5382   match(RegN);
 5383   match(rRegN);
 5384
 5385   format %{ %}
 5386   interface(REG_INTER);
 5387 %}
 5388 
 5389 // Pointer operand restricted to ptr_rbx_reg. Used in AtomicAdd
 5390 operand rbx_RegP()
 5391 %{
 5392   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5393   match(RegP);
 5394   match(rRegP);  // a general pointer operand is also accepted by the matcher
 5395
 5396   format %{ %}
 5397   interface(REG_INTER);
 5398 %}
 5399 
 5400 operand rsi_RegP()
 5401 %{
 5402   constraint(ALLOC_IN_RC(ptr_rsi_reg));  // pointer operand restricted to ptr_rsi_reg
 5403   match(RegP);
 5404   match(rRegP);                          // a general pointer operand is also accepted by the matcher
 5405
 5406   format %{ %}
 5407   interface(REG_INTER);
 5408 %}
 5409 
 5410 operand rbp_RegP()
 5411 %{
 5412   constraint(ALLOC_IN_RC(ptr_rbp_reg));  // pointer operand restricted to ptr_rbp_reg
 5413   match(RegP);
 5414   match(rRegP);                          // a general pointer operand is also accepted by the matcher
 5415
 5416   format %{ %}
 5417   interface(REG_INTER);
 5418 %}
 5419 
 5420 // Pointer operand restricted to ptr_rdi_reg. Used in rep stosq
 5421 operand rdi_RegP()
 5422 %{
 5423   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5424   match(RegP);
 5425   match(rRegP);  // a general pointer operand is also accepted by the matcher
 5426
 5427   format %{ %}
 5428   interface(REG_INTER);
 5429 %}
 5430 
 5431 operand r15_RegP()
 5432 %{
 5433   constraint(ALLOC_IN_RC(ptr_r15_reg));  // pointer operand restricted to ptr_r15_reg (see the Q&A above on r15)
 5434   match(RegP);
 5435   match(rRegP);                          // a general pointer operand is also accepted by the matcher
 5436
 5437   format %{ %}
 5438   interface(REG_INTER);
 5439 %}
 5440 
 5441 operand rRegL()
 5442 %{
 5443   constraint(ALLOC_IN_RC(long_reg));  // general long operand, allocated from long_reg
 5444   match(RegL);
 5445   match(rax_RegL);  // only the rax and rdx long operands are listed as extra matches
 5446   match(rdx_RegL);
 5447
 5448   format %{ %}
 5449   interface(REG_INTER);
 5450 %}
 5451 
 5452 // Special Registers: long operand allocated from long_no_rax_rdx_reg
 5453 operand no_rax_rdx_RegL()
 5454 %{
 5455   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // class defined elsewhere; by its name it excludes RAX and RDX
 5456   match(RegL);
 5457   match(rRegL);
 5458
 5459   format %{ %}
 5460   interface(REG_INTER);
 5461 %}
 5462 
 5463 operand rax_RegL()
 5464 %{
 5465   constraint(ALLOC_IN_RC(long_rax_reg));  // long operand restricted to long_rax_reg
 5466   match(RegL);
 5467   match(rRegL);                           // a general long operand is also accepted by the matcher
 5468
 5469   format %{ "RAX" %}
 5470   interface(REG_INTER);
 5471 %}
 5472 
 5473 operand rcx_RegL()
 5474 %{
 5475   constraint(ALLOC_IN_RC(long_rcx_reg));  // long operand restricted to long_rcx_reg
 5476   match(RegL);
 5477   match(rRegL);                           // a general long operand is also accepted by the matcher
 5478
 5479   format %{ %}
 5480   interface(REG_INTER);
 5481 %}
 5482 
 5483 operand rdx_RegL()
 5484 %{
 5485   constraint(ALLOC_IN_RC(long_rdx_reg));  // long operand restricted to long_rdx_reg
 5486   match(RegL);
 5487   match(rRegL);                           // a general long operand is also accepted by the matcher
 5488
 5489   format %{ %}
 5490   interface(REG_INTER);
 5491 %}
 5492 
 5493 operand r11_RegL()
 5494 %{
 5495   constraint(ALLOC_IN_RC(long_r11_reg));  // long operand restricted to long_r11_reg
 5496   match(RegL);
 5497   match(rRegL);                           // a general long operand is also accepted by the matcher
 5498
 5499   format %{ %}
 5500   interface(REG_INTER);
 5501 %}
 5502 
 5503 operand no_rbp_r13_RegL()
 5504 %{
 5505   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));  // class defined elsewhere; by its name it excludes RBP and R13
 5506   match(RegL);
 5507   match(rRegL);
 5508   match(rax_RegL);  // r11_RegL is the only register-specific long operand not listed here
 5509   match(rcx_RegL);
 5510   match(rdx_RegL);
 5511
 5512   format %{ %}
 5513   interface(REG_INTER);
 5514 %}
 5515 
 5516 // Flags register, used as output of compare instructions; formatted as "RFLAGS"
 5517 operand rFlagsReg()
 5518 %{
 5519   constraint(ALLOC_IN_RC(int_flags));  // all flags operands in this file share the int_flags class
 5520   match(RegFlags);
 5521
 5522   format %{ "RFLAGS" %}
 5523   interface(REG_INTER);
 5524 %}
 5525 
 5526 // Flags register, used as output of FLOATING POINT compare instructions; formatted as "RFLAGS_U"
 5527 operand rFlagsRegU()
 5528 %{
 5529   constraint(ALLOC_IN_RC(int_flags));  // same register class as rFlagsReg; only the operand type differs
 5530   match(RegFlags);
 5531
 5532   format %{ "RFLAGS_U" %}
 5533   interface(REG_INTER);
 5534 %}
 5535 
 5536 operand rFlagsRegUCF() %{
 5537   constraint(ALLOC_IN_RC(int_flags));
 5538   match(RegFlags);
 5539   predicate(!UseAPX || !VM_Version::supports_avx10_2());  // logical negation of rFlagsRegUCFE's predicate
 5540
 5541   format %{ "RFLAGS_U_CF" %}
 5542   interface(REG_INTER);
 5543 %}
 5544 
 5545 operand rFlagsRegUCFE() %{
 5546   constraint(ALLOC_IN_RC(int_flags));
 5547   match(RegFlags);
 5548   predicate(UseAPX && VM_Version::supports_avx10_2());  // logical negation of rFlagsRegUCF's predicate
 5549
 5550   format %{ "RFLAGS_U_CFE" %}
 5551   interface(REG_INTER);
 5552 %}
 5553 
 5554 // Float register operand allocated from float_reg
 5555 operand regF() %{
 5556    constraint(ALLOC_IN_RC(float_reg));
 5557    match(RegF);
 5558
 5559    format %{ %}
 5560    interface(REG_INTER);
 5561 %}
 5562 
 5563 // Float register operand allocated from float_reg_legacy
 5564 operand legRegF() %{
 5565    constraint(ALLOC_IN_RC(float_reg_legacy));
 5566    match(RegF);
 5567
 5568    format %{ %}
 5569    interface(REG_INTER);
 5570 %}
 5571 
 5572 // Float register operand allocated from float_reg_vl
 5573 operand vlRegF() %{
 5574    constraint(ALLOC_IN_RC(float_reg_vl));
 5575    match(RegF);
 5576
 5577    format %{ %}
 5578    interface(REG_INTER);
 5579 %}
 5580 
 5581 // Double register operand allocated from double_reg
 5582 operand regD() %{
 5583    constraint(ALLOC_IN_RC(double_reg));
 5584    match(RegD);
 5585
 5586    format %{ %}
 5587    interface(REG_INTER);
 5588 %}
 5589 
 5590 // Double register operand allocated from double_reg_legacy
 5591 operand legRegD() %{
 5592    constraint(ALLOC_IN_RC(double_reg_legacy));
 5593    match(RegD);
 5594
 5595    format %{ %}
 5596    interface(REG_INTER);
 5597 %}
 5598 
 5599 // Double register operand allocated from double_reg_vl
 5600 operand vlRegD() %{
 5601    constraint(ALLOC_IN_RC(double_reg_vl));
 5602    match(RegD);
 5603
 5604    format %{ %}
 5605    interface(REG_INTER);
 5606 %}
 5607 
 5608 //----------Memory Operands----------------------------------------------------
 5609 // Direct Memory Operand
 5610 // operand direct(immP addr)
 5611 // %{
 5612 //   match(addr);
 5613 
 5614 //   format %{ "[$addr]" %}
 5615 //   interface(MEMORY_INTER) %{
 5616 //     base(0xFFFFFFFF);
 5617 //     index(0x4);
 5618 //     scale(0x0);
 5619 //     disp($addr);
 5620 //   %}
 5621 // %}
 5622 
 5623 // Indirect Memory Operand: [$reg] with no index and no displacement
 5624 operand indirect(any_RegP reg)
 5625 %{
 5626   constraint(ALLOC_IN_RC(ptr_reg));
 5627   match(reg);
 5628
 5629   format %{ "[$reg]" %}
 5630   interface(MEMORY_INTER) %{
 5631     base($reg);
 5632     index(0x4);  // No Index
 5633     scale(0x0);  // No Scale
 5634     disp(0x0);   // No Displacement
 5635   %}
 5636 %}
 5637 
 5638 // Indirect Memory Plus Short Offset Operand: [$reg + $off], offset typed immL8
 5639 operand indOffset8(any_RegP reg, immL8 off)
 5640 %{
 5641   constraint(ALLOC_IN_RC(ptr_reg));
 5642   match(AddP reg off);
 5643
 5644   format %{ "[$reg + $off (8-bit)]" %}
 5645   interface(MEMORY_INTER) %{
 5646     base($reg);
 5647     index(0x4);  // No Index
 5648     scale(0x0);  // No Scale
 5649     disp($off);  // the matched constant becomes the displacement
 5650   %}
 5651 %}
 5652 
 5653 // Indirect Memory Plus Long Offset Operand: [$reg + $off], offset typed immL32
 5654 operand indOffset32(any_RegP reg, immL32 off)
 5655 %{
 5656   constraint(ALLOC_IN_RC(ptr_reg));
 5657   match(AddP reg off);
 5658
 5659   format %{ "[$reg + $off (32-bit)]" %}
 5660   interface(MEMORY_INTER) %{
 5661     base($reg);
 5662     index(0x4);  // No Index
 5663     scale(0x0);  // No Scale
 5664     disp($off);  // the matched constant becomes the displacement
 5665   %}
 5666 %}
 5667 
 5668 // Indirect Memory Plus Index Register Plus Offset Operand: [$reg + $off + $lreg], unscaled long index
 5669 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5670 %{
 5671   constraint(ALLOC_IN_RC(ptr_reg));
 5672   match(AddP (AddP reg lreg) off);
 5673
 5674   op_cost(10);
 5675   format %{"[$reg + $off + $lreg]" %}
 5676   interface(MEMORY_INTER) %{
 5677     base($reg);
 5678     index($lreg);
 5679     scale(0x0);  // No Scale
 5680     disp($off);
 5681   %}
 5682 %}
 5683 
 5684 // Indirect Memory Plus Index Register Operand: [$reg + $lreg], no scale and no displacement
 5685 operand indIndex(any_RegP reg, rRegL lreg)
 5686 %{
 5687   constraint(ALLOC_IN_RC(ptr_reg));
 5688   match(AddP reg lreg);
 5689
 5690   op_cost(10);
 5691   format %{"[$reg + $lreg]" %}
 5692   interface(MEMORY_INTER) %{
 5693     base($reg);
 5694     index($lreg);
 5695     scale(0x0);  // No Scale
 5696     disp(0x0);   // No Displacement
 5697   %}
 5698 %}
 5699 
 5700 // Indirect Memory Times Scale Plus Index Register: [$reg + $lreg << $scale], scale typed immI2
 5701 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5702 %{
 5703   constraint(ALLOC_IN_RC(ptr_reg));
 5704   match(AddP reg (LShiftL lreg scale));
 5705
 5706   op_cost(10);
 5707   format %{"[$reg + $lreg << $scale]" %}
 5708   interface(MEMORY_INTER) %{
 5709     base($reg);
 5710     index($lreg);
 5711     scale($scale);  // the matched shift count becomes the scale
 5712     disp(0x0);      // No Displacement
 5713   %}
 5714 %}
 5715 
 5716 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5717 %{
 5718   constraint(ALLOC_IN_RC(ptr_reg));
 5719   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // long type's lower bound is non-negative; presumably in(3)->in(1) is the ConvI2L of the match rule -- confirm
 5720   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5721
 5722   op_cost(10);
 5723   format %{"[$reg + pos $idx << $scale]" %}
 5724   interface(MEMORY_INTER) %{
 5725     base($reg);
 5726     index($idx);    // the int register itself is the index; the ConvI2L is absorbed by the match
 5727     scale($scale);
 5728     disp(0x0);      // No Displacement
 5729   %}
 5730 %}
 5731 
 5732 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: [$reg + $off + $lreg << $scale]
 5733 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5734 %{
 5735   constraint(ALLOC_IN_RC(ptr_reg));
 5736   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5737
 5738   op_cost(10);
 5739   format %{"[$reg + $off + $lreg << $scale]" %}
 5740   interface(MEMORY_INTER) %{
 5741     base($reg);
 5742     index($lreg);
 5743     scale($scale);  // the matched shift count becomes the scale
 5744     disp($off);     // the matched constant becomes the displacement
 5745   %}
 5746 %}
 5747 
 5748 // Indirect Memory Plus Positive Index Register Plus Offset Operand: [$reg + $off + $idx], int index
 5749 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5750 %{
 5751   constraint(ALLOC_IN_RC(ptr_reg));
 5752   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);  // long type's lower bound is non-negative; presumably in(2)->in(3) is the ConvI2L of the match rule -- confirm
 5753   match(AddP (AddP reg (ConvI2L idx)) off);
 5754
 5755   op_cost(10);
 5756   format %{"[$reg + $off + $idx]" %}
 5757   interface(MEMORY_INTER) %{
 5758     base($reg);
 5759     index($idx);  // the int register itself is the index; the ConvI2L is absorbed by the match
 5760     scale(0x0);   // No Scale
 5761     disp($off);
 5762   %}
 5763 %}
 5764 
 5765 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand: [$reg + $off + $idx << $scale]
 5766 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5767 %{
 5768   constraint(ALLOC_IN_RC(ptr_reg));
 5769   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // long type's lower bound is non-negative; presumably this path reaches the ConvI2L of the match rule -- confirm
 5770   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5771
 5772   op_cost(10);
 5773   format %{"[$reg + $off + $idx << $scale]" %}
 5774   interface(MEMORY_INTER) %{
 5775     base($reg);
 5776     index($idx);    // the int register itself is the index; the ConvI2L is absorbed by the match
 5777     scale($scale);
 5778     disp($off);
 5779   %}
 5780 %}
 5781 
 5782 // Indirect Narrow Oop Plus Offset Operand: [R12 + $reg << 3 + $off], matching AddP (DecodeN reg) off.
 5783 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
 5784 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5785 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5786   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));  // only when the oop shift equals the times_8 scale
 5787   constraint(ALLOC_IN_RC(ptr_reg));
 5788   match(AddP (DecodeN reg) off);
 5789
 5790   op_cost(10);
 5791   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5792   interface(MEMORY_INTER) %{
 5793     base(0xc); // R12
 5794     index($reg);  // the narrow oop register is the index
 5795     scale(0x3);   // "<< 3" in the format string above
 5796     disp($off);
 5797   %}
 5798 %}
 5799 
 5800 // Indirect Narrow Oop Memory Operand: [$reg], with the DecodeN absorbed into the operand
 5801 operand indirectNarrow(rRegN reg)
 5802 %{
 5803   predicate(CompressedOops::shift() == 0);  // only when compressed oops use a zero shift
 5804   constraint(ALLOC_IN_RC(ptr_reg));
 5805   match(DecodeN reg);
 5806
 5807   format %{ "[$reg]" %}
 5808   interface(MEMORY_INTER) %{
 5809     base($reg);  // the narrow oop register is used directly as the base
 5810     index(0x4);  // No Index
 5811     scale(0x0);  // No Scale
 5812     disp(0x0);   // No Displacement
 5813   %}
 5814 %}
 5815 
 5816 // Indirect Narrow Oop Plus Short Offset Operand: [$reg + $off], offset typed immL8
 5817 operand indOffset8Narrow(rRegN reg, immL8 off)
 5818 %{
 5819   predicate(CompressedOops::shift() == 0);  // only when compressed oops use a zero shift
 5820   constraint(ALLOC_IN_RC(ptr_reg));
 5821   match(AddP (DecodeN reg) off);
 5822
 5823   format %{ "[$reg + $off (8-bit)]" %}
 5824   interface(MEMORY_INTER) %{
 5825     base($reg);  // the narrow oop register is used directly as the base
 5826     index(0x4);  // No Index
 5827     scale(0x0);  // No Scale
 5828     disp($off);
 5829   %}
 5830 %}
 5831 
 5832 // Indirect Narrow Oop Plus Long Offset Operand: [$reg + $off], offset typed immL32
 5833 operand indOffset32Narrow(rRegN reg, immL32 off)
 5834 %{
 5835   predicate(CompressedOops::shift() == 0);  // only when compressed oops use a zero shift
 5836   constraint(ALLOC_IN_RC(ptr_reg));
 5837   match(AddP (DecodeN reg) off);
 5838
 5839   format %{ "[$reg + $off (32-bit)]" %}
 5840   interface(MEMORY_INTER) %{
 5841     base($reg);  // the narrow oop register is used directly as the base
 5842     index(0x4);  // No Index
 5843     scale(0x0);  // No Scale
 5844     disp($off);
 5845   %}
 5846 %}
 5847 
 5848 // Indirect Narrow Oop Plus Index Register Plus Offset Operand: [$reg + $off + $lreg]
 5849 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5850 %{
 5851   predicate(CompressedOops::shift() == 0);  // only when compressed oops use a zero shift
 5852   constraint(ALLOC_IN_RC(ptr_reg));
 5853   match(AddP (AddP (DecodeN reg) lreg) off);
 5854
 5855   op_cost(10);
 5856   format %{"[$reg + $off + $lreg]" %}
 5857   interface(MEMORY_INTER) %{
 5858     base($reg);  // the narrow oop register is used directly as the base
 5859     index($lreg);
 5860     scale(0x0);  // No Scale
 5861     disp($off);
 5862   %}
 5863 %}
 5864 
 5865 // Indirect Narrow Oop Plus Index Register Operand: [$reg + $lreg], no scale and no displacement
 5866 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5867 %{
 5868   predicate(CompressedOops::shift() == 0);  // only when compressed oops use a zero shift
 5869   constraint(ALLOC_IN_RC(ptr_reg));
 5870   match(AddP (DecodeN reg) lreg);
 5871
 5872   op_cost(10);
 5873   format %{"[$reg + $lreg]" %}
 5874   interface(MEMORY_INTER) %{
 5875     base($reg);  // the narrow oop register is used directly as the base
 5876     index($lreg);
 5877     scale(0x0);  // No Scale
 5878     disp(0x0);   // No Displacement
 5879   %}
 5880 %}
 5881 
 5882 // Indirect Narrow Oop Times Scale Plus Index Register: [$reg + $lreg << $scale]
 5883 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5884 %{
 5885   predicate(CompressedOops::shift() == 0);  // only when compressed oops use a zero shift
 5886   constraint(ALLOC_IN_RC(ptr_reg));
 5887   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5888
 5889   op_cost(10);
 5890   format %{"[$reg + $lreg << $scale]" %}
 5891   interface(MEMORY_INTER) %{
 5892     base($reg);     // the narrow oop register is used directly as the base
 5893     index($lreg);
 5894     scale($scale);
 5895     disp(0x0);      // No Displacement
 5896   %}
 5897 %}
 5898 
 5899 // Indirect Narrow Oop Times Scale Plus Index Register Plus Offset Operand: [$reg + $off + $lreg << $scale]
 5900 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5901 %{
 5902   predicate(CompressedOops::shift() == 0);  // only when compressed oops use a zero shift
 5903   constraint(ALLOC_IN_RC(ptr_reg));
 5904   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5905
 5906   op_cost(10);
 5907   format %{"[$reg + $off + $lreg << $scale]" %}
 5908   interface(MEMORY_INTER) %{
 5909     base($reg);     // the narrow oop register is used directly as the base
 5910     index($lreg);
 5911     scale($scale);
 5912     disp($off);
 5913   %}
 5914 %}
 5915 
 5916 // Indirect Narrow Oop Plus Positive Index Register Plus Offset Operand: [$reg + $off + $idx], int index
 5917 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5918 %{
 5919   constraint(ALLOC_IN_RC(ptr_reg));
 5920   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);  // zero oop shift, and a non-negative lower bound on the long type (presumably of the ConvI2L -- confirm)
 5921   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5922
 5923   op_cost(10);
 5924   format %{"[$reg + $off + $idx]" %}
 5925   interface(MEMORY_INTER) %{
 5926     base($reg);   // the narrow oop register is used directly as the base
 5927     index($idx);  // the int register itself is the index
 5928     scale(0x0);   // No Scale
 5929     disp($off);
 5930   %}
 5931 %}
 5932 
 5933 // Indirect Narrow Oop Times Scale Plus Positive Index Register Plus Offset Operand: [$reg + $off + $idx << $scale]
 5934 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5935 %{
 5936   constraint(ALLOC_IN_RC(ptr_reg));
 5937   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // zero oop shift, and a non-negative lower bound on the long type (presumably of the ConvI2L -- confirm)
 5938   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5939
 5940   op_cost(10);
 5941   format %{"[$reg + $off + $idx << $scale]" %}
 5942   interface(MEMORY_INTER) %{
 5943     base($reg);     // the narrow oop register is used directly as the base
 5944     index($idx);    // the int register itself is the index
 5945     scale($scale);
 5946     disp($off);
 5947   %}
 5948 %}
 5949 
 5950 //----------Special Memory Operands--------------------------------------------
 5951 // Stack Slot Operand - This operand is used for loading and storing temporary
 5952 //                      values on the stack where a match requires a value to
 5953 //                      flow through memory. stackSlotP takes an sRegP slot.
 5954 operand stackSlotP(sRegP reg)
 5955 %{
 5956   constraint(ALLOC_IN_RC(stack_slots));
 5957   // No match rule because this operand is only generated in matching
 5958
 5959   format %{ "[$reg]" %}
 5960   interface(MEMORY_INTER) %{
 5961     base(0x4);   // RSP
 5962     index(0x4);  // No Index
 5963     scale(0x0);  // No Scale
 5964     disp($reg);  // Stack Offset
 5965   %}
 5966 %}
 5967 
 5968 operand stackSlotI(sRegI reg)
 5969 %{
 5970   constraint(ALLOC_IN_RC(stack_slots));  // stack slot operand taking an sRegI slot
 5971   // No match rule because this operand is only generated in matching
 5972
 5973   format %{ "[$reg]" %}
 5974   interface(MEMORY_INTER) %{
 5975     base(0x4);   // RSP
 5976     index(0x4);  // No Index
 5977     scale(0x0);  // No Scale
 5978     disp($reg);  // Stack Offset
 5979   %}
 5980 %}
 5981 
 5982 operand stackSlotF(sRegF reg)
 5983 %{
 5984   constraint(ALLOC_IN_RC(stack_slots));  // stack slot operand taking an sRegF slot
 5985   // No match rule because this operand is only generated in matching
 5986
 5987   format %{ "[$reg]" %}
 5988   interface(MEMORY_INTER) %{
 5989     base(0x4);   // RSP
 5990     index(0x4);  // No Index
 5991     scale(0x0);  // No Scale
 5992     disp($reg);  // Stack Offset
 5993   %}
 5994 %}
 5995 
 5996 operand stackSlotD(sRegD reg)
 5997 %{
 5998   constraint(ALLOC_IN_RC(stack_slots));  // stack slot operand taking an sRegD slot
 5999   // No match rule because this operand is only generated in matching
 6000
 6001   format %{ "[$reg]" %}
 6002   interface(MEMORY_INTER) %{
 6003     base(0x4);   // RSP
 6004     index(0x4);  // No Index
 6005     scale(0x0);  // No Scale
 6006     disp($reg);  // Stack Offset
 6007   %}
 6008 %}
 6009 operand stackSlotL(sRegL reg)
 6010 %{
 6011   constraint(ALLOC_IN_RC(stack_slots));  // stack slot operand taking an sRegL slot
 6012   // No match rule because this operand is only generated in matching
 6013
 6014   format %{ "[$reg]" %}
 6015   interface(MEMORY_INTER) %{
 6016     base(0x4);   // RSP
 6017     index(0x4);  // No Index
 6018     scale(0x0);  // No Scale
 6019     disp($reg);  // Stack Offset
 6020   %}
 6021 %}
 6022 
 6023 //----------Conditional Branch Operands----------------------------------------
 6024 // Comparison Op  - This is the operation of the comparison, and is limited to
 6025 //                  the following set of codes:
 6026 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6027 //
 6028 // Other attributes of the comparison, such as unsignedness, are specified
 6029 // by the comparison instruction that sets a condition code flags register.
 6030 // That result is represented by a flags operand whose subtype is appropriate
 6031 // to the unsignedness (etc.) of the comparison.
 6032 //
 6033 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6034 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6035 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 6036 
 6037 // Comparison Code: maps each Bool test to a (condition code, mnemonic suffix) pair; uses l/ge/le/g
 6038 operand cmpOp()
 6039 %{
 6040   match(Bool);
 6041
 6042   format %{ "" %}
 6043   interface(COND_INTER) %{
 6044     equal(0x4, "e");
 6045     not_equal(0x5, "ne");
 6046     less(0xc, "l");
 6047     greater_equal(0xd, "ge");
 6048     less_equal(0xe, "le");
 6049     greater(0xf, "g");
 6050     overflow(0x0, "o");
 6051     no_overflow(0x1, "no");
 6052   %}
 6053 %}
 6054 
 6055 // Comparison Code, unsigned compare (b/ae/be/a in place of cmpOp's l/ge/le/g).
 6056 // Used by FP also, with C2 (unordered) turned into GT or LT already.
 6057 // The other bits C0 and C3 are turned into Carry & Zero flags.
 6058 operand cmpOpU()
 6059 %{
 6060   match(Bool);
 6061
 6062   format %{ "" %}
 6063   interface(COND_INTER) %{
 6064     equal(0x4, "e");
 6065     not_equal(0x5, "ne");
 6066     less(0x2, "b");
 6067     greater_equal(0x3, "ae");
 6068     less_equal(0x6, "be");
 6069     greater(0x7, "a");
 6070     overflow(0x0, "o");
 6071     no_overflow(0x1, "no");
 6072   %}
 6073 %}
 6074 
 6075 
 6076 // Floating comparisons that don't require any fixup for the unordered case.
 6077 // If both inputs of the comparison are the same, ZF is always set, so we
 6078 // don't need to use cmpOpUCF2 for eq/ne; here eq/ne are encoded as np/p instead.
 6079 operand cmpOpUCF() %{
 6080   match(Bool);
 6081   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6082             (n->as_Bool()->_test._test == BoolTest::lt ||
 6083              n->as_Bool()->_test._test == BoolTest::ge ||
 6084              n->as_Bool()->_test._test == BoolTest::le ||
 6085              n->as_Bool()->_test._test == BoolTest::gt ||
 6086              n->in(1)->in(1) == n->in(1)->in(2)));  // lt/ge/le/gt always; any other test only when both compare inputs are the same node
 6087   format %{ "" %}
 6088   interface(COND_INTER) %{
 6089     equal(0xb, "np");
 6090     not_equal(0xa, "p");
 6091     less(0x2, "b");
 6092     greater_equal(0x3, "ae");
 6093     less_equal(0x6, "be");
 6094     greater(0x7, "a");
 6095     overflow(0x0, "o");
 6096     no_overflow(0x1, "no");
 6097   %}
 6098 %}
 6099 
 6100 
 6101 // Floating eq/ne comparisons (on two distinct inputs) that can be fixed up with extra conditional jumps
 6102 operand cmpOpUCF2() %{
 6103   match(Bool);
 6104   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6105             (n->as_Bool()->_test._test == BoolTest::ne ||
 6106              n->as_Bool()->_test._test == BoolTest::eq) &&
 6107             n->in(1)->in(1) != n->in(1)->in(2));  // eq/ne only, and the two compare inputs must be different nodes
 6108   format %{ "" %}
 6109   interface(COND_INTER) %{
 6110     equal(0x4, "e");
 6111     not_equal(0x5, "ne");
 6112     less(0x2, "b");
 6113     greater_equal(0x3, "ae");
 6114     less_equal(0x6, "be");
 6115     greater(0x7, "a");
 6116     overflow(0x0, "o");
 6117     no_overflow(0x1, "no");
 6118   %}
 6119 %}
 6120 
 6121 
 6122 // Floating point comparisons that set condition flags to test more directly.
 6123 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
 6124 // are used for L (<) and LE (<=) conditions. It's important to convert these
 6125 // latter conditions to ones that use unsigned tests before passing into an
 6126 // instruction because the preceding comparison might be based on a three way
 6127 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
 6128 operand cmpOpUCFE()
 6129 %{
 6130   match(Bool);
 6131   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6132             (n->as_Bool()->_test._test == BoolTest::ne ||
 6133              n->as_Bool()->_test._test == BoolTest::eq ||
 6134              n->as_Bool()->_test._test == BoolTest::lt ||
 6135              n->as_Bool()->_test._test == BoolTest::ge ||
 6136              n->as_Bool()->_test._test == BoolTest::le ||
 6137              n->as_Bool()->_test._test == BoolTest::gt));  // platform test is the negation of the one in cmpOpUCF/cmpOpUCF2
 6138
 6139   format %{ "" %}
 6140   interface(COND_INTER) %{
 6141     equal(0x4, "e");
 6142     not_equal(0x5, "ne");
 6143     less(0x2, "b");
 6144     greater_equal(0x3, "ae");
 6145     less_equal(0x6, "be");
 6146     greater(0x7, "a");
 6147     overflow(0x0, "o");
 6148     no_overflow(0x1, "no");
 6149   %}
 6150 %}
 6151 
 6152 // Operand bound to the xmm0_reg register class; matches VecX values
 6153 operand rxmm0() %{
 6154   constraint(ALLOC_IN_RC(xmm0_reg));
 6155   match(VecX);
 6156   format%{%}
 6157   interface(REG_INTER);
 6158 %}
 6159 
 6160 // Vectors
 6161 
 6162 // Dummy generic vector class. Should be used for all vector operands.
 6163 // Replaced with vec[SDXYZ] during post-selection pass; matches every vector size.
 6164 operand vec() %{
 6165   constraint(ALLOC_IN_RC(dynamic));  // placeholder class; the size-specific operands below name real classes
 6166   match(VecX);
 6167   match(VecY);
 6168   match(VecZ);
 6169   match(VecS);
 6170   match(VecD);
 6171
 6172   format %{ %}
 6173   interface(REG_INTER);
 6174 %}
 6175 
 6176 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6177 // Replaced with legVec[SDXYZ] during post-selection cleanup; matches every vector size.
 6178 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6179 // runtime code generation via reg_class_dynamic.
 6180 operand legVec() %{
 6181   constraint(ALLOC_IN_RC(dynamic));  // placeholder class; the legVec[SDXYZ] operands below name real classes
 6182   match(VecX);
 6183   match(VecY);
 6184   match(VecZ);
 6185   match(VecS);
 6186   match(VecD);
 6187
 6188   format %{ %}
 6189   interface(REG_INTER);
 6190 %}
 6191 
 6192 // Replaces vec for VecS values during post-selection cleanup (see vec above); class vectors_reg_vlbwdq.
 6193 operand vecS() %{
 6194   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6195   match(VecS);
 6196
 6197   format %{ %}
 6198   interface(REG_INTER);
 6199 %}
 6200 
 6201 // Replaces legVec for VecS values during post-selection cleanup (see legVec above); class vectors_reg_legacy.
 6202 operand legVecS() %{
 6203   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6204   match(VecS);
 6205
 6206   format %{ %}
 6207   interface(REG_INTER);
 6208 %}
 6209 
 6210 // Replaces vec for VecD values during post-selection cleanup (see vec above); class vectord_reg_vlbwdq.
 6211 operand vecD() %{
 6212   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6213   match(VecD);
 6214
 6215   format %{ %}
 6216   interface(REG_INTER);
 6217 %}
 6218 
 6219 // Replaces legVec for VecD values during post-selection cleanup (see legVec above); class vectord_reg_legacy.
 6220 operand legVecD() %{
 6221   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6222   match(VecD);
 6223
 6224   format %{ %}
 6225   interface(REG_INTER);
 6226 %}
 6227 
 6228 // Replaces vec for VecX values during post-selection cleanup (see vec above); class vectorx_reg_vlbwdq.
 6229 operand vecX() %{
 6230   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6231   match(VecX);
 6232
 6233   format %{ %}
 6234   interface(REG_INTER);
 6235 %}
 6236 
 6237 // Replaces legVec for VecX values during post-selection cleanup (see legVec above); class vectorx_reg_legacy.
 6238 operand legVecX() %{
 6239   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6240   match(VecX);
 6241
 6242   format %{ %}
 6243   interface(REG_INTER);
 6244 %}
 6245 
 6246 // Replaces vec for VecY values during post-selection cleanup (see vec above); class vectory_reg_vlbwdq.
 6247 operand vecY() %{
 6248   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6249   match(VecY);
 6250
 6251   format %{ %}
 6252   interface(REG_INTER);
 6253 %}
 6254 
 6255 // Replaces legVec for VecY values during post-selection cleanup (see legVec above); class vectory_reg_legacy.
 6256 operand legVecY() %{
 6257   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6258   match(VecY);
 6259
 6260   format %{ %}
 6261   interface(REG_INTER);
 6262 %}
 6263 
 6264 // Replaces vec for VecZ values during post-selection cleanup (see vec above); class vectorz_reg.
 6265 operand vecZ() %{
 6266   constraint(ALLOC_IN_RC(vectorz_reg));  // note: no _vlbwdq suffix, unlike vecS/vecD/vecX/vecY
 6267   match(VecZ);
 6268
 6269   format %{ %}
 6270   interface(REG_INTER);
 6271 %}
 6272 
// Replaces legVec during post-selection cleanup. See above.
// Register-interface operand that matches VecZ and is allocated from the
// vectorz_reg_legacy register class. Its format is empty.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
 6281 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

// "memory" groups every addressing-mode operand listed below, including the
// *Narrow variants, so an instruction can take any of them as one parameter.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6295 
 6296 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6298 pipeline %{
 6299 
//----------ATTRIBUTES---------------------------------------------------------
// Global properties of the modeled instruction stream and fetch unit.
attributes %{
  variable_size_instructions;        // Instructions vary in size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction size is counted in 1-byte units
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
 6308 
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU op, only ALU0 handles mul instructions.
// NOTE(review): the text above says 2 load/store ops, but three memory units
// (MS0, MS1, MS2) are declared below -- confirm which is intended.
// DECODE, MEM and ALU are the unions of their individual units; the pipe
// classes use them where "any decoder / any mem / any alu" is acceptable.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
           BR, FPU,
           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6321 
//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
// Six stages, S0 through S5; the pipe classes below name these stages when
// stating where operands are read/written and where resources are used.
pipe_desc(S0, S1, S2, S3, S4, S5);
 6327 
 6328 //----------PIPELINE CLASSES---------------------------------------------------
 6329 // Pipeline Classes describe the stages in which input and output are
 6330 // referenced by the hardware pipeline.
 6331 
 6332 // Naming convention: ialu or fpu
 6333 // Then: _reg
 6334 // Then: _reg if there is a 2nd register
 6335 // Then: _long if it's a pair of instructions implementing a long
 6336 // Then: _fat if it requires the big decoder
 6337 //   Or: _mem if it requires the big decoder and a memory unit.
 6338 
// Integer ALU reg operation
// Single instruction on one int register: dst is listed both as written in
// S4 and as read in S3. Uses any decoder in S0 and any ALU in S3.
pipe_class ialu_reg(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
 6348 
// Long ALU reg operation
// Counted as two instructions on one long register: dst is listed as written
// in S4 and read in S3. DECODE (S0) and ALU (S3) are each used with count 2.
// NOTE(review): "both alus" predates the three-ALU resource list -- confirm.
pipe_class ialu_reg_long(rRegL dst)
%{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}
 6358 
// Integer ALU reg operation using big decoder
// Same operand timing as ialu_reg (dst written in S4, read in S3), but the
// decode slot is pinned to D0 instead of any decoder.
pipe_class ialu_reg_fat(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}
 6368 
// Integer ALU reg-reg operation
// Single instruction: src is read in S3, dst is written in S4.
// Uses any decoder in S0 and any ALU in S3.
pipe_class ialu_reg_reg(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
 6378 
// Integer ALU operation using big decoder
// Despite the "_reg_reg" name, src is declared as a memory operand here.
// src is read in S3 and dst written in S4; decode is pinned to D0 and any
// ALU is used in S3. No MEM resource is claimed by this class.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}
 6388 
// Integer ALU reg-mem operation
// Single instruction loading from mem: mem is read in S3 (with a MEM unit),
// an ALU is used in S4 and dst is written in S5. Decode is pinned to D0.
pipe_class ialu_reg_mem(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
 6399 
// Integer mem operation (prefetch)
// Single instruction with only a memory operand: mem is read in S3 using a
// MEM unit; decode is pinned to D0. No ALU is claimed.
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}
 6408 
// Integer Store to Memory
// Single instruction storing a register: the address operand mem is read in
// S3, the stored value src is read in S5. Decode is pinned to D0; an ALU is
// used in S4 and a MEM unit in S3.
pipe_class ialu_mem_reg(memory mem, rRegI src)
%{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}
 6419 
 6420 // // Long Store to Memory
 6421 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6422 // %{
 6423 //     instruction_count(2);
 6424 //     mem    : S3(read);
 6425 //     src    : S5(read);
 6426 //     D0     : S0(2);          // big decoder only; twice
 6427 //     ALU    : S4(2);     // any 2 alus
 6428 //     MEM    : S3(2);  // Both mems
 6429 // %}
 6430 
// Integer Store of an immediate to Memory
// Like ialu_mem_reg but without a source register: mem is read in S3.
// Decode is pinned to D0; an ALU is used in S4 and a MEM unit in S3.
pipe_class ialu_mem_imm(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}
 6440 
// Integer ALU0 reg-reg operation
// Reg-reg operation restricted to ALU0 (the resource comment names ALU0 as
// the only unit handling mul instructions). src is read in S3, dst written
// in S4; decode is pinned to D0.
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}
 6450 
// Integer ALU0 reg-mem operation
// Reg-mem operation restricted to ALU0: mem is read in S3 (MEM unit), ALU0
// is used in S4 and dst is written in S5. Decode is pinned to D0.
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}
 6461 
// Integer ALU reg-reg operation producing flags
// Single instruction: src1 and src2 are read in S3 and the flags register cr
// is written in S4. Uses any decoder in S0 and any ALU in S3.
pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
 6472 
// Integer ALU reg-imm operation producing flags
// Single instruction: src1 is read in S3 and the flags register cr is
// written in S4. Uses any decoder in S0 and any ALU in S3.
pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
 6482 
// Integer ALU reg-mem operation producing flags
// Single instruction: register src1 and memory src2 are read in S3 and the
// flags register cr is written in S4. Decode is pinned to D0; an ALU is used
// in S4 and a MEM unit in S3.
pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}
 6494 
// Compare-less-than idiom on three int registers
// Counted as four instructions: p and q are read in S3 and y in S4. Only a
// DECODE usage (count 4, in S0) is claimed; no write and no ALU are listed.
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
%{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}
 6504 
// Conditional move reg-reg
// Single instruction: src and the flags register cr are read in S3, dst is
// written in S4. Uses any decoder in S0; no ALU is claimed.
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}
 6514 
// Conditional move reg-mem
// Single instruction: memory src and the flags register cr are read in S3,
// dst is written in S4. Uses any decoder in S0 and a MEM unit in S3.
pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}
 6525 
// Conditional move reg-reg long
// Declared as a single instruction on long registers: src and cr are read in
// S3, dst is written in S4. DECODE is used in S0 with count 2.
pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}
 6535 
// Float reg operation
// Counted as two instructions on one double register: dst is read in S3
// (no write is listed). DECODE is used in S0 with count 2, the FPU in S3.
pipe_class fpu_reg(regD dst)
%{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}
 6544 
// Float reg-reg operation
// Counted as two instructions: src is read in S3, dst is written in S4.
// DECODE is used in S0 with count 2, the FPU in S3.
pipe_class fpu_reg_reg(regD dst, regD src)
%{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}
 6554 
// Float reg-reg-reg operation
// Counted as three instructions: src1 and src2 are read in S3, dst is
// written in S4. DECODE is used in S0 with count 3, the FPU in S3 with
// count 2.
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
%{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}
 6565 
// Float operation on a destination and three source registers
// Counted as four instructions: src1..src3 are read in S3, dst is written in
// S4. DECODE is used in S0 with count 4, the FPU in S3 with count 2.
// NOTE(review): the inline comment says "3 decoders" while the declared
// count is 4 -- confirm which is intended.
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 3 decoders
    FPU    : S3(2);
%}
 6577 
// Float operation on a destination, one memory source and two registers
// Counted as four instructions: src1 (memory), src2 and src3 are read in S3,
// dst is written in S4. D0 is used in S0 and DECODE in S1 with count 3; the
// FPU is used in S3 with count 2 and a MEM unit in S3.
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}
 6591 
// Float reg-mem operation
// Counted as two instructions: mem is read in S3 (MEM unit), the FPU is used
// in S4 and dst is written in S5. D0 decodes in S0, any decoder in S1.
pipe_class fpu_reg_mem(regD dst, memory mem)
%{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6603 
// Float reg-reg-mem operation
// Counted as three instructions: src1 and mem are read in S3, the FPU is
// used in S4 and dst is written in S5. D0 decodes in S0, DECODE is used in
// S1 with count 2, and a MEM unit is used in S3.
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
%{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6616 
// Float mem-reg operation
// Counted as two instructions: the address operand mem is read in S3 and the
// register src in S5. Any decoder is used in S0 and D0 in S1; the FPU is
// used in S4 and a MEM unit in S3.
pipe_class fpu_mem_reg(memory mem, regD src)
%{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6628 
// Float mem-reg-reg operation
// Counted as three instructions: src1, src2 and mem are all read in S3.
// DECODE is used in S0 with count 2 and D0 in S1; the FPU is used in S4 and
// a MEM unit in S3.
pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6640 
// Float mem-reg-mem operation
// Counted as three instructions: src1 and the memory operand src2 are read
// in S3, mem is read in S4. DECODE is used in S0 and D0 in S0 with count 2;
// the FPU is used in S4 and MEM in S3 with count 2.
pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}
 6652 
// Float mem-mem operation
// Counted as two instructions: src1 is read in S3 and dst in S4 (both are
// memory operands, and both are listed as reads). D0 is used in S0 and MEM
// in S3, each with count 2. No FPU resource is claimed.
pipe_class fpu_mem_mem(memory dst, memory src1)
%{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}
 6661 
// Float mem-mem-mem operation
// Counted as three instructions: src1 and src2 are read in S3 and dst in S4
// (all memory operands, all listed as reads). D0 is used in S0 and MEM in
// S3, each with count 3; the FPU is used in S4.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}
 6672 
// Float mem-reg operation with a constant
// Counted as three instructions: src1 and mem are both read in S4. DECODE is
// used in S0 and D0 in S0 with count 2; the FPU is used in S4 and MEM in S3
// with count 2.
pipe_class fpu_mem_reg_con(memory mem, regD src1)
%{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}
 6683 
// Float load constant
// Counted as two instructions: dst is written in S5. D0 decodes the load in
// S0 and any decoder is used in S1; the FPU is used in S4 and a MEM unit in
// S3.
pipe_class fpu_reg_con(regD dst)
%{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6694 
// Float reg-reg operation with a loaded constant
// Counted as three instructions: src is read in S3 and dst written in S5.
// D0 decodes the load in S0 and DECODE is used in S1 with count 2; the FPU
// is used in S4 and a MEM unit in S3.
pipe_class fpu_reg_reg_con(regD dst, regD src)
%{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
 6706 
// Unconditional branch
// Single instruction that only claims the branch unit (BR) in S3; the label
// operand has no stage annotation and no decoder is listed.
pipe_class pipe_jmp(label labl)
%{
    single_instruction;
    BR   : S3;
%}
 6713 
// Conditional branch
// Single instruction: the flags register cr is read in S1 and the branch
// unit (BR) is used in S3. cmp and labl carry no stage annotation.
pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
%{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}
 6721 
// Allocation idiom
// One instruction with forced serialization and a fixed latency of 6.
// heap_ptr is read in S3 and dst written in S5. Claims DECODE in S0
// (count 3), D0 in S2, a MEM unit in S3, ALU in S3 (count 2) and BR in S5.
pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
%{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}
 6735 
// Generic big/slow expanded idiom
// Catch-all class with no operands: counted as 10 instructions spanning
// multiple bundles, with forced serialization and a fixed latency of 100.
// Claims D0 in S0 and MEM in S3, each with count 2.
pipe_class pipe_slow()
%{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}
 6744 
// The real do-nothing guy
// Zero instructions, no operands and no resources.
pipe_class empty()
%{
    instruction_count(0);
%}
 6750 
// Define the class for the Nop node
// MachNop is bound to the zero-instruction "empty" pipe class.
define
%{
   MachNop = empty;
%}
 6756 
 6757 %}
 6758 
 6759 //----------INSTRUCTIONS-------------------------------------------------------
 6760 //
 6761 // match      -- States which machine-independent subtree may be replaced
 6762 //               by this instruction.
 6763 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6764 //               selection to identify a minimum cost tree of machine
 6765 //               instructions that matches a tree of machine-independent
 6766 //               instructions.
 6767 // format     -- A string providing the disassembly for this instruction.
 6768 //               The value of an instruction's operand may be inserted
 6769 //               by referring to it with a '$' prefix.
 6770 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6771 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6773 //               indicate the type of machine instruction, while secondary
 6774 //               and tertiary are often used for prefix options or addressing
 6775 //               modes.
 6776 // ins_encode -- A list of encode classes with parameters. The encode class
 6777 //               name must have been defined in an 'enc_class' specification
 6778 //               in the encode section of the architecture description.
 6779 
 6780 // ============================================================================
 6781 
 6782 instruct ShouldNotReachHere() %{
 6783   match(Halt);
 6784   format %{ "stop\t# ShouldNotReachHere" %}
 6785   ins_encode %{
 6786     if (is_reachable()) {
 6787       const char* str = __ code_string(_halt_reason);
 6788       __ stop(str);
 6789     }
 6790   %}
 6791   ins_pipe(pipe_slow);
 6792 %}
 6793 
 6794 // ============================================================================
 6795 
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Placeholder float move regF -> vlRegF. The encoding is
// ShouldNotReachHere(), so this node is never expected to be emitted.
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6806 
// Placeholder float move regF -> legRegF. The encoding is
// ShouldNotReachHere(), so this node is never expected to be emitted.
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6816 
// Placeholder float move vlRegF -> regF. The encoding is
// ShouldNotReachHere(), so this node is never expected to be emitted.
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6826 
// Placeholder float move legRegF -> regF. The encoding is
// ShouldNotReachHere(), so this node is never expected to be emitted.
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6836 
// Placeholder double move regD -> vlRegD. The encoding is
// ShouldNotReachHere(), so this node is never expected to be emitted.
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6846 
// Placeholder double move regD -> legRegD. The encoding is
// ShouldNotReachHere(), so this node is never expected to be emitted.
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6856 
// Placeholder double move vlRegD -> regD. The encoding is
// ShouldNotReachHere(), so this node is never expected to be emitted.
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6866 
// Placeholder double move legRegD -> regD. The encoding is
// ShouldNotReachHere(), so this node is never expected to be emitted.
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6876 
 6877 //----------Load/Store/Move Instructions---------------------------------------
 6878 //----------Load Instructions--------------------------------------------------
 6879 
 6880 // Load Byte (8 bit signed)
 6881 instruct loadB(rRegI dst, memory mem)
 6882 %{
 6883   match(Set dst (LoadB mem));
 6884 
 6885   ins_cost(125);
 6886   format %{ "movsbl  $dst, $mem\t# byte" %}
 6887 
 6888   ins_encode %{
 6889     __ movsbl($dst$$Register, $mem$$Address);
 6890   %}
 6891 
 6892   ins_pipe(ialu_reg_mem);
 6893 %}
 6894 
 6895 // Load Byte (8 bit signed) into Long Register
 6896 instruct loadB2L(rRegL dst, memory mem)
 6897 %{
 6898   match(Set dst (ConvI2L (LoadB mem)));
 6899 
 6900   ins_cost(125);
 6901   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6902 
 6903   ins_encode %{
 6904     __ movsbq($dst$$Register, $mem$$Address);
 6905   %}
 6906 
 6907   ins_pipe(ialu_reg_mem);
 6908 %}
 6909 
 6910 // Load Unsigned Byte (8 bit UNsigned)
 6911 instruct loadUB(rRegI dst, memory mem)
 6912 %{
 6913   match(Set dst (LoadUB mem));
 6914 
 6915   ins_cost(125);
 6916   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6917 
 6918   ins_encode %{
 6919     __ movzbl($dst$$Register, $mem$$Address);
 6920   %}
 6921 
 6922   ins_pipe(ialu_reg_mem);
 6923 %}
 6924 
 6925 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6926 instruct loadUB2L(rRegL dst, memory mem)
 6927 %{
 6928   match(Set dst (ConvI2L (LoadUB mem)));
 6929 
 6930   ins_cost(125);
 6931   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6932 
 6933   ins_encode %{
 6934     __ movzbq($dst$$Register, $mem$$Address);
 6935   %}
 6936 
 6937   ins_pipe(ialu_reg_mem);
 6938 %}
 6939 
 6940 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6941 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6942   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6943   effect(KILL cr);
 6944 
 6945   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6946             "andl    $dst, right_n_bits($mask, 8)" %}
 6947   ins_encode %{
 6948     Register Rdst = $dst$$Register;
 6949     __ movzbq(Rdst, $mem$$Address);
 6950     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6951   %}
 6952   ins_pipe(ialu_reg_mem);
 6953 %}
 6954 
 6955 // Load Short (16 bit signed)
 6956 instruct loadS(rRegI dst, memory mem)
 6957 %{
 6958   match(Set dst (LoadS mem));
 6959 
 6960   ins_cost(125);
 6961   format %{ "movswl $dst, $mem\t# short" %}
 6962 
 6963   ins_encode %{
 6964     __ movswl($dst$$Register, $mem$$Address);
 6965   %}
 6966 
 6967   ins_pipe(ialu_reg_mem);
 6968 %}
 6969 
 6970 // Load Short (16 bit signed) to Byte (8 bit signed)
 6971 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6972   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6973 
 6974   ins_cost(125);
 6975   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6976   ins_encode %{
 6977     __ movsbl($dst$$Register, $mem$$Address);
 6978   %}
 6979   ins_pipe(ialu_reg_mem);
 6980 %}
 6981 
 6982 // Load Short (16 bit signed) into Long Register
 6983 instruct loadS2L(rRegL dst, memory mem)
 6984 %{
 6985   match(Set dst (ConvI2L (LoadS mem)));
 6986 
 6987   ins_cost(125);
 6988   format %{ "movswq $dst, $mem\t# short -> long" %}
 6989 
 6990   ins_encode %{
 6991     __ movswq($dst$$Register, $mem$$Address);
 6992   %}
 6993 
 6994   ins_pipe(ialu_reg_mem);
 6995 %}
 6996 
 6997 // Load Unsigned Short/Char (16 bit UNsigned)
 6998 instruct loadUS(rRegI dst, memory mem)
 6999 %{
 7000   match(Set dst (LoadUS mem));
 7001 
 7002   ins_cost(125);
 7003   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7004 
 7005   ins_encode %{
 7006     __ movzwl($dst$$Register, $mem$$Address);
 7007   %}
 7008 
 7009   ins_pipe(ialu_reg_mem);
 7010 %}
 7011 
 7012 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7013 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7014   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7015 
 7016   ins_cost(125);
 7017   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7018   ins_encode %{
 7019     __ movsbl($dst$$Register, $mem$$Address);
 7020   %}
 7021   ins_pipe(ialu_reg_mem);
 7022 %}
 7023 
 7024 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7025 instruct loadUS2L(rRegL dst, memory mem)
 7026 %{
 7027   match(Set dst (ConvI2L (LoadUS mem)));
 7028 
 7029   ins_cost(125);
 7030   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7031 
 7032   ins_encode %{
 7033     __ movzwq($dst$$Register, $mem$$Address);
 7034   %}
 7035 
 7036   ins_pipe(ialu_reg_mem);
 7037 %}
 7038 
 7039 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7040 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7041   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7042 
 7043   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7044   ins_encode %{
 7045     __ movzbq($dst$$Register, $mem$$Address);
 7046   %}
 7047   ins_pipe(ialu_reg_mem);
 7048 %}
 7049 
 7050 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7051 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7052   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7053   effect(KILL cr);
 7054 
 7055   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7056             "andl    $dst, right_n_bits($mask, 16)" %}
 7057   ins_encode %{
 7058     Register Rdst = $dst$$Register;
 7059     __ movzwq(Rdst, $mem$$Address);
 7060     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7061   %}
 7062   ins_pipe(ialu_reg_mem);
 7063 %}
 7064 
 7065 // Load Integer
 7066 instruct loadI(rRegI dst, memory mem)
 7067 %{
 7068   match(Set dst (LoadI mem));
 7069 
 7070   ins_cost(125);
 7071   format %{ "movl    $dst, $mem\t# int" %}
 7072 
 7073   ins_encode %{
 7074     __ movl($dst$$Register, $mem$$Address);
 7075   %}
 7076 
 7077   ins_pipe(ialu_reg_mem);
 7078 %}
 7079 
 7080 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7081 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7082   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7083 
 7084   ins_cost(125);
 7085   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7086   ins_encode %{
 7087     __ movsbl($dst$$Register, $mem$$Address);
 7088   %}
 7089   ins_pipe(ialu_reg_mem);
 7090 %}
 7091 
 7092 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7093 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7094   match(Set dst (AndI (LoadI mem) mask));
 7095 
 7096   ins_cost(125);
 7097   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7098   ins_encode %{
 7099     __ movzbl($dst$$Register, $mem$$Address);
 7100   %}
 7101   ins_pipe(ialu_reg_mem);
 7102 %}
 7103 
 7104 // Load Integer (32 bit signed) to Short (16 bit signed)
 7105 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7106   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7107 
 7108   ins_cost(125);
 7109   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7110   ins_encode %{
 7111     __ movswl($dst$$Register, $mem$$Address);
 7112   %}
 7113   ins_pipe(ialu_reg_mem);
 7114 %}
 7115 
 7116 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7117 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7118   match(Set dst (AndI (LoadI mem) mask));
 7119 
 7120   ins_cost(125);
 7121   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7122   ins_encode %{
 7123     __ movzwl($dst$$Register, $mem$$Address);
 7124   %}
 7125   ins_pipe(ialu_reg_mem);
 7126 %}
 7127 
 7128 // Load Integer into Long Register
 7129 instruct loadI2L(rRegL dst, memory mem)
 7130 %{
 7131   match(Set dst (ConvI2L (LoadI mem)));
 7132 
 7133   ins_cost(125);
 7134   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7135 
 7136   ins_encode %{
 7137     __ movslq($dst$$Register, $mem$$Address);
 7138   %}
 7139 
 7140   ins_pipe(ialu_reg_mem);
 7141 %}
 7142 
 7143 // Load Integer with mask 0xFF into Long Register
 7144 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7145   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7146 
 7147   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7148   ins_encode %{
 7149     __ movzbq($dst$$Register, $mem$$Address);
 7150   %}
 7151   ins_pipe(ialu_reg_mem);
 7152 %}
 7153 
 7154 // Load Integer with mask 0xFFFF into Long Register
 7155 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7156   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7157 
 7158   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7159   ins_encode %{
 7160     __ movzwq($dst$$Register, $mem$$Address);
 7161   %}
 7162   ins_pipe(ialu_reg_mem);
 7163 %}
 7164 
 7165 // Load Integer with a 31-bit mask into Long Register
 7166 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7167   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7168   effect(KILL cr);
 7169 
 7170   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7171             "andl    $dst, $mask" %}
 7172   ins_encode %{
 7173     Register Rdst = $dst$$Register;
 7174     __ movl(Rdst, $mem$$Address);
 7175     __ andl(Rdst, $mask$$constant);
 7176   %}
 7177   ins_pipe(ialu_reg_mem);
 7178 %}
 7179 
 7180 // Load Unsigned Integer into Long Register
 7181 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7182 %{
 7183   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7184 
 7185   ins_cost(125);
 7186   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7187 
 7188   ins_encode %{
 7189     __ movl($dst$$Register, $mem$$Address);
 7190   %}
 7191 
 7192   ins_pipe(ialu_reg_mem);
 7193 %}
 7194 
 7195 // Load Long
 7196 instruct loadL(rRegL dst, memory mem)
 7197 %{
 7198   match(Set dst (LoadL mem));
 7199 
 7200   ins_cost(125);
 7201   format %{ "movq    $dst, $mem\t# long" %}
 7202 
 7203   ins_encode %{
 7204     __ movq($dst$$Register, $mem$$Address);
 7205   %}
 7206 
 7207   ins_pipe(ialu_reg_mem); // XXX
 7208 %}
 7209 
 7210 // Load Range
 7211 instruct loadRange(rRegI dst, memory mem)
 7212 %{
 7213   match(Set dst (LoadRange mem));
 7214 
 7215   ins_cost(125); // XXX
 7216   format %{ "movl    $dst, $mem\t# range" %}
 7217   ins_encode %{
 7218     __ movl($dst$$Register, $mem$$Address);
 7219   %}
 7220   ins_pipe(ialu_reg_mem);
 7221 %}
 7222 
 7223 // Load Pointer
 7224 instruct loadP(rRegP dst, memory mem)
 7225 %{
 7226   match(Set dst (LoadP mem));
 7227   predicate(n->as_Load()->barrier_data() == 0);
 7228 
 7229   ins_cost(125); // XXX
 7230   format %{ "movq    $dst, $mem\t# ptr" %}
 7231   ins_encode %{
 7232     __ movq($dst$$Register, $mem$$Address);
 7233   %}
 7234   ins_pipe(ialu_reg_mem); // XXX
 7235 %}
 7236 
 7237 // Load Compressed Pointer
 7238 instruct loadN(rRegN dst, memory mem)
 7239 %{
 7240    predicate(n->as_Load()->barrier_data() == 0);
 7241    match(Set dst (LoadN mem));
 7242 
 7243    ins_cost(125); // XXX
 7244    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7245    ins_encode %{
 7246      __ movl($dst$$Register, $mem$$Address);
 7247    %}
 7248    ins_pipe(ialu_reg_mem); // XXX
 7249 %}
 7250 
 7251 
 7252 // Load Klass Pointer
 7253 instruct loadKlass(rRegP dst, memory mem)
 7254 %{
 7255   match(Set dst (LoadKlass mem));
 7256 
 7257   ins_cost(125); // XXX
 7258   format %{ "movq    $dst, $mem\t# class" %}
 7259   ins_encode %{
 7260     __ movq($dst$$Register, $mem$$Address);
 7261   %}
 7262   ins_pipe(ialu_reg_mem); // XXX
 7263 %}
 7264 
 7265 // Load narrow Klass Pointer
 7266 instruct loadNKlass(rRegN dst, memory mem)
 7267 %{
 7268   predicate(!UseCompactObjectHeaders);
 7269   match(Set dst (LoadNKlass mem));
 7270 
 7271   ins_cost(125); // XXX
 7272   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7273   ins_encode %{
 7274     __ movl($dst$$Register, $mem$$Address);
 7275   %}
 7276   ins_pipe(ialu_reg_mem); // XXX
 7277 %}
 7278 
 7279 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7280 %{
 7281   predicate(UseCompactObjectHeaders);
 7282   match(Set dst (LoadNKlass mem));
 7283   effect(KILL cr);
 7284   ins_cost(125);
 7285   format %{
 7286     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7287     "shrl    $dst, markWord::klass_shift_at_offset"
 7288   %}
 7289   ins_encode %{
 7290     __ movl($dst$$Register, $mem$$Address);
 7291     __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7292   %}
 7293   ins_pipe(ialu_reg_mem);
 7294 %}
 7295 
 7296 // Load Float
 7297 instruct loadF(regF dst, memory mem)
 7298 %{
 7299   match(Set dst (LoadF mem));
 7300 
 7301   ins_cost(145); // XXX
 7302   format %{ "movss   $dst, $mem\t# float" %}
 7303   ins_encode %{
 7304     __ movflt($dst$$XMMRegister, $mem$$Address);
 7305   %}
 7306   ins_pipe(pipe_slow); // XXX
 7307 %}
 7308 
 7309 // Load Double
 7310 instruct loadD_partial(regD dst, memory mem)
 7311 %{
 7312   predicate(!UseXmmLoadAndClearUpper);
 7313   match(Set dst (LoadD mem));
 7314 
 7315   ins_cost(145); // XXX
 7316   format %{ "movlpd  $dst, $mem\t# double" %}
 7317   ins_encode %{
 7318     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7319   %}
 7320   ins_pipe(pipe_slow); // XXX
 7321 %}
 7322 
 7323 instruct loadD(regD dst, memory mem)
 7324 %{
 7325   predicate(UseXmmLoadAndClearUpper);
 7326   match(Set dst (LoadD mem));
 7327 
 7328   ins_cost(145); // XXX
 7329   format %{ "movsd   $dst, $mem\t# double" %}
 7330   ins_encode %{
 7331     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7332   %}
 7333   ins_pipe(pipe_slow); // XXX
 7334 %}
 7335 
 7336 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7337 %{
 7338   match(Set dst con);
 7339 
 7340   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7341 
 7342   ins_encode %{
 7343     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7344   %}
 7345 
 7346   ins_pipe(ialu_reg_fat);
 7347 %}
 7348 
 7349 // min = java.lang.Math.min(float a, float b)
 7350 // max = java.lang.Math.max(float a, float b)
 7351 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
 7352 %{
 7353   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7354   match(Set dst (MaxF a b));
 7355   match(Set dst (MinF a b));
 7356 
 7357   format %{ "minmaxF $dst, $a, $b" %}
 7358   ins_encode %{
 7359     int opcode = this->ideal_Opcode();
 7360     __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7361   %}
 7362   ins_pipe( pipe_slow );
 7363 %}
 7364 
 7365 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr)
 7366 %{
 7367   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7368   match(Set dst (MaxF a b));
 7369   match(Set dst (MinF a b));
 7370   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7371 
 7372   format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7373   ins_encode %{
 7374     int opcode = this->ideal_Opcode();
 7375     bool min = (opcode == Op_MinF) ? true : false;
 7376     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7377                     min, fp_prec_flt /*pt*/);
 7378   %}
 7379   ins_pipe( pipe_slow );
 7380 %}
 7381 
 7382 // min = java.lang.Math.min(float a, float b)
 7383 // max = java.lang.Math.max(float a, float b)
 7384 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
 7385 %{
 7386   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7387   match(Set dst (MaxF a b));
 7388   match(Set dst (MinF a b));
 7389   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7390 
 7391   format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7392   ins_encode %{
 7393     int opcode = this->ideal_Opcode();
 7394     int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
 7395     __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7396                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7397   %}
 7398   ins_pipe( pipe_slow );
 7399 %}
 7400 
 7401 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr)
 7402 %{
 7403   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7404   match(Set dst (MaxF a b));
 7405   match(Set dst (MinF a b));
 7406   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7407 
 7408   format %{ "minmaxF_reduction $dst, $a, $b \t!using $rtmp as TEMP" %}
 7409   ins_encode %{
 7410     int opcode = this->ideal_Opcode();
 7411     bool min = (opcode == Op_MinF) ? true : false;
 7412     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7413                     min, fp_prec_flt /*pt*/);
 7414   %}
 7415   ins_pipe( pipe_slow );
 7416 %}
 7417 
 7418 // min = java.lang.Math.min(double a, double b)
 7419 // max = java.lang.Math.max(double a, double b)
 7420 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
 7421 %{
 7422   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7423   match(Set dst (MaxD a b));
 7424   match(Set dst (MinD a b));
 7425 
 7426   format %{ "minmaxD $dst, $a, $b" %}
 7427   ins_encode %{
 7428     int opcode = this->ideal_Opcode();
 7429     __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7430   %}
 7431   ins_pipe( pipe_slow );
 7432 %}
 7433 
 7434 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr)
 7435 %{
 7436   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7437   match(Set dst (MaxD a b));
 7438   match(Set dst (MinD a b));
 7439   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7440 
 7441   format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7442   ins_encode %{
 7443     int opcode = this->ideal_Opcode();
 7444     bool min = (opcode == Op_MinD) ? true : false;
 7445     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7446                     min, fp_prec_dbl /*pt*/);
 7447   %}
 7448   ins_pipe( pipe_slow );
 7449 %}
 7450 
 7451 // min = java.lang.Math.min(double a, double b)
 7452 // max = java.lang.Math.max(double a, double b)
 7453 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
 7454 %{
 7455   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7456   match(Set dst (MaxD a b));
 7457   match(Set dst (MinD a b));
 7458   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7459 
 7460   format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7461   ins_encode %{
 7462     int opcode = this->ideal_Opcode();
 7463     int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
 7464     __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
 7465                   $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7466   %}
 7467   ins_pipe( pipe_slow );
 7468 %}
 7469 
 7470 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr)
 7471 %{
 7472   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7473   match(Set dst (MaxD a b));
 7474   match(Set dst (MinD a b));
 7475   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7476 
 7477   format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7478   ins_encode %{
 7479     int opcode = this->ideal_Opcode();
 7480     bool min = (opcode == Op_MinD) ? true : false;
 7481     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
 7482                     min, fp_prec_dbl /*pt*/);
 7483   %}
 7484   ins_pipe( pipe_slow );
 7485 %}
 7486 
 7487 // Load Effective Address
 7488 instruct leaP8(rRegP dst, indOffset8 mem)
 7489 %{
 7490   match(Set dst mem);
 7491 
 7492   ins_cost(110); // XXX
 7493   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7494   ins_encode %{
 7495     __ leaq($dst$$Register, $mem$$Address);
 7496   %}
 7497   ins_pipe(ialu_reg_reg_fat);
 7498 %}
 7499 
 7500 instruct leaP32(rRegP dst, indOffset32 mem)
 7501 %{
 7502   match(Set dst mem);
 7503 
 7504   ins_cost(110);
 7505   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7506   ins_encode %{
 7507     __ leaq($dst$$Register, $mem$$Address);
 7508   %}
 7509   ins_pipe(ialu_reg_reg_fat);
 7510 %}
 7511 
 7512 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7513 %{
 7514   match(Set dst mem);
 7515 
 7516   ins_cost(110);
 7517   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7518   ins_encode %{
 7519     __ leaq($dst$$Register, $mem$$Address);
 7520   %}
 7521   ins_pipe(ialu_reg_reg_fat);
 7522 %}
 7523 
 7524 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7525 %{
 7526   match(Set dst mem);
 7527 
 7528   ins_cost(110);
 7529   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7530   ins_encode %{
 7531     __ leaq($dst$$Register, $mem$$Address);
 7532   %}
 7533   ins_pipe(ialu_reg_reg_fat);
 7534 %}
 7535 
 7536 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7537 %{
 7538   match(Set dst mem);
 7539 
 7540   ins_cost(110);
 7541   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7542   ins_encode %{
 7543     __ leaq($dst$$Register, $mem$$Address);
 7544   %}
 7545   ins_pipe(ialu_reg_reg_fat);
 7546 %}
 7547 
 7548 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7549 %{
 7550   match(Set dst mem);
 7551 
 7552   ins_cost(110);
 7553   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7554   ins_encode %{
 7555     __ leaq($dst$$Register, $mem$$Address);
 7556   %}
 7557   ins_pipe(ialu_reg_reg_fat);
 7558 %}
 7559 
 7560 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7561 %{
 7562   match(Set dst mem);
 7563 
 7564   ins_cost(110);
 7565   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7566   ins_encode %{
 7567     __ leaq($dst$$Register, $mem$$Address);
 7568   %}
 7569   ins_pipe(ialu_reg_reg_fat);
 7570 %}
 7571 
 7572 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7573 %{
 7574   match(Set dst mem);
 7575 
 7576   ins_cost(110);
 7577   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7578   ins_encode %{
 7579     __ leaq($dst$$Register, $mem$$Address);
 7580   %}
 7581   ins_pipe(ialu_reg_reg_fat);
 7582 %}
 7583 
 7584 // Load Effective Address which uses Narrow (32-bits) oop
 7585 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7586 %{
 7587   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7588   match(Set dst mem);
 7589 
 7590   ins_cost(110);
 7591   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7592   ins_encode %{
 7593     __ leaq($dst$$Register, $mem$$Address);
 7594   %}
 7595   ins_pipe(ialu_reg_reg_fat);
 7596 %}
 7597 
 7598 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7599 %{
 7600   predicate(CompressedOops::shift() == 0);
 7601   match(Set dst mem);
 7602 
 7603   ins_cost(110); // XXX
 7604   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7605   ins_encode %{
 7606     __ leaq($dst$$Register, $mem$$Address);
 7607   %}
 7608   ins_pipe(ialu_reg_reg_fat);
 7609 %}
 7610 
 7611 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7612 %{
 7613   predicate(CompressedOops::shift() == 0);
 7614   match(Set dst mem);
 7615 
 7616   ins_cost(110);
 7617   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7618   ins_encode %{
 7619     __ leaq($dst$$Register, $mem$$Address);
 7620   %}
 7621   ins_pipe(ialu_reg_reg_fat);
 7622 %}
 7623 
 7624 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7625 %{
 7626   predicate(CompressedOops::shift() == 0);
 7627   match(Set dst mem);
 7628 
 7629   ins_cost(110);
 7630   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7631   ins_encode %{
 7632     __ leaq($dst$$Register, $mem$$Address);
 7633   %}
 7634   ins_pipe(ialu_reg_reg_fat);
 7635 %}
 7636 
 7637 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7638 %{
 7639   predicate(CompressedOops::shift() == 0);
 7640   match(Set dst mem);
 7641 
 7642   ins_cost(110);
 7643   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7644   ins_encode %{
 7645     __ leaq($dst$$Register, $mem$$Address);
 7646   %}
 7647   ins_pipe(ialu_reg_reg_fat);
 7648 %}
 7649 
 7650 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7651 %{
 7652   predicate(CompressedOops::shift() == 0);
 7653   match(Set dst mem);
 7654 
 7655   ins_cost(110);
 7656   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7657   ins_encode %{
 7658     __ leaq($dst$$Register, $mem$$Address);
 7659   %}
 7660   ins_pipe(ialu_reg_reg_fat);
 7661 %}
 7662 
 7663 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7664 %{
 7665   predicate(CompressedOops::shift() == 0);
 7666   match(Set dst mem);
 7667 
 7668   ins_cost(110);
 7669   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7670   ins_encode %{
 7671     __ leaq($dst$$Register, $mem$$Address);
 7672   %}
 7673   ins_pipe(ialu_reg_reg_fat);
 7674 %}
 7675 
 7676 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7677 %{
 7678   predicate(CompressedOops::shift() == 0);
 7679   match(Set dst mem);
 7680 
 7681   ins_cost(110);
 7682   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7683   ins_encode %{
 7684     __ leaq($dst$$Register, $mem$$Address);
 7685   %}
 7686   ins_pipe(ialu_reg_reg_fat);
 7687 %}
 7688 
 7689 instruct loadConI(rRegI dst, immI src)
 7690 %{
 7691   match(Set dst src);
 7692 
 7693   format %{ "movl    $dst, $src\t# int" %}
 7694   ins_encode %{
 7695     __ movl($dst$$Register, $src$$constant);
 7696   %}
 7697   ins_pipe(ialu_reg_fat); // XXX
 7698 %}
 7699 
 7700 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7701 %{
 7702   match(Set dst src);
 7703   effect(KILL cr);
 7704 
 7705   ins_cost(50);
 7706   format %{ "xorl    $dst, $dst\t# int" %}
 7707   ins_encode %{
 7708     __ xorl($dst$$Register, $dst$$Register);
 7709   %}
 7710   ins_pipe(ialu_reg);
 7711 %}
 7712 
 7713 instruct loadConL(rRegL dst, immL src)
 7714 %{
 7715   match(Set dst src);
 7716 
 7717   ins_cost(150);
 7718   format %{ "movq    $dst, $src\t# long" %}
 7719   ins_encode %{
 7720     __ mov64($dst$$Register, $src$$constant);
 7721   %}
 7722   ins_pipe(ialu_reg);
 7723 %}
 7724 
 7725 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7726 %{
 7727   match(Set dst src);
 7728   effect(KILL cr);
 7729 
 7730   ins_cost(50);
 7731   format %{ "xorl    $dst, $dst\t# long" %}
 7732   ins_encode %{
 7733     __ xorl($dst$$Register, $dst$$Register);
 7734   %}
 7735   ins_pipe(ialu_reg); // XXX
 7736 %}
 7737 
 7738 instruct loadConUL32(rRegL dst, immUL32 src)
 7739 %{
 7740   match(Set dst src);
 7741 
 7742   ins_cost(60);
 7743   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7744   ins_encode %{
 7745     __ movl($dst$$Register, $src$$constant);
 7746   %}
 7747   ins_pipe(ialu_reg);
 7748 %}
 7749 
 7750 instruct loadConL32(rRegL dst, immL32 src)
 7751 %{
 7752   match(Set dst src);
 7753 
 7754   ins_cost(70);
 7755   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7756   ins_encode %{
 7757     __ movq($dst$$Register, $src$$constant);
 7758   %}
 7759   ins_pipe(ialu_reg);
 7760 %}
 7761 
 7762 instruct loadConP(rRegP dst, immP con) %{
 7763   match(Set dst con);
 7764 
 7765   format %{ "movq    $dst, $con\t# ptr" %}
 7766   ins_encode %{
 7767     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7768   %}
 7769   ins_pipe(ialu_reg_fat); // XXX
 7770 %}
 7771 
 7772 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7773 %{
 7774   match(Set dst src);
 7775   effect(KILL cr);
 7776 
 7777   ins_cost(50);
 7778   format %{ "xorl    $dst, $dst\t# ptr" %}
 7779   ins_encode %{
 7780     __ xorl($dst$$Register, $dst$$Register);
 7781   %}
 7782   ins_pipe(ialu_reg);
 7783 %}
 7784 
 7785 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7786 %{
 7787   match(Set dst src);
 7788   effect(KILL cr);
 7789 
 7790   ins_cost(60);
 7791   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7792   ins_encode %{
 7793     __ movl($dst$$Register, $src$$constant);
 7794   %}
 7795   ins_pipe(ialu_reg);
 7796 %}
 7797 
 7798 instruct loadConF(regF dst, immF con) %{
 7799   match(Set dst con);
 7800   ins_cost(125);
 7801   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7802   ins_encode %{
 7803     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7804   %}
 7805   ins_pipe(pipe_slow);
 7806 %}
 7807 
 7808 instruct loadConH(regF dst, immH con) %{
 7809   match(Set dst con);
 7810   ins_cost(125);
 7811   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7812   ins_encode %{
 7813     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7814   %}
 7815   ins_pipe(pipe_slow);
 7816 %}
 7817 
 7818 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7819   match(Set dst src);
 7820   effect(KILL cr);
 7821   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7822   ins_encode %{
 7823     __ xorq($dst$$Register, $dst$$Register);
 7824   %}
 7825   ins_pipe(ialu_reg);
 7826 %}
 7827 
 7828 instruct loadConN(rRegN dst, immN src) %{
 7829   match(Set dst src);
 7830 
 7831   ins_cost(125);
 7832   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7833   ins_encode %{
 7834     address con = (address)$src$$constant;
 7835     if (con == nullptr) {
 7836       ShouldNotReachHere();
 7837     } else {
 7838       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7839     }
 7840   %}
 7841   ins_pipe(ialu_reg_fat); // XXX
 7842 %}
 7843 
 7844 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7845   match(Set dst src);
 7846 
 7847   ins_cost(125);
 7848   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7849   ins_encode %{
 7850     address con = (address)$src$$constant;
 7851     if (con == nullptr) {
 7852       ShouldNotReachHere();
 7853     } else {
 7854       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7855     }
 7856   %}
 7857   ins_pipe(ialu_reg_fat); // XXX
 7858 %}
 7859 
 7860 instruct loadConF0(regF dst, immF0 src)
 7861 %{
 7862   match(Set dst src);
 7863   ins_cost(100);
 7864 
 7865   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7866   ins_encode %{
 7867     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7868   %}
 7869   ins_pipe(pipe_slow);
 7870 %}
 7871 
 7872 // Use the same format since predicate() can not be used here.
 7873 instruct loadConD(regD dst, immD con) %{
 7874   match(Set dst con);
 7875   ins_cost(125);
 7876   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7877   ins_encode %{
 7878     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7879   %}
 7880   ins_pipe(pipe_slow);
 7881 %}
 7882 
 7883 instruct loadConD0(regD dst, immD0 src)
 7884 %{
 7885   match(Set dst src);
 7886   ins_cost(100);
 7887 
 7888   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7889   ins_encode %{
 7890     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7891   %}
 7892   ins_pipe(pipe_slow);
 7893 %}
 7894 
 7895 instruct loadSSI(rRegI dst, stackSlotI src)
 7896 %{
 7897   match(Set dst src);
 7898 
 7899   ins_cost(125);
 7900   format %{ "movl    $dst, $src\t# int stk" %}
 7901   ins_encode %{
 7902     __ movl($dst$$Register, $src$$Address);
 7903   %}
 7904   ins_pipe(ialu_reg_mem);
 7905 %}
 7906 
 7907 instruct loadSSL(rRegL dst, stackSlotL src)
 7908 %{
 7909   match(Set dst src);
 7910 
 7911   ins_cost(125);
 7912   format %{ "movq    $dst, $src\t# long stk" %}
 7913   ins_encode %{
 7914     __ movq($dst$$Register, $src$$Address);
 7915   %}
 7916   ins_pipe(ialu_reg_mem);
 7917 %}
 7918 
 7919 instruct loadSSP(rRegP dst, stackSlotP src)
 7920 %{
 7921   match(Set dst src);
 7922 
 7923   ins_cost(125);
 7924   format %{ "movq    $dst, $src\t# ptr stk" %}
 7925   ins_encode %{
 7926     __ movq($dst$$Register, $src$$Address);
 7927   %}
 7928   ins_pipe(ialu_reg_mem);
 7929 %}
 7930 
 7931 instruct loadSSF(regF dst, stackSlotF src)
 7932 %{
 7933   match(Set dst src);
 7934 
 7935   ins_cost(125);
 7936   format %{ "movss   $dst, $src\t# float stk" %}
 7937   ins_encode %{
 7938     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7939   %}
 7940   ins_pipe(pipe_slow); // XXX
 7941 %}
 7942 
 7943 // Use the same format since predicate() can not be used here.
 7944 instruct loadSSD(regD dst, stackSlotD src)
 7945 %{
 7946   match(Set dst src);
 7947 
 7948   ins_cost(125);
 7949   format %{ "movsd   $dst, $src\t# double stk" %}
 7950   ins_encode  %{
 7951     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7952   %}
 7953   ins_pipe(pipe_slow); // XXX
 7954 %}
 7955 
 7956 // Prefetch instructions for allocation.
 7957 // Must be safe to execute with invalid address (cannot fault).
 7958 
 7959 instruct prefetchAlloc( memory mem ) %{
 7960   predicate(AllocatePrefetchInstr==3);
 7961   match(PrefetchAllocation mem);
 7962   ins_cost(125);
 7963 
 7964   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7965   ins_encode %{
 7966     __ prefetchw($mem$$Address);
 7967   %}
 7968   ins_pipe(ialu_mem);
 7969 %}
 7970 
 7971 instruct prefetchAllocNTA( memory mem ) %{
 7972   predicate(AllocatePrefetchInstr==0);
 7973   match(PrefetchAllocation mem);
 7974   ins_cost(125);
 7975 
 7976   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7977   ins_encode %{
 7978     __ prefetchnta($mem$$Address);
 7979   %}
 7980   ins_pipe(ialu_mem);
 7981 %}
 7982 
 7983 instruct prefetchAllocT0( memory mem ) %{
 7984   predicate(AllocatePrefetchInstr==1);
 7985   match(PrefetchAllocation mem);
 7986   ins_cost(125);
 7987 
 7988   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7989   ins_encode %{
 7990     __ prefetcht0($mem$$Address);
 7991   %}
 7992   ins_pipe(ialu_mem);
 7993 %}
 7994 
 7995 instruct prefetchAllocT2( memory mem ) %{
 7996   predicate(AllocatePrefetchInstr==2);
 7997   match(PrefetchAllocation mem);
 7998   ins_cost(125);
 7999 
 8000   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8001   ins_encode %{
 8002     __ prefetcht2($mem$$Address);
 8003   %}
 8004   ins_pipe(ialu_mem);
 8005 %}
 8006 
 8007 //----------Store Instructions-------------------------------------------------
 8008 
 8009 // Store Byte
 8010 instruct storeB(memory mem, rRegI src)
 8011 %{
 8012   match(Set mem (StoreB mem src));
 8013 
 8014   ins_cost(125); // XXX
 8015   format %{ "movb    $mem, $src\t# byte" %}
 8016   ins_encode %{
 8017     __ movb($mem$$Address, $src$$Register);
 8018   %}
 8019   ins_pipe(ialu_mem_reg);
 8020 %}
 8021 
 8022 // Store Char/Short
 8023 instruct storeC(memory mem, rRegI src)
 8024 %{
 8025   match(Set mem (StoreC mem src));
 8026 
 8027   ins_cost(125); // XXX
 8028   format %{ "movw    $mem, $src\t# char/short" %}
 8029   ins_encode %{
 8030     __ movw($mem$$Address, $src$$Register);
 8031   %}
 8032   ins_pipe(ialu_mem_reg);
 8033 %}
 8034 
 8035 // Store Integer
 8036 instruct storeI(memory mem, rRegI src)
 8037 %{
 8038   match(Set mem (StoreI mem src));
 8039 
 8040   ins_cost(125); // XXX
 8041   format %{ "movl    $mem, $src\t# int" %}
 8042   ins_encode %{
 8043     __ movl($mem$$Address, $src$$Register);
 8044   %}
 8045   ins_pipe(ialu_mem_reg);
 8046 %}
 8047 
 8048 // Store Long
 8049 instruct storeL(memory mem, rRegL src)
 8050 %{
 8051   match(Set mem (StoreL mem src));
 8052 
 8053   ins_cost(125); // XXX
 8054   format %{ "movq    $mem, $src\t# long" %}
 8055   ins_encode %{
 8056     __ movq($mem$$Address, $src$$Register);
 8057   %}
 8058   ins_pipe(ialu_mem_reg); // XXX
 8059 %}
 8060 
 8061 // Store Pointer
 8062 instruct storeP(memory mem, any_RegP src)
 8063 %{
 8064   predicate(n->as_Store()->barrier_data() == 0);
 8065   match(Set mem (StoreP mem src));
 8066 
 8067   ins_cost(125); // XXX
 8068   format %{ "movq    $mem, $src\t# ptr" %}
 8069   ins_encode %{
 8070     __ movq($mem$$Address, $src$$Register);
 8071   %}
 8072   ins_pipe(ialu_mem_reg);
 8073 %}
 8074 
 8075 instruct storeImmP0(memory mem, immP0 zero)
 8076 %{
 8077   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8078   match(Set mem (StoreP mem zero));
 8079 
 8080   ins_cost(125); // XXX
 8081   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8082   ins_encode %{
 8083     __ movq($mem$$Address, r12);
 8084   %}
 8085   ins_pipe(ialu_mem_reg);
 8086 %}
 8087 
 8088 // Store Null Pointer, mark word, or other simple pointer constant.
 8089 instruct storeImmP(memory mem, immP31 src)
 8090 %{
 8091   predicate(n->as_Store()->barrier_data() == 0);
 8092   match(Set mem (StoreP mem src));
 8093 
 8094   ins_cost(150); // XXX
 8095   format %{ "movq    $mem, $src\t# ptr" %}
 8096   ins_encode %{
 8097     __ movq($mem$$Address, $src$$constant);
 8098   %}
 8099   ins_pipe(ialu_mem_imm);
 8100 %}
 8101 
 8102 // Store Compressed Pointer
 8103 instruct storeN(memory mem, rRegN src)
 8104 %{
 8105   predicate(n->as_Store()->barrier_data() == 0);
 8106   match(Set mem (StoreN mem src));
 8107 
 8108   ins_cost(125); // XXX
 8109   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8110   ins_encode %{
 8111     __ movl($mem$$Address, $src$$Register);
 8112   %}
 8113   ins_pipe(ialu_mem_reg);
 8114 %}
 8115 
 8116 instruct storeNKlass(memory mem, rRegN src)
 8117 %{
 8118   match(Set mem (StoreNKlass mem src));
 8119 
 8120   ins_cost(125); // XXX
 8121   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8122   ins_encode %{
 8123     __ movl($mem$$Address, $src$$Register);
 8124   %}
 8125   ins_pipe(ialu_mem_reg);
 8126 %}
 8127 
 8128 instruct storeImmN0(memory mem, immN0 zero)
 8129 %{
 8130   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8131   match(Set mem (StoreN mem zero));
 8132 
 8133   ins_cost(125); // XXX
 8134   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8135   ins_encode %{
 8136     __ movl($mem$$Address, r12);
 8137   %}
 8138   ins_pipe(ialu_mem_reg);
 8139 %}
 8140 
 8141 instruct storeImmN(memory mem, immN src)
 8142 %{
 8143   predicate(n->as_Store()->barrier_data() == 0);
 8144   match(Set mem (StoreN mem src));
 8145 
 8146   ins_cost(150); // XXX
 8147   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8148   ins_encode %{
 8149     address con = (address)$src$$constant;
 8150     if (con == nullptr) {
 8151       __ movl($mem$$Address, 0);
 8152     } else {
 8153       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8154     }
 8155   %}
 8156   ins_pipe(ialu_mem_imm);
 8157 %}
 8158 
 8159 instruct storeImmNKlass(memory mem, immNKlass src)
 8160 %{
 8161   match(Set mem (StoreNKlass mem src));
 8162 
 8163   ins_cost(150); // XXX
 8164   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8165   ins_encode %{
 8166     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8167   %}
 8168   ins_pipe(ialu_mem_imm);
 8169 %}
 8170 
 8171 // Store Integer Immediate
 8172 instruct storeImmI0(memory mem, immI_0 zero)
 8173 %{
 8174   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8175   match(Set mem (StoreI mem zero));
 8176 
 8177   ins_cost(125); // XXX
 8178   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8179   ins_encode %{
 8180     __ movl($mem$$Address, r12);
 8181   %}
 8182   ins_pipe(ialu_mem_reg);
 8183 %}
 8184 
 8185 instruct storeImmI(memory mem, immI src)
 8186 %{
 8187   match(Set mem (StoreI mem src));
 8188 
 8189   ins_cost(150);
 8190   format %{ "movl    $mem, $src\t# int" %}
 8191   ins_encode %{
 8192     __ movl($mem$$Address, $src$$constant);
 8193   %}
 8194   ins_pipe(ialu_mem_imm);
 8195 %}
 8196 
 8197 // Store Long Immediate
 8198 instruct storeImmL0(memory mem, immL0 zero)
 8199 %{
 8200   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8201   match(Set mem (StoreL mem zero));
 8202 
 8203   ins_cost(125); // XXX
 8204   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8205   ins_encode %{
 8206     __ movq($mem$$Address, r12);
 8207   %}
 8208   ins_pipe(ialu_mem_reg);
 8209 %}
 8210 
 8211 instruct storeImmL(memory mem, immL32 src)
 8212 %{
 8213   match(Set mem (StoreL mem src));
 8214 
 8215   ins_cost(150);
 8216   format %{ "movq    $mem, $src\t# long" %}
 8217   ins_encode %{
 8218     __ movq($mem$$Address, $src$$constant);
 8219   %}
 8220   ins_pipe(ialu_mem_imm);
 8221 %}
 8222 
 8223 // Store Short/Char Immediate
 8224 instruct storeImmC0(memory mem, immI_0 zero)
 8225 %{
 8226   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8227   match(Set mem (StoreC mem zero));
 8228 
 8229   ins_cost(125); // XXX
 8230   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8231   ins_encode %{
 8232     __ movw($mem$$Address, r12);
 8233   %}
 8234   ins_pipe(ialu_mem_reg);
 8235 %}
 8236 
 8237 instruct storeImmI16(memory mem, immI16 src)
 8238 %{
 8239   predicate(UseStoreImmI16);
 8240   match(Set mem (StoreC mem src));
 8241 
 8242   ins_cost(150);
 8243   format %{ "movw    $mem, $src\t# short/char" %}
 8244   ins_encode %{
 8245     __ movw($mem$$Address, $src$$constant);
 8246   %}
 8247   ins_pipe(ialu_mem_imm);
 8248 %}
 8249 
 8250 // Store Byte Immediate
 8251 instruct storeImmB0(memory mem, immI_0 zero)
 8252 %{
 8253   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8254   match(Set mem (StoreB mem zero));
 8255 
 8256   ins_cost(125); // XXX
 8257   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8258   ins_encode %{
 8259     __ movb($mem$$Address, r12);
 8260   %}
 8261   ins_pipe(ialu_mem_reg);
 8262 %}
 8263 
 8264 instruct storeImmB(memory mem, immI8 src)
 8265 %{
 8266   match(Set mem (StoreB mem src));
 8267 
 8268   ins_cost(150); // XXX
 8269   format %{ "movb    $mem, $src\t# byte" %}
 8270   ins_encode %{
 8271     __ movb($mem$$Address, $src$$constant);
 8272   %}
 8273   ins_pipe(ialu_mem_imm);
 8274 %}
 8275 
 8276 // Store Float
 8277 instruct storeF(memory mem, regF src)
 8278 %{
 8279   match(Set mem (StoreF mem src));
 8280 
 8281   ins_cost(95); // XXX
 8282   format %{ "movss   $mem, $src\t# float" %}
 8283   ins_encode %{
 8284     __ movflt($mem$$Address, $src$$XMMRegister);
 8285   %}
 8286   ins_pipe(pipe_slow); // XXX
 8287 %}
 8288 
 8289 // Store immediate Float value (it is faster than store from XMM register)
 8290 instruct storeF0(memory mem, immF0 zero)
 8291 %{
 8292   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8293   match(Set mem (StoreF mem zero));
 8294 
 8295   ins_cost(25); // XXX
 8296   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8297   ins_encode %{
 8298     __ movl($mem$$Address, r12);
 8299   %}
 8300   ins_pipe(ialu_mem_reg);
 8301 %}
 8302 
 8303 instruct storeF_imm(memory mem, immF src)
 8304 %{
 8305   match(Set mem (StoreF mem src));
 8306 
 8307   ins_cost(50);
 8308   format %{ "movl    $mem, $src\t# float" %}
 8309   ins_encode %{
 8310     __ movl($mem$$Address, jint_cast($src$$constant));
 8311   %}
 8312   ins_pipe(ialu_mem_imm);
 8313 %}
 8314 
 8315 // Store Double
 8316 instruct storeD(memory mem, regD src)
 8317 %{
 8318   match(Set mem (StoreD mem src));
 8319 
 8320   ins_cost(95); // XXX
 8321   format %{ "movsd   $mem, $src\t# double" %}
 8322   ins_encode %{
 8323     __ movdbl($mem$$Address, $src$$XMMRegister);
 8324   %}
 8325   ins_pipe(pipe_slow); // XXX
 8326 %}
 8327 
 8328 // Store immediate double 0.0 (it is faster than store from XMM register)
 8329 instruct storeD0_imm(memory mem, immD0 src)
 8330 %{
 8331   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8332   match(Set mem (StoreD mem src));
 8333 
 8334   ins_cost(50);
 8335   format %{ "movq    $mem, $src\t# double 0." %}
 8336   ins_encode %{
 8337     __ movq($mem$$Address, $src$$constant);
 8338   %}
 8339   ins_pipe(ialu_mem_imm);
 8340 %}
 8341 
 8342 instruct storeD0(memory mem, immD0 zero)
 8343 %{
 8344   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8345   match(Set mem (StoreD mem zero));
 8346 
 8347   ins_cost(25); // XXX
 8348   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8349   ins_encode %{
 8350     __ movq($mem$$Address, r12);
 8351   %}
 8352   ins_pipe(ialu_mem_reg);
 8353 %}
 8354 
 8355 instruct storeSSI(stackSlotI dst, rRegI src)
 8356 %{
 8357   match(Set dst src);
 8358 
 8359   ins_cost(100);
 8360   format %{ "movl    $dst, $src\t# int stk" %}
 8361   ins_encode %{
 8362     __ movl($dst$$Address, $src$$Register);
 8363   %}
 8364   ins_pipe( ialu_mem_reg );
 8365 %}
 8366 
 8367 instruct storeSSL(stackSlotL dst, rRegL src)
 8368 %{
 8369   match(Set dst src);
 8370 
 8371   ins_cost(100);
 8372   format %{ "movq    $dst, $src\t# long stk" %}
 8373   ins_encode %{
 8374     __ movq($dst$$Address, $src$$Register);
 8375   %}
 8376   ins_pipe(ialu_mem_reg);
 8377 %}
 8378 
 8379 instruct storeSSP(stackSlotP dst, rRegP src)
 8380 %{
 8381   match(Set dst src);
 8382 
 8383   ins_cost(100);
 8384   format %{ "movq    $dst, $src\t# ptr stk" %}
 8385   ins_encode %{
 8386     __ movq($dst$$Address, $src$$Register);
 8387   %}
 8388   ins_pipe(ialu_mem_reg);
 8389 %}
 8390 
 8391 instruct storeSSF(stackSlotF dst, regF src)
 8392 %{
 8393   match(Set dst src);
 8394 
 8395   ins_cost(95); // XXX
 8396   format %{ "movss   $dst, $src\t# float stk" %}
 8397   ins_encode %{
 8398     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8399   %}
 8400   ins_pipe(pipe_slow); // XXX
 8401 %}
 8402 
 8403 instruct storeSSD(stackSlotD dst, regD src)
 8404 %{
 8405   match(Set dst src);
 8406 
 8407   ins_cost(95); // XXX
 8408   format %{ "movsd   $dst, $src\t# double stk" %}
 8409   ins_encode %{
 8410     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8411   %}
 8412   ins_pipe(pipe_slow); // XXX
 8413 %}
 8414 
 8415 instruct cacheWB(indirect addr)
 8416 %{
 8417   predicate(VM_Version::supports_data_cache_line_flush());
 8418   match(CacheWB addr);
 8419 
 8420   ins_cost(100);
 8421   format %{"cache wb $addr" %}
 8422   ins_encode %{
 8423     assert($addr->index_position() < 0, "should be");
 8424     assert($addr$$disp == 0, "should be");
 8425     __ cache_wb(Address($addr$$base$$Register, 0));
 8426   %}
 8427   ins_pipe(pipe_slow); // XXX
 8428 %}
 8429 
 8430 instruct cacheWBPreSync()
 8431 %{
 8432   predicate(VM_Version::supports_data_cache_line_flush());
 8433   match(CacheWBPreSync);
 8434 
 8435   ins_cost(100);
 8436   format %{"cache wb presync" %}
 8437   ins_encode %{
 8438     __ cache_wbsync(true);
 8439   %}
 8440   ins_pipe(pipe_slow); // XXX
 8441 %}
 8442 
 8443 instruct cacheWBPostSync()
 8444 %{
 8445   predicate(VM_Version::supports_data_cache_line_flush());
 8446   match(CacheWBPostSync);
 8447 
 8448   ins_cost(100);
 8449   format %{"cache wb postsync" %}
 8450   ins_encode %{
 8451     __ cache_wbsync(false);
 8452   %}
 8453   ins_pipe(pipe_slow); // XXX
 8454 %}
 8455 
 8456 //----------BSWAP Instructions-------------------------------------------------
 8457 instruct bytes_reverse_int(rRegI dst) %{
 8458   match(Set dst (ReverseBytesI dst));
 8459 
 8460   format %{ "bswapl  $dst" %}
 8461   ins_encode %{
 8462     __ bswapl($dst$$Register);
 8463   %}
 8464   ins_pipe( ialu_reg );
 8465 %}
 8466 
 8467 instruct bytes_reverse_long(rRegL dst) %{
 8468   match(Set dst (ReverseBytesL dst));
 8469 
 8470   format %{ "bswapq  $dst" %}
 8471   ins_encode %{
 8472     __ bswapq($dst$$Register);
 8473   %}
 8474   ins_pipe( ialu_reg);
 8475 %}
 8476 
 8477 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8478   match(Set dst (ReverseBytesUS dst));
 8479   effect(KILL cr);
 8480 
 8481   format %{ "bswapl  $dst\n\t"
 8482             "shrl    $dst,16\n\t" %}
 8483   ins_encode %{
 8484     __ bswapl($dst$$Register);
 8485     __ shrl($dst$$Register, 16);
 8486   %}
 8487   ins_pipe( ialu_reg );
 8488 %}
 8489 
 8490 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8491   match(Set dst (ReverseBytesS dst));
 8492   effect(KILL cr);
 8493 
 8494   format %{ "bswapl  $dst\n\t"
 8495             "sar     $dst,16\n\t" %}
 8496   ins_encode %{
 8497     __ bswapl($dst$$Register);
 8498     __ sarl($dst$$Register, 16);
 8499   %}
 8500   ins_pipe( ialu_reg );
 8501 %}
 8502 
 8503 //---------- Zeros Count Instructions ------------------------------------------
 8504 
 8505 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8506   predicate(UseCountLeadingZerosInstruction);
 8507   match(Set dst (CountLeadingZerosI src));
 8508   effect(KILL cr);
 8509 
 8510   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8511   ins_encode %{
 8512     __ lzcntl($dst$$Register, $src$$Register);
 8513   %}
 8514   ins_pipe(ialu_reg);
 8515 %}
 8516 
 8517 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8518   predicate(UseCountLeadingZerosInstruction);
 8519   match(Set dst (CountLeadingZerosI (LoadI src)));
 8520   effect(KILL cr);
 8521   ins_cost(175);
 8522   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8523   ins_encode %{
 8524     __ lzcntl($dst$$Register, $src$$Address);
 8525   %}
 8526   ins_pipe(ialu_reg_mem);
 8527 %}
 8528 
 8529 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8530   predicate(!UseCountLeadingZerosInstruction);
 8531   match(Set dst (CountLeadingZerosI src));
 8532   effect(KILL cr);
 8533 
 8534   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8535             "jnz     skip\n\t"
 8536             "movl    $dst, -1\n"
 8537       "skip:\n\t"
 8538             "negl    $dst\n\t"
 8539             "addl    $dst, 31" %}
 8540   ins_encode %{
 8541     Register Rdst = $dst$$Register;
 8542     Register Rsrc = $src$$Register;
 8543     Label skip;
 8544     __ bsrl(Rdst, Rsrc);
 8545     __ jccb(Assembler::notZero, skip);
 8546     __ movl(Rdst, -1);
 8547     __ bind(skip);
 8548     __ negl(Rdst);
 8549     __ addl(Rdst, BitsPerInt - 1);
 8550   %}
 8551   ins_pipe(ialu_reg);
 8552 %}
 8553 
 8554 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8555   predicate(UseCountLeadingZerosInstruction);
 8556   match(Set dst (CountLeadingZerosL src));
 8557   effect(KILL cr);
 8558 
 8559   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8560   ins_encode %{
 8561     __ lzcntq($dst$$Register, $src$$Register);
 8562   %}
 8563   ins_pipe(ialu_reg);
 8564 %}
 8565 
 8566 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8567   predicate(UseCountLeadingZerosInstruction);
 8568   match(Set dst (CountLeadingZerosL (LoadL src)));
 8569   effect(KILL cr);
 8570   ins_cost(175);
 8571   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8572   ins_encode %{
 8573     __ lzcntq($dst$$Register, $src$$Address);
 8574   %}
 8575   ins_pipe(ialu_reg_mem);
 8576 %}
 8577 
 8578 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8579   predicate(!UseCountLeadingZerosInstruction);
 8580   match(Set dst (CountLeadingZerosL src));
 8581   effect(KILL cr);
 8582 
 8583   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8584             "jnz     skip\n\t"
 8585             "movl    $dst, -1\n"
 8586       "skip:\n\t"
 8587             "negl    $dst\n\t"
 8588             "addl    $dst, 63" %}
 8589   ins_encode %{
 8590     Register Rdst = $dst$$Register;
 8591     Register Rsrc = $src$$Register;
 8592     Label skip;
 8593     __ bsrq(Rdst, Rsrc);
 8594     __ jccb(Assembler::notZero, skip);
 8595     __ movl(Rdst, -1);
 8596     __ bind(skip);
 8597     __ negl(Rdst);
 8598     __ addl(Rdst, BitsPerLong - 1);
 8599   %}
 8600   ins_pipe(ialu_reg);
 8601 %}
 8602 
 8603 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8604   predicate(UseCountTrailingZerosInstruction);
 8605   match(Set dst (CountTrailingZerosI src));
 8606   effect(KILL cr);
 8607 
 8608   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8609   ins_encode %{
 8610     __ tzcntl($dst$$Register, $src$$Register);
 8611   %}
 8612   ins_pipe(ialu_reg);
 8613 %}
 8614 
 8615 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8616   predicate(UseCountTrailingZerosInstruction);
 8617   match(Set dst (CountTrailingZerosI (LoadI src)));
 8618   effect(KILL cr);
 8619   ins_cost(175);
 8620   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8621   ins_encode %{
 8622     __ tzcntl($dst$$Register, $src$$Address);
 8623   %}
 8624   ins_pipe(ialu_reg_mem);
 8625 %}
 8626 
 8627 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8628   predicate(!UseCountTrailingZerosInstruction);
 8629   match(Set dst (CountTrailingZerosI src));
 8630   effect(KILL cr);
 8631 
 8632   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8633             "jnz     done\n\t"
 8634             "movl    $dst, 32\n"
 8635       "done:" %}
 8636   ins_encode %{
 8637     Register Rdst = $dst$$Register;
 8638     Label done;
 8639     __ bsfl(Rdst, $src$$Register);
 8640     __ jccb(Assembler::notZero, done);
 8641     __ movl(Rdst, BitsPerInt);
 8642     __ bind(done);
 8643   %}
 8644   ins_pipe(ialu_reg);
 8645 %}
 8646 
 8647 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8648   predicate(UseCountTrailingZerosInstruction);
 8649   match(Set dst (CountTrailingZerosL src));
 8650   effect(KILL cr);
 8651 
 8652   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8653   ins_encode %{
 8654     __ tzcntq($dst$$Register, $src$$Register);
 8655   %}
 8656   ins_pipe(ialu_reg);
 8657 %}
 8658 
 8659 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8660   predicate(UseCountTrailingZerosInstruction);
 8661   match(Set dst (CountTrailingZerosL (LoadL src)));
 8662   effect(KILL cr);
 8663   ins_cost(175);
 8664   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8665   ins_encode %{
 8666     __ tzcntq($dst$$Register, $src$$Address);
 8667   %}
 8668   ins_pipe(ialu_reg_mem);
 8669 %}
 8670 
 8671 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8672   predicate(!UseCountTrailingZerosInstruction);
 8673   match(Set dst (CountTrailingZerosL src));
 8674   effect(KILL cr);
 8675 
 8676   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8677             "jnz     done\n\t"
 8678             "movl    $dst, 64\n"
 8679       "done:" %}
 8680   ins_encode %{
 8681     Register Rdst = $dst$$Register;
 8682     Label done;
 8683     __ bsfq(Rdst, $src$$Register);
 8684     __ jccb(Assembler::notZero, done);
 8685     __ movl(Rdst, BitsPerLong);
 8686     __ bind(done);
 8687   %}
 8688   ins_pipe(ialu_reg);
 8689 %}
 8690 
 8691 //--------------- Reverse Operation Instructions ----------------
 8692 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8693   predicate(!VM_Version::supports_gfni());
 8694   match(Set dst (ReverseI src));
 8695   effect(TEMP dst, TEMP rtmp, KILL cr);
 8696   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8697   ins_encode %{
 8698     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8699   %}
 8700   ins_pipe( ialu_reg );
 8701 %}
 8702 
 8703 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8704   predicate(VM_Version::supports_gfni());
 8705   match(Set dst (ReverseI src));
 8706   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8707   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8708   ins_encode %{
 8709     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8710   %}
 8711   ins_pipe( ialu_reg );
 8712 %}
 8713 
 8714 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8715   predicate(!VM_Version::supports_gfni());
 8716   match(Set dst (ReverseL src));
 8717   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8718   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8719   ins_encode %{
 8720     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8721   %}
 8722   ins_pipe( ialu_reg );
 8723 %}
 8724 
 8725 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8726   predicate(VM_Version::supports_gfni());
 8727   match(Set dst (ReverseL src));
 8728   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8729   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8730   ins_encode %{
 8731     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8732   %}
 8733   ins_pipe( ialu_reg );
 8734 %}
 8735 
 8736 //---------- Population Count Instructions -------------------------------------
 8737 
 8738 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8739   predicate(UsePopCountInstruction);
 8740   match(Set dst (PopCountI src));
 8741   effect(KILL cr);
 8742 
 8743   format %{ "popcnt  $dst, $src" %}
 8744   ins_encode %{
 8745     __ popcntl($dst$$Register, $src$$Register);
 8746   %}
 8747   ins_pipe(ialu_reg);
 8748 %}
 8749 
 8750 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8751   predicate(UsePopCountInstruction);
 8752   match(Set dst (PopCountI (LoadI mem)));
 8753   effect(KILL cr);
 8754 
 8755   format %{ "popcnt  $dst, $mem" %}
 8756   ins_encode %{
 8757     __ popcntl($dst$$Register, $mem$$Address);
 8758   %}
 8759   ins_pipe(ialu_reg);
 8760 %}
 8761 
 8762 // Note: Long.bitCount(long) returns an int.
 8763 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8764   predicate(UsePopCountInstruction);
 8765   match(Set dst (PopCountL src));
 8766   effect(KILL cr);
 8767 
 8768   format %{ "popcnt  $dst, $src" %}
 8769   ins_encode %{
 8770     __ popcntq($dst$$Register, $src$$Register);
 8771   %}
 8772   ins_pipe(ialu_reg);
 8773 %}
 8774 
 8775 // Note: Long.bitCount(long) returns an int.
 8776 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8777   predicate(UsePopCountInstruction);
 8778   match(Set dst (PopCountL (LoadL mem)));
 8779   effect(KILL cr);
 8780 
 8781   format %{ "popcnt  $dst, $mem" %}
 8782   ins_encode %{
 8783     __ popcntq($dst$$Register, $mem$$Address);
 8784   %}
 8785   ins_pipe(ialu_reg);
 8786 %}
 8787 
 8788 
 8789 //----------MemBar Instructions-----------------------------------------------
 8790 // Memory barrier flavors
 8791 
 8792 instruct membar_acquire()
 8793 %{
 8794   match(MemBarAcquire);
 8795   match(LoadFence);
 8796   ins_cost(0);
 8797 
 8798   size(0);
 8799   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8800   ins_encode();
 8801   ins_pipe(empty);
 8802 %}
 8803 
 8804 instruct membar_acquire_lock()
 8805 %{
 8806   match(MemBarAcquireLock);
 8807   ins_cost(0);
 8808 
 8809   size(0);
 8810   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8811   ins_encode();
 8812   ins_pipe(empty);
 8813 %}
 8814 
 8815 instruct membar_release()
 8816 %{
 8817   match(MemBarRelease);
 8818   match(StoreFence);
 8819   ins_cost(0);
 8820 
 8821   size(0);
 8822   format %{ "MEMBAR-release ! (empty encoding)" %}
 8823   ins_encode();
 8824   ins_pipe(empty);
 8825 %}
 8826 
 8827 instruct membar_release_lock()
 8828 %{
 8829   match(MemBarReleaseLock);
 8830   ins_cost(0);
 8831 
 8832   size(0);
 8833   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8834   ins_encode();
 8835   ins_pipe(empty);
 8836 %}
 8837 
 8838 instruct membar_storeload(rFlagsReg cr) %{
 8839   match(MemBarStoreLoad);
 8840   effect(KILL cr);
 8841   ins_cost(400);
 8842 
 8843   format %{
 8844     $$template
 8845     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8846   %}
 8847   ins_encode %{
 8848     __ membar(Assembler::StoreLoad);
 8849   %}
 8850   ins_pipe(pipe_slow);
 8851 %}
 8852 
 8853 instruct membar_volatile(rFlagsReg cr) %{
 8854   match(MemBarVolatile);
 8855   effect(KILL cr);
 8856   ins_cost(400);
 8857 
 8858   format %{
 8859     $$template
 8860     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8861   %}
 8862   ins_encode %{
 8863     __ membar(Assembler::StoreLoad);
 8864   %}
 8865   ins_pipe(pipe_slow);
 8866 %}
 8867 
 8868 instruct unnecessary_membar_volatile()
 8869 %{
 8870   match(MemBarVolatile);
 8871   predicate(Matcher::post_store_load_barrier(n));
 8872   ins_cost(0);
 8873 
 8874   size(0);
 8875   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8876   ins_encode();
 8877   ins_pipe(empty);
 8878 %}
 8879 
 8880 instruct membar_full(rFlagsReg cr) %{
 8881   match(MemBarFull);
 8882   effect(KILL cr);
 8883   ins_cost(400);
 8884 
 8885   format %{
 8886     $$template
 8887     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8888   %}
 8889   ins_encode %{
 8890     __ membar(Assembler::StoreLoad);
 8891   %}
 8892   ins_pipe(pipe_slow);
 8893 %}
 8894 
 8895 instruct membar_storestore() %{
 8896   match(MemBarStoreStore);
 8897   match(StoreStoreFence);
 8898   ins_cost(0);
 8899 
 8900   size(0);
 8901   format %{ "MEMBAR-storestore (empty encoding)" %}
 8902   ins_encode( );
 8903   ins_pipe(empty);
 8904 %}
 8905 
 8906 //----------Move Instructions--------------------------------------------------
 8907 
 8908 instruct castX2P(rRegP dst, rRegL src)
 8909 %{
 8910   match(Set dst (CastX2P src));
 8911 
 8912   format %{ "movq    $dst, $src\t# long->ptr" %}
 8913   ins_encode %{
 8914     if ($dst$$reg != $src$$reg) {
 8915       __ movptr($dst$$Register, $src$$Register);
 8916     }
 8917   %}
 8918   ins_pipe(ialu_reg_reg); // XXX
 8919 %}
 8920 
 8921 instruct castP2X(rRegL dst, rRegP src)
 8922 %{
 8923   match(Set dst (CastP2X src));
 8924 
 8925   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8926   ins_encode %{
 8927     if ($dst$$reg != $src$$reg) {
 8928       __ movptr($dst$$Register, $src$$Register);
 8929     }
 8930   %}
 8931   ins_pipe(ialu_reg_reg); // XXX
 8932 %}
 8933 
 8934 // Convert oop into int for vectors alignment masking
 8935 instruct convP2I(rRegI dst, rRegP src)
 8936 %{
 8937   match(Set dst (ConvL2I (CastP2X src)));
 8938 
 8939   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8940   ins_encode %{
 8941     __ movl($dst$$Register, $src$$Register);
 8942   %}
 8943   ins_pipe(ialu_reg_reg); // XXX
 8944 %}
 8945 
 8946 // Convert compressed oop into int for vectors alignment masking
 8947 // in case of 32bit oops (heap < 4Gb).
 8948 instruct convN2I(rRegI dst, rRegN src)
 8949 %{
 8950   predicate(CompressedOops::shift() == 0);
 8951   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8952 
 8953   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8954   ins_encode %{
 8955     __ movl($dst$$Register, $src$$Register);
 8956   %}
 8957   ins_pipe(ialu_reg_reg); // XXX
 8958 %}
 8959 
 8960 // Convert oop pointer into compressed form
 8961 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8962   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8963   match(Set dst (EncodeP src));
 8964   effect(KILL cr);
 8965   format %{ "encode_heap_oop $dst,$src" %}
 8966   ins_encode %{
 8967     Register s = $src$$Register;
 8968     Register d = $dst$$Register;
 8969     if (s != d) {
 8970       __ movq(d, s);
 8971     }
 8972     __ encode_heap_oop(d);
 8973   %}
 8974   ins_pipe(ialu_reg_long);
 8975 %}
 8976 
 8977 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8978   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8979   match(Set dst (EncodeP src));
 8980   effect(KILL cr);
 8981   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8982   ins_encode %{
 8983     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8984   %}
 8985   ins_pipe(ialu_reg_long);
 8986 %}
 8987 
 8988 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8989   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8990             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8991   match(Set dst (DecodeN src));
 8992   effect(KILL cr);
 8993   format %{ "decode_heap_oop $dst,$src" %}
 8994   ins_encode %{
 8995     Register s = $src$$Register;
 8996     Register d = $dst$$Register;
 8997     if (s != d) {
 8998       __ movq(d, s);
 8999     }
 9000     __ decode_heap_oop(d);
 9001   %}
 9002   ins_pipe(ialu_reg_long);
 9003 %}
 9004 
 9005 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9006   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9007             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9008   match(Set dst (DecodeN src));
 9009   effect(KILL cr);
 9010   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9011   ins_encode %{
 9012     Register s = $src$$Register;
 9013     Register d = $dst$$Register;
 9014     if (s != d) {
 9015       __ decode_heap_oop_not_null(d, s);
 9016     } else {
 9017       __ decode_heap_oop_not_null(d);
 9018     }
 9019   %}
 9020   ins_pipe(ialu_reg_long);
 9021 %}
 9022 
 9023 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9024   match(Set dst (EncodePKlass src));
 9025   effect(TEMP dst, KILL cr);
 9026   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9027   ins_encode %{
 9028     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9029   %}
 9030   ins_pipe(ialu_reg_long);
 9031 %}
 9032 
 9033 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9034   match(Set dst (DecodeNKlass src));
 9035   effect(TEMP dst, KILL cr);
 9036   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9037   ins_encode %{
 9038     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9039   %}
 9040   ins_pipe(ialu_reg_long);
 9041 %}
 9042 
 9043 //----------Conditional Move---------------------------------------------------
 9044 // Jump
 9045 // dummy instruction for generating temp registers
 9046 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9047   match(Jump (LShiftL switch_val shift));
 9048   ins_cost(350);
 9049   predicate(false);
 9050   effect(TEMP dest);
 9051 
 9052   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9053             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9054   ins_encode %{
 9055     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9056     // to do that and the compiler is using that register as one it can allocate.
 9057     // So we build it all by hand.
 9058     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9059     // ArrayAddress dispatch(table, index);
 9060     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9061     __ lea($dest$$Register, $constantaddress);
 9062     __ jmp(dispatch);
 9063   %}
 9064   ins_pipe(pipe_jmp);
 9065 %}
 9066 
 9067 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9068   match(Jump (AddL (LShiftL switch_val shift) offset));
 9069   ins_cost(350);
 9070   effect(TEMP dest);
 9071 
 9072   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9073             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9074   ins_encode %{
 9075     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9076     // to do that and the compiler is using that register as one it can allocate.
 9077     // So we build it all by hand.
 9078     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9079     // ArrayAddress dispatch(table, index);
 9080     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9081     __ lea($dest$$Register, $constantaddress);
 9082     __ jmp(dispatch);
 9083   %}
 9084   ins_pipe(pipe_jmp);
 9085 %}
 9086 
 9087 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9088   match(Jump switch_val);
 9089   ins_cost(350);
 9090   effect(TEMP dest);
 9091 
 9092   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9093             "jmp     [$dest + $switch_val]\n\t" %}
 9094   ins_encode %{
 9095     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9096     // to do that and the compiler is using that register as one it can allocate.
 9097     // So we build it all by hand.
 9098     // Address index(noreg, switch_reg, Address::times_1);
 9099     // ArrayAddress dispatch(table, index);
 9100     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9101     __ lea($dest$$Register, $constantaddress);
 9102     __ jmp(dispatch);
 9103   %}
 9104   ins_pipe(pipe_jmp);
 9105 %}
 9106 
 9107 // Conditional move
 9108 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9109 %{
 9110   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9111   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9112 
 9113   ins_cost(100); // XXX
 9114   format %{ "setbn$cop $dst\t# signed, int" %}
 9115   ins_encode %{
 9116     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9117     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9118   %}
 9119   ins_pipe(ialu_reg);
 9120 %}
 9121 
 9122 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9123 %{
 9124   predicate(!UseAPX);
 9125   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9126 
 9127   ins_cost(200); // XXX
 9128   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9129   ins_encode %{
 9130     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9131   %}
 9132   ins_pipe(pipe_cmov_reg);
 9133 %}
 9134 
 9135 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9136 %{
 9137   predicate(UseAPX);
 9138   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9139 
 9140   ins_cost(200);
 9141   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9142   ins_encode %{
 9143     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9144   %}
 9145   ins_pipe(pipe_cmov_reg);
 9146 %}
 9147 
 9148 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9149 %{
 9150   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9151   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9152 
 9153   ins_cost(100); // XXX
 9154   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9155   ins_encode %{
 9156     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9157     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9158   %}
 9159   ins_pipe(ialu_reg);
 9160 %}
 9161 
 9162 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9163   predicate(!UseAPX);
 9164   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9165 
 9166   ins_cost(200); // XXX
 9167   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9168   ins_encode %{
 9169     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9170   %}
 9171   ins_pipe(pipe_cmov_reg);
 9172 %}
 9173 
 9174 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9175   predicate(UseAPX);
 9176   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9177 
 9178   ins_cost(200);
 9179   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9180   ins_encode %{
 9181     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9182   %}
 9183   ins_pipe(pipe_cmov_reg);
 9184 %}
 9185 
 9186 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9187 %{
 9188   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9189   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9190 
 9191   ins_cost(100); // XXX
 9192   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9193   ins_encode %{
 9194     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9195     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9196   %}
 9197   ins_pipe(ialu_reg);
 9198 %}
 9199 
 9200 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9201 %{
 9202   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9203   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9204 
 9205   ins_cost(100); // XXX
 9206   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9207   ins_encode %{
 9208     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9209     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9210   %}
 9211   ins_pipe(ialu_reg);
 9212 %}
 9213 
 9214 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9215   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9216 
 9217   ins_cost(200);
 9218   expand %{
 9219     cmovI_regU(cop, cr, dst, src);
 9220   %}
 9221 %}
 9222 
 9223 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9224   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9225 
 9226   ins_cost(200);
 9227   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9228   ins_encode %{
 9229     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9230   %}
 9231   ins_pipe(pipe_cmov_reg);
 9232 %}
 9233 
 9234 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9235   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9236   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9237 
 9238   ins_cost(200); // XXX
 9239   format %{ "cmovpl  $dst, $src\n\t"
 9240             "cmovnel $dst, $src" %}
 9241   ins_encode %{
 9242     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9243     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9244   %}
 9245   ins_pipe(pipe_cmov_reg);
 9246 %}
 9247 
 9248 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9249 // inputs of the CMove
 9250 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // CMoveI for cmpOpUCF2 when the Bool test is 'eq'. The match rule
        // swaps the two value inputs (Binary src dst), which lets the same
        // parity / notEqual cmovl pair as the 'ne' rule be used.
        // dst is a value input of the match rule and is read by both cmovl,
        // so it must not be declared TEMP (no effect clause, matching the
        // N/P/L variants of this rule).
 9251   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9252   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9254
 9255   ins_cost(200); // XXX
 9256   format %{ "cmovpl  $dst, $src\n\t"
 9257             "cmovnel $dst, $src" %}
 9258   ins_encode %{
 9259     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9260     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9261   %}
 9262   ins_pipe(pipe_cmov_reg);
 9263 %}
 9264 
 9265 // Conditional move
 9266 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
        // CMoveI whose second value is a LoadI from memory: emits cmovl with
        // an Address source, folding the load into the conditional move.
        // Uses the cmpOp / rFlagsReg operand pair.
 9267   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9268
 9269   ins_cost(250); // XXX
 9270   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9271   ins_encode %{
 9272     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9273   %}
 9274   ins_pipe(pipe_cmov_mem);
 9275 %}
 9276 
 9277 // Conditional move
 9278 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9279 %{
        // Same shape as cmovI_mem but for the cmpOpU / rFlagsRegU operand
        // pair: cmovl with a memory source.
        // Also the expansion target of cmovI_memUCF.
 9280   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9281
 9282   ins_cost(250); // XXX
 9283   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9284   ins_encode %{
 9285     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9286   %}
 9287   ins_pipe(pipe_cmov_mem);
 9288 %}
 9289 
 9290 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
        // CMoveI-from-memory rule for cmpOpUCF / rFlagsRegUCF operands; it
        // only expands to cmovI_memU with the same operands.
 9291   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9292
 9293   ins_cost(250);
 9294   expand %{
 9295     cmovI_memU(cop, cr, dst, src);
 9296   %}
 9297 %}
 9298 
 9299 instruct cmovI_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI dst, memory src) %{
        // CMoveI-from-memory rule for cmpOpUCFE / rFlagsRegUCFE operands.
        // Unlike the UCF variant this one carries its own encoding: a
        // two-operand cmovl with an Address source.
 9300   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9301
 9302   ins_cost(250); // XXX
 9303   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9304   ins_encode %{
 9305     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9306   %}
 9307   ins_pipe(pipe_cmov_mem);
 9308 %}
 9309 
 9310 // Conditional move
 9311 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9312 %{
        // CMoveN (rRegN operands) for the cmpOp / rFlagsReg pair, selected
        // only when UseAPX is off. Emits a two-operand cmovl: dst is both an
        // input and the result.
 9313   predicate(!UseAPX);
 9314   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9315
 9316   ins_cost(200); // XXX
 9317   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9318   ins_encode %{
 9319     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9320   %}
 9321   ins_pipe(pipe_cmov_reg);
 9322 %}
 9323 
 9324 // Conditional move ndd
 9325 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9326 %{
        // UseAPX counterpart of cmovN_reg: emits ecmovl with a destination
        // that is distinct from the two value inputs src1 and src2.
 9327   predicate(UseAPX);
 9328   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9329
 9330   ins_cost(200);
 9331   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9332   ins_encode %{
 9333     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9334   %}
 9335   ins_pipe(pipe_cmov_reg);
 9336 %}
 9337 
 9338 // Conditional move
 9339 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9340 %{
        // CMoveN for the cmpOpU / rFlagsRegU pair when UseAPX is off; emits a
        // two-operand cmovl. Also the expansion target of cmovN_regUCF.
 9341   predicate(!UseAPX);
 9342   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9343
 9344   ins_cost(200); // XXX
 9345   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9346   ins_encode %{
 9347     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9348   %}
 9349   ins_pipe(pipe_cmov_reg);
 9350 %}
 9351 
 9352 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // CMoveN rule for cmpOpUCF / rFlagsRegUCF operands; it has no encoding
        // and expands to cmovN_regU with the same operands.
 9353   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9354
 9355   ins_cost(200);
 9356   expand %{
 9357     cmovN_regU(cop, cr, dst, src);
 9358   %}
 9359 %}
 9360 
 9361 // Conditional move ndd
 9362 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9363 %{
        // UseAPX counterpart of cmovN_regU: ecmovl with a destination separate
        // from the value inputs src1 and src2.
 9364   predicate(UseAPX);
 9365   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9366
 9367   ins_cost(200);
 9368   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9369   ins_encode %{
 9370     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9371   %}
 9372   ins_pipe(pipe_cmov_reg);
 9373 %}
 9374 
 9375 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
        // CMoveN rule for cmpOpUCFE / rFlagsRegUCFE operands. It has no
        // predicate of its own and emits a three-register ecmovl.
 9376   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9377
 9378   ins_cost(200);
 9379   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9380   ins_encode %{
 9381     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9382   %}
 9383   ins_pipe(pipe_cmov_reg);
 9384 %}
 9385 
 9386 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // CMoveN for cmpOpUCF2 when the Bool test is 'ne'. Emits cmovl on
        // parity followed by cmovl on notEqual, so dst takes src if either
        // condition holds.
        // NOTE(review): presumably parity flags an unordered FP compare - confirm.
 9387   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9388   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9389
 9390   ins_cost(200); // XXX
 9391   format %{ "cmovpl  $dst, $src\n\t"
 9392             "cmovnel $dst, $src" %}
 9393   ins_encode %{
 9394     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9395     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9396   %}
 9397   ins_pipe(pipe_cmov_reg);
 9398 %}
 9399 
 9400 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9401 // inputs of the CMove
 9402 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // CMoveN for cmpOpUCF2 when the Bool test is 'eq'. The value inputs
        // are swapped in the match rule (Binary src dst), so the encoding is
        // the same parity / notEqual cmovl pair used by the 'ne' rule.
 9403   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9404   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9405
 9406   ins_cost(200); // XXX
 9407   format %{ "cmovpl  $dst, $src\n\t"
 9408             "cmovnel $dst, $src" %}
 9409   ins_encode %{
 9410     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9411     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9412   %}
 9413   ins_pipe(pipe_cmov_reg);
 9414 %}
 9415 
 9416 // Conditional move
 9417 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9418 %{
        // CMoveP (rRegP operands) for the cmpOp / rFlagsReg pair, selected
        // only when UseAPX is off. Emits a two-operand cmovq: dst is both an
        // input and the result.
 9419   predicate(!UseAPX);
 9420   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9421
 9422   ins_cost(200); // XXX
 9423   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9424   ins_encode %{
 9425     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9426   %}
 9427   ins_pipe(pipe_cmov_reg);  // XXX
 9428 %}
 9429 
 9430 // Conditional move ndd
 9431 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9432 %{
        // UseAPX counterpart of cmovP_reg: ecmovq with a destination separate
        // from the value inputs src1 and src2.
 9433   predicate(UseAPX);
 9434   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9435
 9436   ins_cost(200);
 9437   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9438   ins_encode %{
 9439     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9440   %}
 9441   ins_pipe(pipe_cmov_reg);
 9442 %}
 9443 
 9444 // Conditional move
 9445 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9446 %{
        // CMoveP for the cmpOpU / rFlagsRegU pair when UseAPX is off; emits a
        // two-operand cmovq. Also the expansion target of cmovP_regUCF.
 9447   predicate(!UseAPX);
 9448   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9449
 9450   ins_cost(200); // XXX
 9451   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9452   ins_encode %{
 9453     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9454   %}
 9455   ins_pipe(pipe_cmov_reg); // XXX
 9456 %}
 9457 
 9458 // Conditional move ndd
 9459 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9460 %{
        // UseAPX counterpart of cmovP_regU: ecmovq with a destination separate
        // from the value inputs src1 and src2.
 9461   predicate(UseAPX);
 9462   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9463
 9464   ins_cost(200);
 9465   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9466   ins_encode %{
 9467     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9468   %}
 9469   ins_pipe(pipe_cmov_reg);
 9470 %}
 9471 
 9472 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // CMoveP rule for cmpOpUCF / rFlagsRegUCF operands; it has no encoding
        // and expands to cmovP_regU with the same operands.
 9473   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9474
 9475   ins_cost(200);
 9476   expand %{
 9477     cmovP_regU(cop, cr, dst, src);
 9478   %}
 9479 %}
 9480 
 9481 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
        // CMoveP rule for cmpOpUCFE / rFlagsRegUCFE operands. It has no
        // predicate of its own and emits a three-register ecmovq.
 9482   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9483
 9484   ins_cost(200);
 9485   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9486   ins_encode %{
 9487     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9488   %}
 9489   ins_pipe(pipe_cmov_reg);
 9490 %}
 9491 
 9492 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // CMoveP for cmpOpUCF2 when the Bool test is 'ne'. Emits cmovq on
        // parity followed by cmovq on notEqual, so dst takes src if either
        // condition holds.
        // NOTE(review): presumably parity flags an unordered FP compare - confirm.
 9493   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9494   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9495
 9496   ins_cost(200); // XXX
 9497   format %{ "cmovpq  $dst, $src\n\t"
 9498             "cmovneq $dst, $src" %}
 9499   ins_encode %{
 9500     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9501     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9502   %}
 9503   ins_pipe(pipe_cmov_reg);
 9504 %}
 9505 
 9506 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9507 // inputs of the CMove
 9508 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // CMoveP for cmpOpUCF2 when the Bool test is 'eq'. The value inputs
        // are swapped in the match rule (Binary src dst), so the encoding is
        // the same parity / notEqual cmovq pair used by the 'ne' rule.
 9509   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9510   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9511
 9512   ins_cost(200); // XXX
 9513   format %{ "cmovpq  $dst, $src\n\t"
 9514             "cmovneq $dst, $src" %}
 9515   ins_encode %{
 9516     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9517     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9518   %}
 9519   ins_pipe(pipe_cmov_reg);
 9520 %}
 9521 
 9522 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9523 %{
        // CMoveL special case for the cmpOp / rFlagsReg pair: one value input
        // is the immL1 operand and the predicate requires the other
        // (n->in(2)->in(2)) to be the long constant 0. No cmov is emitted; a
        // single setb on the negated condition writes dst instead.
 9524   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9525   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9526
 9527   ins_cost(100); // XXX
 9528   format %{ "setbn$cop $dst\t# signed, long" %}
 9529   ins_encode %{
 9530     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9531     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9532   %}
 9533   ins_pipe(ialu_reg);
 9534 %}
 9535 
 9536 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9537 %{
        // CMoveL (rRegL operands) for the cmpOp / rFlagsReg pair, selected
        // only when UseAPX is off. Emits a two-operand cmovq: dst is both an
        // input and the result.
 9538   predicate(!UseAPX);
 9539   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9540
 9541   ins_cost(200); // XXX
 9542   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9543   ins_encode %{
 9544     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9545   %}
 9546   ins_pipe(pipe_cmov_reg);  // XXX
 9547 %}
 9548 
 9549 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9550 %{
        // UseAPX counterpart of cmovL_reg: ecmovq with a destination separate
        // from the value inputs src1 and src2.
 9551   predicate(UseAPX);
 9552   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9553
 9554   ins_cost(200);
 9555   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9556   ins_encode %{
 9557     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9558   %}
 9559   ins_pipe(pipe_cmov_reg);
 9560 %}
 9561 
 9562 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9563 %{
        // CMoveL whose second value is a LoadL from memory: emits cmovq with
        // an Address source, folding the load into the conditional move.
 9564   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9565
 9566   ins_cost(200); // XXX
 9567   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9568   ins_encode %{
 9569     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9570   %}
 9571   ins_pipe(pipe_cmov_mem);  // XXX
 9572 %}
 9573 
 9574 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9575 %{
        // cmpOpU / rFlagsRegU variant of the CMoveL special case: one value
        // input is the immL1 operand and the predicate requires the other
        // (n->in(2)->in(2)) to be the long constant 0. Encoded as a single
        // setb on the negated condition.
 9576   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9577   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9578
 9579   ins_cost(100); // XXX
 9580   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9581   ins_encode %{
 9582     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9583     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9584   %}
 9585   ins_pipe(ialu_reg);
 9586 %}
 9587 
 9588 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9589 %{
        // CMoveL for the cmpOpU / rFlagsRegU pair when UseAPX is off; emits a
        // two-operand cmovq. Also the expansion target of cmovL_regUCF.
 9590   predicate(!UseAPX);
 9591   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9592
 9593   ins_cost(200); // XXX
 9594   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9595   ins_encode %{
 9596     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9597   %}
 9598   ins_pipe(pipe_cmov_reg); // XXX
 9599 %}
 9600 
 9601 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9602 %{
        // UseAPX counterpart of cmovL_regU: ecmovq with a destination separate
        // from the value inputs src1 and src2.
 9603   predicate(UseAPX);
 9604   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9605
 9606   ins_cost(200);
 9607   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9608   ins_encode %{
 9609     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9610   %}
 9611   ins_pipe(pipe_cmov_reg);
 9612 %}
 9613 
 9614 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9615 %{
        // cmpOpUCF / rFlagsRegUCF variant of the CMoveL special case: one
        // value input is the immL1 operand and the predicate requires the
        // other (n->in(2)->in(2)) to be the long constant 0. Encoded as a
        // single setb on the negated condition.
 9616   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9617   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9618
 9619   ins_cost(100); // XXX
 9620   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9621   ins_encode %{
 9622     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9623     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9624   %}
 9625   ins_pipe(ialu_reg);
 9626 %}
 9627 
 9628 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9629 %{
        // cmpOpUCFE / rFlagsRegUCFE variant of the CMoveL special case: one
        // value input is the immL1 operand and the predicate requires the
        // other (n->in(2)->in(2)) to be the long constant 0. Encoded as a
        // single setb on the negated condition.
 9630   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9631   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9632
 9633   ins_cost(100); // XXX
 9634   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9635   ins_encode %{
 9636     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9637     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9638   %}
 9639   ins_pipe(ialu_reg);
 9640 %}
 9641 
 9642 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // CMoveL rule for cmpOpUCF / rFlagsRegUCF operands; it has no encoding
        // and expands to cmovL_regU with the same operands.
 9643   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9644
 9645   ins_cost(200);
 9646   expand %{
 9647     cmovL_regU(cop, cr, dst, src);
 9648   %}
 9649 %}
 9650 
 9651 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9652 %{
        // CMoveL rule for cmpOpUCFE / rFlagsRegUCFE operands. It has no
        // predicate of its own and emits a three-register ecmovq.
 9653   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9654
 9655   ins_cost(200);
 9656   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9657   ins_encode %{
 9658     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9659   %}
 9660   ins_pipe(pipe_cmov_reg);
 9661 %}
 9662 
 9663 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // CMoveL for cmpOpUCF2 when the Bool test is 'ne'. Emits cmovq on
        // parity followed by cmovq on notEqual, so dst takes src if either
        // condition holds.
        // NOTE(review): presumably parity flags an unordered FP compare - confirm.
 9664   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9665   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9666
 9667   ins_cost(200); // XXX
 9668   format %{ "cmovpq  $dst, $src\n\t"
 9669             "cmovneq $dst, $src" %}
 9670   ins_encode %{
 9671     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9672     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9673   %}
 9674   ins_pipe(pipe_cmov_reg);
 9675 %}
 9676 
 9677 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9678 // inputs of the CMove
 9679 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // CMoveL for cmpOpUCF2 when the Bool test is 'eq'. The value inputs
        // are swapped in the match rule (Binary src dst), so the encoding is
        // the same parity / notEqual cmovq pair used by the 'ne' rule.
 9680   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9681   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9682
 9683   ins_cost(200); // XXX
 9684   format %{ "cmovpq  $dst, $src\n\t"
 9685             "cmovneq $dst, $src" %}
 9686   ins_encode %{
 9687     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9688     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9689   %}
 9690   ins_pipe(pipe_cmov_reg);
 9691 %}
 9692 
 9693 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9694 %{
        // CMoveL-from-memory (LoadL src) for the cmpOpU / rFlagsRegU pair:
        // cmovq with an Address source.
        // Also the expansion target of cmovL_memUCF.
 9695   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9696
 9697   ins_cost(200); // XXX
 9698   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9699   ins_encode %{
 9700     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9701   %}
 9702   ins_pipe(pipe_cmov_mem); // XXX
 9703 %}
 9704 
 9705 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
        // CMoveL-from-memory rule for cmpOpUCF / rFlagsRegUCF operands; it
        // only expands to cmovL_memU with the same operands.
 9706   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9707
 9708   ins_cost(200);
 9709   expand %{
 9710     cmovL_memU(cop, cr, dst, src);
 9711   %}
 9712 %}
 9713 
 9714 instruct cmovL_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL dst, memory src) %{
        // CMoveL-from-memory rule for cmpOpUCFE / rFlagsRegUCFE operands.
        // Unlike the UCF variant this one carries its own encoding: a
        // two-operand cmovq with an Address source.
 9715   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9716
 9717   ins_cost(200); // XXX
 9718   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9719   ins_encode %{
 9720     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9721   %}
 9722   ins_pipe(pipe_cmov_mem); // XXX
 9723 %}
 9724 
 9725 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9726 %{
        // CMoveF on XMM registers for the cmpOp / rFlagsReg pair. It is not
        // encoded as a cmov: a short jcc on the inverted condition skips over
        // a movflt from src to dst.
 9727   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9728
 9729   ins_cost(200); // XXX
 9730   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9731             "movss     $dst, $src\n"
 9732     "skip:" %}
 9733   ins_encode %{
 9734     Label Lskip;
 9735     // Invert sense of branch from sense of CMOV
          // (done by flipping the low bit of the condition code)
 9736     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9737     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9738     __ bind(Lskip);
 9739   %}
 9740   ins_pipe(pipe_slow);
 9741 %}
 9742 
 9743 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9744 %{
        // CMoveF on XMM registers for the cmpOpU / rFlagsRegU pair: a short
        // jcc on the inverted condition skips over a movflt from src to dst.
        // Also the expansion target of cmovF_regUCF.
 9745   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9746
 9747   ins_cost(200); // XXX
 9748   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9749             "movss     $dst, $src\n"
 9750     "skip:" %}
 9751   ins_encode %{
 9752     Label Lskip;
 9753     // Invert sense of branch from sense of CMOV
          // (done by flipping the low bit of the condition code)
 9754     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9755     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9756     __ bind(Lskip);
 9757   %}
 9758   ins_pipe(pipe_slow);
 9759 %}
 9760 
 9761 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
        // CMoveF rule for cmpOpUCF / rFlagsRegUCF operands; it has no encoding
        // and expands to cmovF_regU with the same operands.
 9762   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9763
 9764   ins_cost(200);
 9765   expand %{
 9766     cmovF_regU(cop, cr, dst, src);
 9767   %}
 9768 %}
 9769 
 9770 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9771 %{
        // CMoveF on XMM registers for the cmpOpUCFE / rFlagsRegUCFE pair.
        // Carries its own encoding: a short jcc on the inverted condition
        // skips over a movflt from src to dst.
 9772   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9773
 9774   ins_cost(200); // XXX
 9775   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9776             "movss     $dst, $src\n"
 9777     "skip:" %}
 9778   ins_encode %{
 9779     Label Lskip;
 9780     // Invert sense of branch from sense of CMOV
          // (done by flipping the low bit of the condition code)
 9781     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9782     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9783     __ bind(Lskip);
 9784   %}
 9785   ins_pipe(pipe_slow);
 9786 %}
 9787 
 9788 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9789 %{
        // CMoveD on XMM registers for the cmpOp / rFlagsReg pair. It is not
        // encoded as a cmov: a short jcc on the inverted condition skips over
        // a movdbl from src to dst.
 9790   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9791
 9792   ins_cost(200); // XXX
 9793   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9794             "movsd     $dst, $src\n"
 9795     "skip:" %}
 9796   ins_encode %{
 9797     Label Lskip;
 9798     // Invert sense of branch from sense of CMOV
          // (done by flipping the low bit of the condition code)
 9799     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9800     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9801     __ bind(Lskip);
 9802   %}
 9803   ins_pipe(pipe_slow);
 9804 %}
 9805 
 9806 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9807 %{
        // CMoveD on XMM registers for the cmpOpU / rFlagsRegU pair: a short
        // jcc on the inverted condition skips over a movdbl from src to dst.
        // Also the expansion target of cmovD_regUCF.
 9808   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9809
 9810   ins_cost(200); // XXX
 9811   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9812             "movsd     $dst, $src\n"
 9813     "skip:" %}
 9814   ins_encode %{
 9815     Label Lskip;
 9816     // Invert sense of branch from sense of CMOV
          // (done by flipping the low bit of the condition code)
 9817     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9818     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9819     __ bind(Lskip);
 9820   %}
 9821   ins_pipe(pipe_slow);
 9822 %}
 9823 
 9824 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
        // CMoveD rule for cmpOpUCF / rFlagsRegUCF operands; it has no encoding
        // and expands to cmovD_regU with the same operands.
 9825   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9826
 9827   ins_cost(200);
 9828   expand %{
 9829     cmovD_regU(cop, cr, dst, src);
 9830   %}
 9831 %}
 9832 
 9833 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9834 %{
        // CMoveD on XMM registers for the cmpOpUCFE / rFlagsRegUCFE pair.
        // Carries its own encoding: a short jcc on the inverted condition
        // skips over a movdbl from src to dst.
 9835   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9836
 9837   ins_cost(200); // XXX
 9838   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9839             "movsd     $dst, $src\n"
 9840     "skip:" %}
 9841   ins_encode %{
 9842     Label Lskip;
 9843     // Invert sense of branch from sense of CMOV
          // (done by flipping the low bit of the condition code)
 9844     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9845     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9846     __ bind(Lskip);
 9847   %}
 9848   ins_pipe(pipe_slow);
 9849 %}
 9850 
 9851 //----------Arithmetic Instructions--------------------------------------------
 9852 //----------Addition Instructions----------------------------------------------
 9853 
 9854 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9855 %{
        // AddI of two int registers when UseAPX is off: two-operand addl that
        // accumulates into dst. cr is declared KILLed, and the flag() clause
        // tags the rule with the overflow/sign/zero/carry/parity
        // "sets ... flag" markers.
 9856   predicate(!UseAPX);
 9857   match(Set dst (AddI dst src));
 9858   effect(KILL cr);
 9859   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9860   format %{ "addl    $dst, $src\t# int" %}
 9861   ins_encode %{
 9862     __ addl($dst$$Register, $src$$Register);
 9863   %}
 9864   ins_pipe(ialu_reg_reg);
 9865 %}
 9866 
 9867 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9868 %{
        // UseAPX counterpart of addI_rReg: eaddl writes dst from src1 and src2
        // without requiring dst to alias an input. In addition to the
        // flag-setting markers the rule is tagged ndd_demotable for both
        // operand 1 and operand 2.
 9869   predicate(UseAPX);
 9870   match(Set dst (AddI src1 src2));
 9871   effect(KILL cr);
 9872   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9873
 9874   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9875   ins_encode %{
          // NOTE(review): trailing 'false' is presumably the no_flags selector - confirm in Assembler.
 9876     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9877   %}
 9878   ins_pipe(ialu_reg_reg);
 9879 %}
 9880 
 9881 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9882 %{
        // AddI of an int register and an immI constant when UseAPX is off:
        // addl with an immediate, accumulating into dst. cr is KILLed.
 9883   predicate(!UseAPX);
 9884   match(Set dst (AddI dst src));
 9885   effect(KILL cr);
 9886   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9887
 9888   format %{ "addl    $dst, $src\t# int" %}
 9889   ins_encode %{
 9890     __ addl($dst$$Register, $src$$constant);
 9891   %}
 9892   ins_pipe( ialu_reg );
 9893 %}
 9894 
 9895 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9896 %{
        // UseAPX counterpart of addI_rReg_imm: eaddl writes dst from register
        // src1 plus the immI constant src2. Only operand 1 is tagged
        // ndd_demotable, since src2 is an immediate.
 9897   predicate(UseAPX);
 9898   match(Set dst (AddI src1 src2));
 9899   effect(KILL cr);
 9900   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9901
 9902   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9903   ins_encode %{
          // NOTE(review): trailing 'false' is presumably the no_flags selector - confirm in Assembler.
 9904     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9905   %}
 9906   ins_pipe( ialu_reg );
 9907 %}
 9908 
 9909 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9910 %{
        // AddI of an int register and a LoadI from memory: addl with an
        // Address source, folding the load into the add. cr is KILLed.
 9911   match(Set dst (AddI dst (LoadI src)));
 9912   effect(KILL cr);
 9913   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9914
 9915   ins_cost(150); // XXX
 9916   format %{ "addl    $dst, $src\t# int" %}
 9917   ins_encode %{
 9918     __ addl($dst$$Register, $src$$Address);
 9919   %}
 9920   ins_pipe(ialu_reg_mem);
 9921 %}
 9922 
 9923 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9924 %{
        // Read-modify-write pattern: StoreI to dst of (LoadI dst + src) is
        // matched as one addl with a memory destination. cr is KILLed.
 9925   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9926   effect(KILL cr);
 9927   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9928
 9929   ins_cost(150); // XXX
 9930   format %{ "addl    $dst, $src\t# int" %}
 9931   ins_encode %{
 9932     __ addl($dst$$Address, $src$$Register);
 9933   %}
 9934   ins_pipe(ialu_mem_reg);
 9935 %}
 9936 
 9937 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9938 %{
        // Read-modify-write pattern with a constant: StoreI to dst of
        // (LoadI dst + immI) is matched as one addl of an immediate to memory.
        // cr is KILLed.
 9939   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9940   effect(KILL cr);
 9941   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9942
 9943
 9944   ins_cost(125); // XXX
 9945   format %{ "addl    $dst, $src\t# int" %}
 9946   ins_encode %{
 9947     __ addl($dst$$Address, $src$$constant);
 9948   %}
 9949   ins_pipe(ialu_mem_imm);
 9950 %}
 9951 
 9952 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 9953 %{
        // AddI of a register and the immI_1 operand, used when UseIncDec is on
        // and UseAPX is off: emitted through incrementl(dst). cr is KILLed;
        // no flag() markers are attached to this rule.
 9954   predicate(!UseAPX && UseIncDec);
 9955   match(Set dst (AddI dst src));
 9956   effect(KILL cr);
 9957
 9958   format %{ "incl    $dst\t# int" %}
 9959   ins_encode %{
 9960     __ incrementl($dst$$Register);
 9961   %}
 9962   ins_pipe(ialu_reg);
 9963 %}
 9964 
 9965 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
 9966 %{
        // UseAPX && UseIncDec form of the increment: eincl writes dst from src
        // (the immI_1 operand val is matched but not passed to the encoder).
        // Tagged ndd_demotable for operand 1.
 9967   predicate(UseAPX && UseIncDec);
 9968   match(Set dst (AddI src val));
 9969   effect(KILL cr);
 9970   flag(PD::Flag_ndd_demotable_opr1);
 9971
 9972   format %{ "eincl    $dst, $src\t# int ndd" %}
 9973   ins_encode %{
          // NOTE(review): trailing 'false' is presumably the no_flags selector - confirm in Assembler.
 9974     __ eincl($dst$$Register, $src$$Register, false);
 9975   %}
 9976   ins_pipe(ialu_reg);
 9977 %}
 9978 
 9979 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
 9980 %{
        // Read-modify-write increment: StoreI to dst of (LoadI dst + immI_1),
        // emitted through incrementl on the memory operand when UseIncDec is
        // on. cr is KILLed.
 9981   predicate(UseIncDec);
 9982   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9983   effect(KILL cr);
 9984
 9985   ins_cost(125); // XXX
 9986   format %{ "incl    $dst\t# int" %}
 9987   ins_encode %{
 9988     __ incrementl($dst$$Address);
 9989   %}
 9990   ins_pipe(ialu_mem_imm);
 9991 %}
 9992 
 9993 // XXX why does that use AddI
 9994 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
 9995 %{
        // Decrement expressed as AddI of a register and the immI_M1 operand,
        // used when UseIncDec is on and UseAPX is off: emitted through
        // decrementl(dst). cr is KILLed.
 9996   predicate(!UseAPX && UseIncDec);
 9997   match(Set dst (AddI dst src));
 9998   effect(KILL cr);
 9999
10000   format %{ "decl    $dst\t# int" %}
10001   ins_encode %{
10002     __ decrementl($dst$$Register);
10003   %}
10004   ins_pipe(ialu_reg);
10005 %}
10006 
10007 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10008 %{
        // UseAPX && UseIncDec form of the decrement: edecl writes dst from src
        // (the immI_M1 operand val is matched but not passed to the encoder).
        // Tagged ndd_demotable for operand 1.
10009   predicate(UseAPX && UseIncDec);
10010   match(Set dst (AddI src val));
10011   effect(KILL cr);
10012   flag(PD::Flag_ndd_demotable_opr1);
10013
10014   format %{ "edecl    $dst, $src\t# int ndd" %}
10015   ins_encode %{
          // NOTE(review): trailing 'false' is presumably the no_flags selector - confirm in Assembler.
10016     __ edecl($dst$$Register, $src$$Register, false);
10017   %}
10018   ins_pipe(ialu_reg);
10019 %}
10020 
10021 // XXX why does that use AddI
10022 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10023 %{
        // Read-modify-write decrement: StoreI to dst of (LoadI dst + immI_M1),
        // emitted through decrementl on the memory operand when UseIncDec is
        // on. cr is KILLed.
10024   predicate(UseIncDec);
10025   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10026   effect(KILL cr);
10027
10028   ins_cost(125); // XXX
10029   format %{ "decl    $dst\t# int" %}
10030   ins_encode %{
10031     __ decrementl($dst$$Address);
10032   %}
10033   ins_pipe(ialu_mem_imm);
10034 %}
10035 
10036 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10037 %{
        // Folds (index << scale) + disp into one leal with no base register
        // (noreg). Only selected when VM_Version::supports_fast_2op_lea().
        // The rule has no rFlagsReg operand and no KILL effect.
10038   predicate(VM_Version::supports_fast_2op_lea());
10039   match(Set dst (AddI (LShiftI index scale) disp));
10040
10041   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10042   ins_encode %{
          // The shift count constant is used directly as the Address scale factor.
10043     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10044     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10045   %}
10046   ins_pipe(ialu_reg_reg);
10047 %}
10048 
10049 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10050 %{
        // Folds (base + index) + disp into one leal with scale times_1.
        // Only selected when VM_Version::supports_fast_3op_lea().
10051   predicate(VM_Version::supports_fast_3op_lea());
10052   match(Set dst (AddI (AddI base index) disp));
10053
10054   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10055   ins_encode %{
10056     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10057   %}
10058   ins_pipe(ialu_reg_reg);
10059 %}
10060 
10061 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10062 %{
        // Folds base + (index << scale) into one leal with no displacement.
        // Only selected when VM_Version::supports_fast_2op_lea(). The base
        // operand uses the no_rbp_r13_RegI class.
        // NOTE(review): presumably because rbp/r13 as base would force a
        // displacement byte - confirm.
10063   predicate(VM_Version::supports_fast_2op_lea());
10064   match(Set dst (AddI base (LShiftI index scale)));
10065
10066   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10067   ins_encode %{
          // The shift count constant is used directly as the Address scale factor.
10068     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10069     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10070   %}
10071   ins_pipe(ialu_reg_reg);
10072 %}
10073 
10074 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10075 %{
        // Folds (base + (index << scale)) + disp into one leal using base,
        // scaled index and displacement together.
        // Only selected when VM_Version::supports_fast_3op_lea().
10076   predicate(VM_Version::supports_fast_3op_lea());
10077   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10078
10079   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10080   ins_encode %{
          // The shift count constant is used directly as the Address scale factor.
10081     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10082     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10083   %}
10084   ins_pipe(ialu_reg_reg);
10085 %}
10086 
10087 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10088 %{
        // AddL of two long registers when UseAPX is off: two-operand addq that
        // accumulates into dst. cr is declared KILLed, and the flag() clause
        // tags the rule with the overflow/sign/zero/carry/parity
        // "sets ... flag" markers.
10089   predicate(!UseAPX);
10090   match(Set dst (AddL dst src));
10091   effect(KILL cr);
10092   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10093
10094   format %{ "addq    $dst, $src\t# long" %}
10095   ins_encode %{
10096     __ addq($dst$$Register, $src$$Register);
10097   %}
10098   ins_pipe(ialu_reg_reg);
10099 %}
10100 
10101 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10102 %{
        // UseAPX counterpart of addL_rReg: eaddq writes dst from src1 and src2
        // without requiring dst to alias an input. Tagged ndd_demotable for
        // both operand 1 and operand 2 in addition to the flag-setting markers.
10103   predicate(UseAPX);
10104   match(Set dst (AddL src1 src2));
10105   effect(KILL cr);
10106   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10107
10108   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10109   ins_encode %{
          // NOTE(review): trailing 'false' is presumably the no_flags selector - confirm in Assembler.
10110     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10111   %}
10112   ins_pipe(ialu_reg_reg);
10113 %}
10114 
10115 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10116 %{
        // AddL of a long register and an immL32 constant when UseAPX is off:
        // addq with an immediate, accumulating into dst. cr is KILLed.
10117   predicate(!UseAPX);
10118   match(Set dst (AddL dst src));
10119   effect(KILL cr);
10120   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10121
10122   format %{ "addq    $dst, $src\t# long" %}
10123   ins_encode %{
10124     __ addq($dst$$Register, $src$$constant);
10125   %}
10126   ins_pipe( ialu_reg );
10127 %}
10128 
10129 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10130 %{
        // UseAPX counterpart of addL_rReg_imm: eaddq writes dst from register
        // src1 plus the immL32 constant src2. Only operand 1 is tagged
        // ndd_demotable, since src2 is an immediate.
10131   predicate(UseAPX);
10132   match(Set dst (AddL src1 src2));
10133   effect(KILL cr);
10134   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10135
10136   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10137   ins_encode %{
          // NOTE(review): trailing 'false' is presumably the no_flags selector - confirm in Assembler.
10138     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10139   %}
10140   ins_pipe( ialu_reg );
10141 %}
10142 
10143 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10144 %{
        // Long add that folds the LoadL of src into the add (dst = dst + [src]).
        // No UseAPX predicate; encodes addq with a memory source and kills cr.
10145   match(Set dst (AddL dst (LoadL src)));
10146   effect(KILL cr);
10147   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10148 
10149   ins_cost(150); // XXX
10150   format %{ "addq    $dst, $src\t# long" %}
10151   ins_encode %{
10152     __ addq($dst$$Register, $src$$Address);
10153   %}
10154   ins_pipe(ialu_reg_mem);
10155 %}
10156 
10157 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10158 %{
        // Read-modify-write long add: matches StoreL(dst, AddL(LoadL(dst), src))
        // and encodes it as one addq with a memory destination. Kills cr.
10159   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10160   effect(KILL cr);
10161   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10162 
10163   ins_cost(150); // XXX
10164   format %{ "addq    $dst, $src\t# long" %}
10165   ins_encode %{
10166     __ addq($dst$$Address, $src$$Register);
10167   %}
10168   ins_pipe(ialu_mem_reg);
10169 %}
10170 
10171 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10172 %{
        // Read-modify-write long add of an immL32 constant: matches
        // StoreL(dst, AddL(LoadL(dst), src)) and encodes addq [dst], constant. Kills cr.
10173   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10174   effect(KILL cr);
10175   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10176 
10177   ins_cost(125); // XXX
10178   format %{ "addq    $dst, $src\t# long" %}
10179   ins_encode %{
10180     __ addq($dst$$Address, $src$$constant);
10181   %}
10182   ins_pipe(ialu_mem_imm);
10183 %}
10184 
10185 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10186 %{
        // Long increment in place: matches AddL of dst with an immL1 operand
        // (presumably the constant 1 - confirm against the operand definition).
        // Selected only when UseIncDec is set and UseAPX is off.
        // The constant is not passed to the encoding; incrementq($dst) is emitted. Kills cr.
10187   predicate(!UseAPX && UseIncDec);
10188   match(Set dst (AddL dst src));
10189   effect(KILL cr);
10190 
10191   format %{ "incq    $dst\t# long" %}
10192   ins_encode %{
10193     __ incrementq($dst$$Register);
10194   %}
10195   ins_pipe(ialu_reg);
10196 %}
10197 
10198 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10199 %{
        // Long increment with a separate destination (dst = src + val), selected
        // only when both UseAPX and UseIncDec are set. val is an immL1 operand
        // (presumably the constant 1 - confirm against the operand definition);
        // it is not passed to the encoding, which emits eincq dst, src. Kills cr.
        //
        // src is a long register operand (rRegL): it is the input of an AddL and
        // is consumed by the 64-bit eincq, the same as in the sibling
        // decL_rReg_ndd rule.
10200   predicate(UseAPX && UseIncDec);
10201   match(Set dst (AddL src val));
10202   effect(KILL cr);
10203   flag(PD::Flag_ndd_demotable_opr1);
10204 
10205   format %{ "eincq    $dst, $src\t# long ndd" %}
10206   ins_encode %{
10207     __ eincq($dst$$Register, $src$$Register, false);
10208   %}
10209   ins_pipe(ialu_reg);
10210 %}
10211 
10212 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10213 %{
        // Read-modify-write long increment: matches
        // StoreL(dst, AddL(LoadL(dst), src)) with an immL1 operand when UseIncDec
        // is set, and emits incrementq on the memory operand. Kills cr.
10214   predicate(UseIncDec);
10215   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10216   effect(KILL cr);
10217 
10218   ins_cost(125); // XXX
10219   format %{ "incq    $dst\t# long" %}
10220   ins_encode %{
10221     __ incrementq($dst$$Address);
10222   %}
10223   ins_pipe(ialu_mem_imm);
10224 %}
10225 
10226 // XXX why does that use AddL
10227 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10228 %{
        // Long decrement in place, expressed as AddL of dst with an immL_M1
        // operand (presumably the constant -1 - confirm against the operand
        // definition). Selected only when UseIncDec is set and UseAPX is off.
        // Emits decrementq($dst) and kills cr.
10229   predicate(!UseAPX && UseIncDec);
10230   match(Set dst (AddL dst src));
10231   effect(KILL cr);
10232 
10233   format %{ "decq    $dst\t# long" %}
10234   ins_encode %{
10235     __ decrementq($dst$$Register);
10236   %}
10237   ins_pipe(ialu_reg);
10238 %}
10239 
10240 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10241 %{
        // Long decrement with a separate destination: matches AddL of src with
        // an immL_M1 operand when both UseAPX and UseIncDec are set.
        // val is not passed to the encoding; edecq dst, src is emitted. Kills cr.
10242   predicate(UseAPX && UseIncDec);
10243   match(Set dst (AddL src val));
10244   effect(KILL cr);
10245   flag(PD::Flag_ndd_demotable_opr1);
10246 
10247   format %{ "edecq    $dst, $src\t# long ndd" %}
10248   ins_encode %{
10249     __ edecq($dst$$Register, $src$$Register, false);
10250   %}
10251   ins_pipe(ialu_reg);
10252 %}
10253 
10254 // XXX why does that use AddL
10255 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10256 %{
        // Read-modify-write long decrement: matches
        // StoreL(dst, AddL(LoadL(dst), src)) with an immL_M1 operand when
        // UseIncDec is set, and emits decrementq on the memory operand. Kills cr.
10257   predicate(UseIncDec);
10258   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10259   effect(KILL cr);
10260 
10261   ins_cost(125); // XXX
10262   format %{ "decq    $dst\t# long" %}
10263   ins_encode %{
10264     __ decrementq($dst$$Address);
10265   %}
10266   ins_pipe(ialu_mem_imm);
10267 %}
10268 
10269 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10270 %{
        // Folds (index << scale) + disp into one leaq with no base register
        // (noreg). Guarded by VM_Version::supports_fast_2op_lea().
        // No flags operand or KILL effect is declared for this rule.
10271   predicate(VM_Version::supports_fast_2op_lea());
10272   match(Set dst (AddL (LShiftL index scale) disp));
10273 
10274   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10275   ins_encode %{
          // The shift-count constant is reinterpreted as an Address::ScaleFactor.
10276     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10277     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10278   %}
10279   ins_pipe(ialu_reg_reg);
10280 %}
10281 
10282 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10283 %{
        // Folds (base + index) + disp into one leaq using scale Address::times_1.
        // Guarded by VM_Version::supports_fast_3op_lea(); no flags effect declared.
10284   predicate(VM_Version::supports_fast_3op_lea());
10285   match(Set dst (AddL (AddL base index) disp));
10286 
10287   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10288   ins_encode %{
10289     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10290   %}
10291   ins_pipe(ialu_reg_reg);
10292 %}
10293 
10294 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10295 %{
        // Folds base + (index << scale) into one leaq without a displacement.
        // base uses the no_rbp_r13_RegL operand class (by its name it excludes
        // rbp and r13 - confirm against the register class definition).
        // Guarded by VM_Version::supports_fast_2op_lea(); no flags effect declared.
10296   predicate(VM_Version::supports_fast_2op_lea());
10297   match(Set dst (AddL base (LShiftL index scale)));
10298 
10299   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10300   ins_encode %{
          // The shift-count constant is reinterpreted as an Address::ScaleFactor.
10301     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10302     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10303   %}
10304   ins_pipe(ialu_reg_reg);
10305 %}
10306 
10307 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10308 %{
        // Folds (base + (index << scale)) + disp into one leaq with base, scaled
        // index and displacement. Guarded by VM_Version::supports_fast_3op_lea();
        // no flags effect declared.
10309   predicate(VM_Version::supports_fast_3op_lea());
10310   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10311 
10312   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10313   ins_encode %{
          // The shift-count constant is reinterpreted as an Address::ScaleFactor.
10314     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10315     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10316   %}
10317   ins_pipe(ialu_reg_reg);
10318 %}
10319 
10320 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10321 %{
        // Pointer add in place: matches AddP of dst with a long register offset
        // and encodes addq dst, src. Kills cr. No predicate is attached.
10322   match(Set dst (AddP dst src));
10323   effect(KILL cr);
10324   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10325 
10326   format %{ "addq    $dst, $src\t# ptr" %}
10327   ins_encode %{
10328     __ addq($dst$$Register, $src$$Register);
10329   %}
10330   ins_pipe(ialu_reg_reg);
10331 %}
10332 
10333 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10334 %{
        // Pointer add of an immL32 constant in place: matches AddP of dst with
        // the constant and encodes addq dst, constant. Kills cr.
10335   match(Set dst (AddP dst src));
10336   effect(KILL cr);
10337   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10338 
10339   format %{ "addq    $dst, $src\t# ptr" %}
10340   ins_encode %{
10341     __ addq($dst$$Register, $src$$constant);
10342   %}
10343   ins_pipe( ialu_reg );
10344 %}
10345 
10346 // XXX addP mem ops ????
10347 
10348 instruct checkCastPP(rRegP dst)
10349 %{
        // Matches CheckCastPP on dst in place. Declared with size(0) and an
        // empty encoding, so the rule contributes no machine code.
10350   match(Set dst (CheckCastPP dst));
10351 
10352   size(0);
10353   format %{ "# checkcastPP of $dst" %}
10354   ins_encode(/* empty encoding */);
10355   ins_pipe(empty);
10356 %}
10357 
10358 instruct castPP(rRegP dst)
10359 %{
        // Matches CastPP on dst in place. Declared with size(0) and an empty
        // encoding, so the rule contributes no machine code.
10360   match(Set dst (CastPP dst));
10361 
10362   size(0);
10363   format %{ "# castPP of $dst" %}
10364   ins_encode(/* empty encoding */);
10365   ins_pipe(empty);
10366 %}
10367 
10368 instruct castII(rRegI dst)
10369 %{
        // Unchecked CastII: selected when VerifyConstraintCasts == 0.
        // size(0), empty encoding and ins_cost(0): no machine code is emitted.
        // The checked counterpart is castII_checked.
10370   predicate(VerifyConstraintCasts == 0);
10371   match(Set dst (CastII dst));
10372 
10373   size(0);
10374   format %{ "# castII of $dst" %}
10375   ins_encode(/* empty encoding */);
10376   ins_cost(0);
10377   ins_pipe(empty);
10378 %}
10379 
10380 instruct castII_checked(rRegI dst, rFlagsReg cr)
10381 %{
        // Checked CastII: selected when VerifyConstraintCasts > 0.
        // Calls verify_int_in_range with this node's _idx, its int bottom type
        // and the dst register. Kills cr.
10382   predicate(VerifyConstraintCasts > 0);
10383   match(Set dst (CastII dst));
10384 
10385   effect(KILL cr);
10386   format %{ "# cast_checked_II $dst" %}
10387   ins_encode %{
10388     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10389   %}
10390   ins_pipe(pipe_slow);
10391 %}
10392 
10393 instruct castLL(rRegL dst)
10394 %{
        // Unchecked CastLL: selected when VerifyConstraintCasts == 0.
        // size(0), empty encoding and ins_cost(0): no machine code is emitted.
        // The checked counterparts are castLL_checked_L32 and castLL_checked.
10395   predicate(VerifyConstraintCasts == 0);
10396   match(Set dst (CastLL dst));
10397 
10398   size(0);
10399   format %{ "# castLL of $dst" %}
10400   ins_encode(/* empty encoding */);
10401   ins_cost(0);
10402   ins_pipe(empty);
10403 %}
10404 
10405 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10406 %{
        // Checked CastLL for nodes where castLL_is_imm32(n) holds (presumably
        // the type bounds fit 32-bit immediates - confirm in castLL_is_imm32).
        // Calls verify_long_in_range with noreg in the temp-register position,
        // so no TEMP operand is needed. Kills cr.
10407   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10408   match(Set dst (CastLL dst));
10409 
10410   effect(KILL cr);
10411   format %{ "# cast_checked_LL $dst" %}
10412   ins_encode %{
10413     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10414   %}
10415   ins_pipe(pipe_slow);
10416 %}
10417 
10418 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10419 %{
        // Checked CastLL for nodes where castLL_is_imm32(n) does not hold.
        // Same verify_long_in_range call as castLL_checked_L32, but a TEMP long
        // register is supplied as the last argument. Kills cr.
10420   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10421   match(Set dst (CastLL dst));
10422 
10423   effect(KILL cr, TEMP tmp);
10424   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10425   ins_encode %{
10426     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10427   %}
10428   ins_pipe(pipe_slow);
10429 %}
10430 
10431 instruct castFF(regF dst)
10432 %{
        // Matches CastFF on a regF operand in place. size(0), empty encoding and
        // ins_cost(0): no machine code is emitted.
10433   match(Set dst (CastFF dst));
10434 
10435   size(0);
10436   format %{ "# castFF of $dst" %}
10437   ins_encode(/* empty encoding */);
10438   ins_cost(0);
10439   ins_pipe(empty);
10440 %}
10441 
10442 instruct castHH(regF dst)
10443 %{
        // Matches CastHH in place; the operand class is regF, the same class
        // castFF uses. size(0), empty encoding and ins_cost(0): no machine code
        // is emitted.
10444   match(Set dst (CastHH dst));
10445 
10446   size(0);
10447   format %{ "# castHH of $dst" %}
10448   ins_encode(/* empty encoding */);
10449   ins_cost(0);
10450   ins_pipe(empty);
10451 %}
10452 
10453 instruct castDD(regD dst)
10454 %{
        // Matches CastDD on a regD operand in place. size(0), empty encoding and
        // ins_cost(0): no machine code is emitted.
10455   match(Set dst (CastDD dst));
10456 
10457   size(0);
10458   format %{ "# castDD of $dst" %}
10459   ins_encode(/* empty encoding */);
10460   ins_cost(0);
10461   ins_pipe(empty);
10462 %}
10463 
10464 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
10465 instruct compareAndSwapP(rRegI res,
10466                          memory mem_ptr,
10467                          rax_RegP oldval, rRegP newval,
10468                          rFlagsReg cr)
10469 %{
        // Pointer compare-and-swap producing an int result in res. Both the
        // strong (CompareAndSwapP) and weak (WeakCompareAndSwapP) ideal nodes
        // map to this rule, and only when the node's barrier_data() is 0.
        // oldval uses the rax_RegP operand class and is declared killed, as is cr.
        // Encoding: lock prefix, cmpxchgq newval -> [mem_ptr], then
        // setcc(equal) into res.
10470   predicate(n->as_LoadStore()->barrier_data() == 0);
10471   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10472   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10473   effect(KILL cr, KILL oldval);
10474 
10475   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10476             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10477             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10478   ins_encode %{
10479     __ lock();
10480     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10481     __ setcc(Assembler::equal, $res$$Register);
10482   %}
10483   ins_pipe( pipe_cmpxchg );
10484 %}
10485 
10486 instruct compareAndSwapL(rRegI res,
10487                          memory mem_ptr,
10488                          rax_RegL oldval, rRegL newval,
10489                          rFlagsReg cr)
10490 %{
        // Long compare-and-swap producing an int result in res. Covers both
        // CompareAndSwapL and WeakCompareAndSwapL. oldval uses the rax_RegL
        // operand class and is declared killed, as is cr.
        // Encoding: lock prefix, cmpxchgq newval -> [mem_ptr], then
        // setcc(equal) into res.
10491   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10492   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10493   effect(KILL cr, KILL oldval);
10494 
10495   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10496             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10497             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10498   ins_encode %{
10499     __ lock();
10500     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10501     __ setcc(Assembler::equal, $res$$Register);
10502   %}
10503   ins_pipe( pipe_cmpxchg );
10504 %}
10505 
10506 instruct compareAndSwapI(rRegI res,
10507                          memory mem_ptr,
10508                          rax_RegI oldval, rRegI newval,
10509                          rFlagsReg cr)
10510 %{
        // Int compare-and-swap producing an int result in res. Covers both
        // CompareAndSwapI and WeakCompareAndSwapI. oldval uses the rax_RegI
        // operand class and is declared killed, as is cr.
        // Encoding: lock prefix, cmpxchgl newval -> [mem_ptr], then
        // setcc(equal) into res.
10511   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10512   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10513   effect(KILL cr, KILL oldval);
10514 
10515   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10516             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10517             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10518   ins_encode %{
10519     __ lock();
10520     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10521     __ setcc(Assembler::equal, $res$$Register);
10522   %}
10523   ins_pipe( pipe_cmpxchg );
10524 %}
10525 
10526 instruct compareAndSwapB(rRegI res,
10527                          memory mem_ptr,
10528                          rax_RegI oldval, rRegI newval,
10529                          rFlagsReg cr)
10530 %{
        // Byte compare-and-swap producing an int result in res. Covers both
        // CompareAndSwapB and WeakCompareAndSwapB. Operands are int-class
        // registers; oldval uses rax_RegI and is declared killed, as is cr.
        // Encoding: lock prefix, cmpxchgb newval -> [mem_ptr], then
        // setcc(equal) into res.
10531   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10532   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10533   effect(KILL cr, KILL oldval);
10534 
10535   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10536             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10537             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10538   ins_encode %{
10539     __ lock();
10540     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10541     __ setcc(Assembler::equal, $res$$Register);
10542   %}
10543   ins_pipe( pipe_cmpxchg );
10544 %}
10545 
10546 instruct compareAndSwapS(rRegI res,
10547                          memory mem_ptr,
10548                          rax_RegI oldval, rRegI newval,
10549                          rFlagsReg cr)
10550 %{
        // Short compare-and-swap producing an int result in res. Covers both
        // CompareAndSwapS and WeakCompareAndSwapS. Operands are int-class
        // registers; oldval uses rax_RegI and is declared killed, as is cr.
        // Encoding: lock prefix, cmpxchgw newval -> [mem_ptr], then
        // setcc(equal) into res.
10551   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10552   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10553   effect(KILL cr, KILL oldval);
10554 
10555   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10556             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10557             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10558   ins_encode %{
10559     __ lock();
10560     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10561     __ setcc(Assembler::equal, $res$$Register);
10562   %}
10563   ins_pipe( pipe_cmpxchg );
10564 %}
10565 
10566 instruct compareAndSwapN(rRegI res,
10567                           memory mem_ptr,
10568                           rax_RegN oldval, rRegN newval,
10569                           rFlagsReg cr) %{
        // Narrow-oop (rRegN) compare-and-swap producing an int result in res.
        // Covers both CompareAndSwapN and WeakCompareAndSwapN, and only when the
        // node's barrier_data() is 0. oldval uses rax_RegN and is declared
        // killed, as is cr. Encoding: lock prefix, 32-bit cmpxchgl
        // newval -> [mem_ptr], then setcc(equal) into res.
10570   predicate(n->as_LoadStore()->barrier_data() == 0);
10571   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10572   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10573   effect(KILL cr, KILL oldval);
10574 
10575   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10576             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10577             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10578   ins_encode %{
10579     __ lock();
10580     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10581     __ setcc(Assembler::equal, $res$$Register);
10582   %}
10583   ins_pipe( pipe_cmpxchg );
10584 %}
10585 
10586 instruct compareAndExchangeB(
10587                          memory mem_ptr,
10588                          rax_RegI oldval, rRegI newval,
10589                          rFlagsReg cr)
10590 %{
        // Byte compare-and-exchange. Unlike the compareAndSwap* rules there is no
        // separate result operand: the match rule sets oldval (rax_RegI class),
        // so the outcome is delivered in that register. Kills cr.
        // Encoding: lock prefix followed by cmpxchgb newval -> [mem_ptr].
10591   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10592   effect(KILL cr);
10593 
10594   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10595             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10596   ins_encode %{
10597     __ lock();
10598     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10599   %}
10600   ins_pipe( pipe_cmpxchg );
10601 %}
10602 
10603 instruct compareAndExchangeS(
10604                          memory mem_ptr,
10605                          rax_RegI oldval, rRegI newval,
10606                          rFlagsReg cr)
10607 %{
        // Short compare-and-exchange. The match rule sets oldval (rax_RegI
        // class), so the outcome is delivered in that register. Kills cr.
        // Encoding: lock prefix followed by cmpxchgw newval -> [mem_ptr].
10608   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10609   effect(KILL cr);
10610 
10611   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10612             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10613   ins_encode %{
10614     __ lock();
10615     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10616   %}
10617   ins_pipe( pipe_cmpxchg );
10618 %}
10619 
10620 instruct compareAndExchangeI(
10621                          memory mem_ptr,
10622                          rax_RegI oldval, rRegI newval,
10623                          rFlagsReg cr)
10624 %{
        // Int compare-and-exchange. The match rule sets oldval (rax_RegI class),
        // so the outcome is delivered in that register. Kills cr.
        // Encoding: lock prefix followed by cmpxchgl newval -> [mem_ptr].
10625   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10626   effect(KILL cr);
10627 
10628   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10629             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10630   ins_encode %{
10631     __ lock();
10632     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10633   %}
10634   ins_pipe( pipe_cmpxchg );
10635 %}
10636 
10637 instruct compareAndExchangeL(
10638                          memory mem_ptr,
10639                          rax_RegL oldval, rRegL newval,
10640                          rFlagsReg cr)
10641 %{
        // Long compare-and-exchange. The match rule sets oldval (rax_RegL class),
        // so the outcome is delivered in that register. Kills cr.
        // Encoding: lock prefix followed by cmpxchgq newval -> [mem_ptr].
10642   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10643   effect(KILL cr);
10644 
10645   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10646             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10647   ins_encode %{
10648     __ lock();
10649     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10650   %}
10651   ins_pipe( pipe_cmpxchg );
10652 %}
10653 
10654 instruct compareAndExchangeN(
10655                           memory mem_ptr,
10656                           rax_RegN oldval, rRegN newval,
10657                           rFlagsReg cr) %{
        // Narrow-oop compare-and-exchange, selected only when the node's
        // barrier_data() is 0. The match rule sets oldval (rax_RegN class), so
        // the outcome is delivered in that register. Kills cr.
        // Encoding: lock prefix followed by 32-bit cmpxchgl newval -> [mem_ptr].
10658   predicate(n->as_LoadStore()->barrier_data() == 0);
10659   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10660   effect(KILL cr);
10661 
10662   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10663             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10664   ins_encode %{
10665     __ lock();
10666     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10667   %}
10668   ins_pipe( pipe_cmpxchg );
10669 %}
10670 
10671 instruct compareAndExchangeP(
10672                          memory mem_ptr,
10673                          rax_RegP oldval, rRegP newval,
10674                          rFlagsReg cr)
10675 %{
        // Pointer compare-and-exchange, selected only when the node's
        // barrier_data() is 0. The match rule sets oldval (rax_RegP class), so
        // the outcome is delivered in that register. Kills cr.
        // Encoding: lock prefix followed by cmpxchgq newval -> [mem_ptr].
10676   predicate(n->as_LoadStore()->barrier_data() == 0);
10677   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10678   effect(KILL cr);
10679 
10680   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10681             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10682   ins_encode %{
10683     __ lock();
10684     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10685   %}
10686   ins_pipe( pipe_cmpxchg );
10687 %}
10688 
10689 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
        // GetAndAddB whose result is unused (result_not_used()): the rule sets
        // the Universe operand dummy rather than a register, and encodes a
        // lock-prefixed addb of a register into memory. Kills cr.
10690   predicate(n->as_LoadStore()->result_not_used());
10691   match(Set dummy (GetAndAddB mem add));
10692   effect(KILL cr);
10693   format %{ "addb_lock   $mem, $add" %}
10694   ins_encode %{
10695     __ lock();
10696     __ addb($mem$$Address, $add$$Register);
10697   %}
10698   ins_pipe(pipe_cmpxchg);
10699 %}
10700 
10701 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
        // GetAndAddB with an immI addend whose result is unused
        // (result_not_used()): encodes a lock-prefixed addb of the constant into
        // memory. Kills cr.
10702   predicate(n->as_LoadStore()->result_not_used());
10703   match(Set dummy (GetAndAddB mem add));
10704   effect(KILL cr);
10705   format %{ "addb_lock   $mem, $add" %}
10706   ins_encode %{
10707     __ lock();
10708     __ addb($mem$$Address, $add$$constant);
10709   %}
10710   ins_pipe(pipe_cmpxchg);
10711 %}
10712 
10713 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
        // GetAndAddB whose result is used: newval is both the addend and the
        // result register. Encodes lock + xaddb, then calls
        // narrow_subword_type(newval, T_BYTE) on the returned value (per the
        // format comment, to bring it to byte range). Kills cr.
10714   predicate(!n->as_LoadStore()->result_not_used());
10715   match(Set newval (GetAndAddB mem newval));
10716   effect(KILL cr);
10717   format %{ "xaddb_lock  $mem, $newval\t# $newval -> byte" %}
10718   ins_encode %{
10719     __ lock();
10720     __ xaddb($mem$$Address, $newval$$Register);
10721     __ narrow_subword_type($newval$$Register, T_BYTE);
10722   %}
10723   ins_pipe(pipe_cmpxchg);
10724 %}
10725 
10726 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
        // GetAndAddS whose result is unused (result_not_used()): encodes a
        // lock-prefixed addw of a register into memory. Kills cr.
10727   predicate(n->as_LoadStore()->result_not_used());
10728   match(Set dummy (GetAndAddS mem add));
10729   effect(KILL cr);
10730   format %{ "addw_lock   $mem, $add" %}
10731   ins_encode %{
10732     __ lock();
10733     __ addw($mem$$Address, $add$$Register);
10734   %}
10735   ins_pipe(pipe_cmpxchg);
10736 %}
10737 
10738 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
        // GetAndAddS with an immI addend whose result is unused. In addition to
        // result_not_used(), this rule requires UseStoreImmI16. Encodes a
        // lock-prefixed addw of the constant into memory. Kills cr.
10739   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10740   match(Set dummy (GetAndAddS mem add));
10741   effect(KILL cr);
10742   format %{ "addw_lock   $mem, $add" %}
10743   ins_encode %{
10744     __ lock();
10745     __ addw($mem$$Address, $add$$constant);
10746   %}
10747   ins_pipe(pipe_cmpxchg);
10748 %}
10749 
10750 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
        // GetAndAddS whose result is used: newval is both the addend and the
        // result register. Encodes lock + xaddw, then calls
        // narrow_subword_type(newval, T_SHORT) on the returned value. Kills cr.
10751   predicate(!n->as_LoadStore()->result_not_used());
10752   match(Set newval (GetAndAddS mem newval));
10753   effect(KILL cr);
10754   format %{ "xaddw_lock  $mem, $newval\t# $newval -> short" %}
10755   ins_encode %{
10756     __ lock();
10757     __ xaddw($mem$$Address, $newval$$Register);
10758     __ narrow_subword_type($newval$$Register, T_SHORT);
10759   %}
10760   ins_pipe(pipe_cmpxchg);
10761 %}
10762 
10763 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
        // GetAndAddI whose result is unused (result_not_used()): encodes a
        // lock-prefixed addl of a register into memory. Kills cr.
10764   predicate(n->as_LoadStore()->result_not_used());
10765   match(Set dummy (GetAndAddI mem add));
10766   effect(KILL cr);
10767   format %{ "addl_lock   $mem, $add" %}
10768   ins_encode %{
10769     __ lock();
10770     __ addl($mem$$Address, $add$$Register);
10771   %}
10772   ins_pipe(pipe_cmpxchg);
10773 %}
10774 
10775 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
        // GetAndAddI with an immI addend whose result is unused
        // (result_not_used()): encodes a lock-prefixed addl of the constant into
        // memory. Kills cr.
10776   predicate(n->as_LoadStore()->result_not_used());
10777   match(Set dummy (GetAndAddI mem add));
10778   effect(KILL cr);
10779   format %{ "addl_lock   $mem, $add" %}
10780   ins_encode %{
10781     __ lock();
10782     __ addl($mem$$Address, $add$$constant);
10783   %}
10784   ins_pipe(pipe_cmpxchg);
10785 %}
10786 
10787 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
        // GetAndAddI whose result is used: newval is both the addend and the
        // result register. Encodes lock + xaddl. Kills cr.
10788   predicate(!n->as_LoadStore()->result_not_used());
10789   match(Set newval (GetAndAddI mem newval));
10790   effect(KILL cr);
10791   format %{ "xaddl_lock  $mem, $newval" %}
10792   ins_encode %{
10793     __ lock();
10794     __ xaddl($mem$$Address, $newval$$Register);
10795   %}
10796   ins_pipe(pipe_cmpxchg);
10797 %}
10798 
10799 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
        // GetAndAddL whose result is unused (result_not_used()): encodes a
        // lock-prefixed addq of a register into memory. Kills cr.
10800   predicate(n->as_LoadStore()->result_not_used());
10801   match(Set dummy (GetAndAddL mem add));
10802   effect(KILL cr);
10803   format %{ "addq_lock   $mem, $add" %}
10804   ins_encode %{
10805     __ lock();
10806     __ addq($mem$$Address, $add$$Register);
10807   %}
10808   ins_pipe(pipe_cmpxchg);
10809 %}
10810 
10811 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
        // GetAndAddL with an immL32 addend whose result is unused
        // (result_not_used()): encodes a lock-prefixed addq of the constant into
        // memory. Kills cr.
10812   predicate(n->as_LoadStore()->result_not_used());
10813   match(Set dummy (GetAndAddL mem add));
10814   effect(KILL cr);
10815   format %{ "addq_lock   $mem, $add" %}
10816   ins_encode %{
10817     __ lock();
10818     __ addq($mem$$Address, $add$$constant);
10819   %}
10820   ins_pipe(pipe_cmpxchg);
10821 %}
10822 
10823 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
        // GetAndAddL whose result is used: newval is both the addend and the
        // result register. Encodes lock + xaddq. Kills cr.
10824   predicate(!n->as_LoadStore()->result_not_used());
10825   match(Set newval (GetAndAddL mem newval));
10826   effect(KILL cr);
10827   format %{ "xaddq_lock  $mem, $newval" %}
10828   ins_encode %{
10829     __ lock();
10830     __ xaddq($mem$$Address, $newval$$Register);
10831   %}
10832   ins_pipe(pipe_cmpxchg);
10833 %}
10834 
10835 instruct xchgB( memory mem, rRegI newval) %{
        // GetAndSetB: newval is both the value to store and the result register.
        // Encodes xchgb with no explicit lock() call, then
        // narrow_subword_type(newval, T_BYTE) on the returned value.
        // No flags operand or KILL effect is declared.
10836   match(Set newval (GetAndSetB mem newval));
10837   format %{ "XCHGB  $newval,[$mem]\t# $newval -> byte" %}
10838   ins_encode %{
10839     __ xchgb($newval$$Register, $mem$$Address);
10840     __ narrow_subword_type($newval$$Register, T_BYTE);
10841   %}
10842   ins_pipe( pipe_cmpxchg );
10843 %}
10844 
10845 instruct xchgS( memory mem, rRegI newval) %{
        // GetAndSetS: newval is both the value to store and the result register.
        // Encodes xchgw with no explicit lock() call, then
        // narrow_subword_type(newval, T_SHORT) on the returned value.
        // No flags operand or KILL effect is declared.
10846   match(Set newval (GetAndSetS mem newval));
10847   format %{ "XCHGW  $newval,[$mem]\t# $newval -> short" %}
10848   ins_encode %{
10849     __ xchgw($newval$$Register, $mem$$Address);
10850     __ narrow_subword_type($newval$$Register, T_SHORT);
10851   %}
10852   ins_pipe( pipe_cmpxchg );
10853 %}
10854 
10855 instruct xchgI( memory mem, rRegI newval) %{
        // GetAndSetI: newval is both the value to store and the result register.
        // Encodes a single xchgl with no explicit lock() call.
        // No flags operand or KILL effect is declared.
10856   match(Set newval (GetAndSetI mem newval));
10857   format %{ "XCHGL  $newval,[$mem]" %}
10858   ins_encode %{
10859     __ xchgl($newval$$Register, $mem$$Address);
10860   %}
10861   ins_pipe( pipe_cmpxchg );
10862 %}
10863 
10864 instruct xchgL( memory mem, rRegL newval) %{
        // GetAndSetL: newval is both the value to store and the result register.
        // Encodes a single 64-bit xchgq with no explicit lock() call.
        // No flags operand or KILL effect is declared.
        // The format mnemonic is XCHGQ so the printed text names the
        // instruction that is actually emitted.
10865   match(Set newval (GetAndSetL mem newval));
10866   format %{ "XCHGQ  $newval,[$mem]" %}
10867   ins_encode %{
10868     __ xchgq($newval$$Register, $mem$$Address);
10869   %}
10870   ins_pipe( pipe_cmpxchg );
10871 %}
10872 
10873 instruct xchgP( memory mem, rRegP newval) %{
        // GetAndSetP, selected only when the node's barrier_data() is 0.
        // newval is both the value to store and the result register.
        // Encodes a single xchgq with no explicit lock() call.
10874   match(Set newval (GetAndSetP mem newval));
10875   predicate(n->as_LoadStore()->barrier_data() == 0);
10876   format %{ "XCHGQ  $newval,[$mem]" %}
10877   ins_encode %{
10878     __ xchgq($newval$$Register, $mem$$Address);
10879   %}
10880   ins_pipe( pipe_cmpxchg );
10881 %}
10882 
10883 instruct xchgN( memory mem, rRegN newval) %{
        // GetAndSetN (narrow oop), selected only when the node's barrier_data()
        // is 0. newval is both the value to store and the result register.
        // Encodes a single 32-bit xchgl with no explicit lock() call.
        // The format string brackets the memory operand as [$mem], as the other
        // xchg* rules do.
10884   predicate(n->as_LoadStore()->barrier_data() == 0);
10885   match(Set newval (GetAndSetN mem newval));
10886   format %{ "XCHGL  $newval,[$mem]" %}
10887   ins_encode %{
10888     __ xchgl($newval$$Register, $mem$$Address);
10889   %}
10890   ins_pipe( pipe_cmpxchg );
10891 %}
10892 
10893 //----------Abs Instructions-------------------------------------------
10894 
10895 // Integer Absolute Instructions
10896 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10897 %{
        // Integer absolute value without a branch. dst is declared TEMP (it is
        // zeroed before src is read) and cr is killed.
        // Sequence: dst = 0; dst = dst - src (i.e. -src); if that subtraction
        // compared "less" (0 < src), dst is overwritten with src by cmov.
10898   match(Set dst (AbsI src));
10899   effect(TEMP dst, KILL cr);
10900   format %{ "xorl    $dst, $dst\t# abs int\n\t"
10901             "subl    $dst, $src\n\t"
10902             "cmovll  $dst, $src" %}
10903   ins_encode %{
10904     __ xorl($dst$$Register, $dst$$Register);
10905     __ subl($dst$$Register, $src$$Register);
10906     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
10907   %}
10908 
10909   ins_pipe(ialu_reg_reg);
10910 %}
10911 
10912 // Long Absolute Instructions
10913 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10914 %{
        // Long absolute value without a branch. dst is declared TEMP (it is
        // zeroed before src is read) and cr is killed.
        // Sequence: clear dst with a 32-bit xorl, dst = dst - src using subq
        // (i.e. -src); if that subtraction compared "less" (0 < src), dst is
        // overwritten with src by cmovq.
10915   match(Set dst (AbsL src));
10916   effect(TEMP dst, KILL cr);
10917   format %{ "xorl    $dst, $dst\t# abs long\n\t"
10918             "subq    $dst, $src\n\t"
10919             "cmovlq  $dst, $src" %}
10920   ins_encode %{
10921     __ xorl($dst$$Register, $dst$$Register);
10922     __ subq($dst$$Register, $src$$Register);
10923     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
10924   %}
10925 
10926   ins_pipe(ialu_reg_reg);
10927 %}
10928 
10929 //----------Subtraction Instructions-------------------------------------------
10930 
10931 // Integer Subtraction Instructions
10932 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10933 %{
        // Int subtract in destructive form (dst = dst - src); selected only when
        // UseAPX is off. Encodes a single subl and kills cr.
10934   predicate(!UseAPX);
10935   match(Set dst (SubI dst src));
10936   effect(KILL cr);
10937   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10938 
10939   format %{ "subl    $dst, $src\t# int" %}
10940   ins_encode %{
10941     __ subl($dst$$Register, $src$$Register);
10942   %}
10943   ins_pipe(ialu_reg_reg);
10944 %}
10945 
10946 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10947 %{
        // Int subtract with a separate destination (dst = src1 - src2); selected
        // only when UseAPX is on. Encodes a three-register esubl and kills cr.
        // Only operand 1 carries an ndd_demotable flag (the add rules flag both).
10948   predicate(UseAPX);
10949   match(Set dst (SubI src1 src2));
10950   effect(KILL cr);
10951   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10952 
10953   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
10954   ins_encode %{
10955     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
10956   %}
10957   ins_pipe(ialu_reg_reg);
10958 %}
10959 
10960 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
10961 %{
        // Int subtract of an immI constant with a separate destination
        // (dst = src1 - src2); selected only when UseAPX is on.
        // Encodes esubl with a register and a constant source and kills cr.
10962   predicate(UseAPX);
10963   match(Set dst (SubI src1 src2));
10964   effect(KILL cr);
10965   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10966 
10967   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
10968   ins_encode %{
10969     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
10970   %}
10971   ins_pipe(ialu_reg_reg);
10972 %}
10973 
10974 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10975 %{
        // Int subtract that folds the LoadI of src into the subtract
        // (dst = dst - [src]). No UseAPX predicate; encodes subl with a memory
        // source and kills cr.
10976   match(Set dst (SubI dst (LoadI src)));
10977   effect(KILL cr);
10978   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10979 
10980   ins_cost(150);
10981   format %{ "subl    $dst, $src\t# int" %}
10982   ins_encode %{
10983     __ subl($dst$$Register, $src$$Address);
10984   %}
10985   ins_pipe(ialu_reg_mem);
10986 %}
10987 
10988 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10989 %{
        // Read-modify-write int subtract: matches
        // StoreI(dst, SubI(LoadI(dst), src)) and encodes it as one subl with a
        // memory destination. Kills cr.
10990   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
10991   effect(KILL cr);
10992   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10993 
10994   ins_cost(150);
10995   format %{ "subl    $dst, $src\t# int" %}
10996   ins_encode %{
10997     __ subl($dst$$Address, $src$$Register);
10998   %}
10999   ins_pipe(ialu_mem_reg);
11000 %}
11001 
11002 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11003 %{
        // Long subtract in destructive form (dst = dst - src); selected only
        // when UseAPX is off. Encodes a single subq and kills cr.
11004   predicate(!UseAPX);
11005   match(Set dst (SubL dst src));
11006   effect(KILL cr);
11007   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11008 
11009   format %{ "subq    $dst, $src\t# long" %}
11010   ins_encode %{
11011     __ subq($dst$$Register, $src$$Register);
11012   %}
11013   ins_pipe(ialu_reg_reg);
11014 %}
11015 
11016 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11017 %{
        // Long subtract with a separate destination (dst = src1 - src2);
        // selected only when UseAPX is on. Encodes a three-register esubq and
        // kills cr. Only operand 1 carries an ndd_demotable flag.
11018   predicate(UseAPX);
11019   match(Set dst (SubL src1 src2));
11020   effect(KILL cr);
11021   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11022 
11023   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11024   ins_encode %{
11025     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11026   %}
11027   ins_pipe(ialu_reg_reg);
11028 %}
11029 
11030 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11031 %{
        // Long subtract of an immL32 constant with a separate destination
        // (dst = src1 - src2); selected only when UseAPX is on.
        // Encodes esubq with a register and a constant source and kills cr.
11032   predicate(UseAPX);
11033   match(Set dst (SubL src1 src2));
11034   effect(KILL cr);
11035   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11036 
11037   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11038   ins_encode %{
11039     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11040   %}
11041   ins_pipe(ialu_reg_reg);
11042 %}
11043 
// Long subtract with a memory source: folds the LoadL into the subtract
// (dst = dst - [src]). No UseAPX predicate, so it applies in both modes.
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
11057 
// Long subtract with memory destination: matches a load / SubL / store of
// the same memory operand and emits one read-modify-write "subq [dst], src".
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11071 
// Subtract from a pointer: matches AddP of dst with a negated int
// (SubI zero src) and emits a single 64-bit "subq dst, src".
// XXX (long-standing open question on this rule):
// NOTE(review): src is an rRegI but is consumed by a 64-bit subq, and this
// rule has no flag() clause unlike the other subtract rules here -- confirm
// both are intentional.
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# ptr - int" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11085 
// Integer negate expressed as (0 - dst): in-place "negl dst".
// Only selected when UseAPX is off. The flag() list omits the carry flag
// entry that the subtract rules carry.
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11099 
// Integer negate expressed as (0 - src), "ndd" form with a separate
// destination (dst = -src); used when UseAPX is on. src is the second input
// of the SubI, which matches the Flag_ndd_demotable_opr2 marker.
instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegl    $dst, $src\t# int ndd" %}
  ins_encode %{
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11113 
// Integer negate for the explicit NegI ideal node: in-place "negl dst".
// Same encoding as negI_rReg, which matches the (SubI 0 dst) shape instead.
// Only selected when UseAPX is off.
instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegI dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11127 
// Integer negate for the explicit NegI ideal node, "ndd" form with a
// separate destination (dst = -src); used when UseAPX is on.
instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegI src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegl    $dst, $src\t# int ndd" %}
  ins_encode %{
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11141 
// Integer negate in memory: matches load / (0 - value) / store of the same
// memory operand and emits one read-modify-write "negl [dst]".
// NOTE(review): ins_pipe(ialu_reg) is a register pipeline class on a
// memory-destination rule; other *_mem rules here use ialu_mem_* classes --
// confirm whether this is intentional.
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11154 
// Long negate expressed as (0 - dst): in-place "negq dst".
// Only selected when UseAPX is off.
instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq    $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11168 
// Long negate expressed as (0 - src), "ndd" form with a separate
// destination (dst = -src); used when UseAPX is on.
instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11182 
// Long negate for the explicit NegL ideal node: in-place "negq dst".
// Same encoding as negL_rReg, which matches the (SubL 0 dst) shape instead.
// Only selected when UseAPX is off.
instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegL dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  // The operand is a 64-bit long, so the disassembly annotation says "long"
  // like the other long negate rules.
  format %{ "negq    $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11196 
// Long negate for the explicit NegL ideal node, "ndd" form with a separate
// destination (dst = -src); used when UseAPX is on.
instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegL src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11210 
// Long negate in memory: matches load / (0 - value) / store of the same
// memory operand and emits one read-modify-write "negq [dst]".
// NOTE(review): ins_pipe(ialu_reg) is a register pipeline class on a
// memory-destination rule -- confirm whether this is intentional.
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq    $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11223 
11224 //----------Multiplication/Division Instructions-------------------------------
11225 // Integer Multiplication Instructions
11226 // Multiply Register
11227 
// Integer multiply, register-register, two-operand form (dst = dst * src).
// Only selected when UseAPX is off; also reused by the mulAddS2I_rReg expand.
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulI dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11241 
// Integer multiply, three-operand "ndd" form (dst = src1 * src2) used when
// UseAPX is on. Both source operands are marked demotable in flag().
instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11256 
// Integer multiply by an immediate (dst = src * imm). dst and src are
// separate operands, and the rule carries no UseAPX predicate.
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11269 
// Integer multiply with a memory source: folds the LoadI into the multiply
// (dst = dst * [src]). Costed slightly above the register form (350 vs 300).
instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imull   $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11282 
// Integer multiply of a loaded value by an immediate (dst = [src] * imm);
// the LoadI is folded into the three-operand imull.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11295 
// MulAddS2I: dst = dst*src1 + src2*src3. It has no encoding of its own; it
// expands into two mulI_rReg and one addI_rReg. The second product is
// computed into src2, which is why src2 is declared KILLed.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_rReg(dst, src1, cr);
           mulI_rReg(src2, src3, cr);
           addI_rReg(dst, src2, cr); %}
%}
11305 
// Long multiply, register-register, two-operand form (dst = dst * src).
// Only selected when UseAPX is off; mulL_rReg_ndd covers the UseAPX case.
instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulL dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11319 
// Long multiply, three-operand "ndd" form (dst = src1 * src2) used when
// UseAPX is on. Both source operands are marked demotable in flag().
instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11334 
// Long multiply by an immL32 constant (dst = src * imm). dst and src are
// separate operands, and the rule carries no UseAPX predicate.
instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11347 
// Long multiply with a memory source: folds the LoadL into the multiply
// (dst = dst * [src]). Costed slightly above the register form (350 vs 300).
instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (MulL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imulq   $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11360 
11361 
// Long multiply of a loaded value by an immL32 constant (dst = [src] * imm);
// the LoadL is folded into the three-operand imulq.
instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL (LoadL src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11374 
// MulHiL (high half of a signed long multiply). The result operand is pinned
// to the rdx_RegL class and one input to rax_RegL; the one-operand imulq
// writes RDX:RAX (per the format text), so rax is an input that is also
// destroyed (USE_KILL).
instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (MulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
  ins_encode %{
    __ imulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11387 
// UMulHiL (high half of an unsigned long multiply). Same register pinning as
// mulHiL_rReg (result in rdx_RegL, input in rax_RegL, rax USE_KILLed), but
// emits the one-operand mulq instead of imulq.
instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (UMulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
  ins_encode %{
    __ mulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11400 
// Signed int divide: rax = rax / div. The divisor class excludes rax and
// rdx; rdx and cr are declared KILLed.
// The format text documents the emitted sequence: when rax is 0x80000000
// and div is -1 the idivl is skipped (rdx zeroed), otherwise cdql + idivl.
// The actual bytes come from the cdql_enc encoding class, which is defined
// outside this part of the file.
instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11419 
// Signed long divide: rax = rax / div. The divisor class excludes rax and
// rdx; rdx and cr are declared KILLed.
// The format text documents the emitted sequence: when rax is
// 0x8000000000000000 and div is -1 the idivq is skipped (rdx zeroed),
// otherwise cdqq + idivq. The actual bytes come from the cdqq_enc encoding
// class, which is defined outside this part of the file.
instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rax (DivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11439 
// Unsigned int divide: rax = rax /u div. rdx and cr are declared KILLed.
// Code generation is delegated to the macro-assembler routine udivI, which
// receives rax, the divisor and rdx.
instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rax (UDivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
  ins_encode %{
    __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11452 
// Unsigned long divide: rax = rax /u div. rdx and cr are declared KILLed.
// Code generation is delegated to the macro-assembler routine udivL, which
// receives rax, the divisor and rdx.
instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rax (UDivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
  ins_encode %{
     __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11465 
// Integer DIVMOD with Register, both quotient and mod results.
// The match rule has no "Set": rax and rdx are both plain operands of the
// DivModI node, so only cr is declared KILLed. The format text and the
// cdql_enc encoding are the same ones used by divI_rReg / modI_rReg.
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                             rFlagsReg cr)
%{
  match(DivModI rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}
11485 
// Long DIVMOD with Register, both quotient and mod results.
// The match rule has no "Set": rax and rdx are both plain operands of the
// DivModL node, so only cr is declared KILLed. The format text and the
// cdqq_enc encoding are the same ones used by divL_rReg / modL_rReg.
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                             rFlagsReg cr)
%{
  match(DivModL rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}
11506 
// Unsigned integer DIVMOD with Register, both quotient and mod results.
// Delegates to the macro-assembler routine udivmodI, which also needs a
// scratch register: tmp is declared TEMP and drawn from the class that
// excludes rax and rdx.
instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
                              no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(UDivModI rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
            "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
          %}
  ins_encode %{
    __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11523 
// Unsigned long DIVMOD with Register, both quotient and mod results.
// Delegates to the macro-assembler routine udivmodL, which also needs a
// scratch register: tmp is declared TEMP and drawn from the class that
// excludes rax and rdx.
instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
                              no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(UDivModL rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
            "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
          %}
  ins_encode %{
    __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11540 
// Signed int remainder: rdx = rax % div. Uses the same cdql_enc sequence as
// divI_rReg, but the result operand is rdx and rax is the one declared
// KILLed (together with cr).
instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11559 
// Signed long remainder: rdx = rax % div. Uses the same cdqq_enc sequence as
// divL_rReg, but the result operand is rdx and rax is the one declared
// KILLed (together with cr).
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11579 
// Unsigned int remainder: rdx = rax %u div. rax and cr are declared KILLed.
// Code generation is delegated to the macro-assembler routine umodI.
instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rdx (UModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
  ins_encode %{
    __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11592 
// Unsigned long remainder: rdx = rax %u div. rax and cr are declared KILLed.
// Code generation is delegated to the macro-assembler routine umodL.
instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rdx (UModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
  ins_encode %{
    __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11605 
11606 // Integer Shift Instructions
// Shift Left by one, two, three (immI2 operand), int, in place.
// Two-operand form, only selected when UseAPX is off.
instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
11620 
// Shift Left by one, two, three (immI2 operand), int, "ndd" form with a
// separate destination (dst = src << shift); used when UseAPX is on.
instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11635 
// Shift Left by 8-bit immediate, int, in place (dst = dst << shift).
// Two-operand form, only selected when UseAPX is off.
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
11649 
// Shift Left by 8-bit immediate, int, "ndd" form with a separate
// destination (dst = src << shift); used when UseAPX is on.
instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11664 
// Shift Left by 8-bit immediate, int, memory destination: matches a
// load / shift / store of the same memory operand and emits one
// read-modify-write "sall [dst], shift".
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
11677 
// Shift Left by variable, int, in place. Used only when BMI2 is not
// available (salI_rReg_rReg covers the BMI2 case). The count operand is
// pinned to the rcx_RegI class and is not passed to the assembler call,
// which takes only dst.
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11691 
// Shift Left by variable, int, memory destination (read-modify-write).
// Used only when BMI2 is not available. The count operand is pinned to the
// rcx_RegI class; the assembler call takes only the address.
instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
11705 
// Shift Left by variable, int, BMI2 form: dst = src << shift via shlxl.
// The count may be in any rRegI, and the rule declares no rFlagsReg operand
// or KILL effect.
instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI src shift));

  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11717 
// Shift Left by variable, int, BMI2 form with a memory source: the LoadI is
// folded into shlxl (dst = [src] << shift). No flags operand is declared.
instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
11729 
// Arithmetic Shift Right by 8-bit immediate, int, in place
// (dst = dst >> shift). Two-operand form, only selected when UseAPX is off.
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register, $shift$$constant);
  %}
  // Register-only operation: use the register pipeline class, as the sibling
  // salI_rReg_imm / shrI_rReg_imm rules do, not the memory-immediate class.
  ins_pipe(ialu_reg);
%}
11743 
// Arithmetic Shift Right by 8-bit immediate, int, "ndd" form with a separate
// destination (dst = src >> shift); used when UseAPX is on.
instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  // Register-only operation: use the register pipeline class, as the sibling
  // salI_rReg_imm_ndd / shrI_rReg_imm_ndd rules do, not the memory-immediate
  // class.
  ins_pipe(ialu_reg);
%}
11758 
// Arithmetic Shift Right by 8-bit immediate, int, memory destination:
// matches a load / shift / store of the same memory operand and emits one
// read-modify-write "sarl [dst], shift".
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
11771 
// Arithmetic Shift Right by variable, int, in place. Used only when BMI2 is
// not available (sarI_rReg_rReg covers the BMI2 case). The count operand is
// pinned to the rcx_RegI class; the assembler call takes only dst.
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11785 
// Arithmetic Shift Right by variable, int, memory destination
// (read-modify-write). Used only when BMI2 is not available. The count
// operand is pinned to the rcx_RegI class; the assembler call takes only the
// address.
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
11799 
// Arithmetic Shift Right by variable, int, BMI2 form: dst = src >> shift via
// sarxl. The count may be in any rRegI, and no flags operand is declared.
instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI src shift));

  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11811 
// Arithmetic Shift Right by variable, int, BMI2 form with a memory source:
// the LoadI is folded into sarxl (dst = [src] >> shift).
instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
11823 
// Logical Shift Right by 8-bit immediate, int, in place
// (dst = dst >>> shift). Two-operand form, only selected when UseAPX is off.
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
11837 
// Logical Shift Right by 8-bit immediate, int, "ndd" form with a separate
// destination (dst = src >>> shift); used when UseAPX is on.
instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
    __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11852 
// Logical Shift Right by 8-bit immediate, int, memory destination: matches
// a load / shift / store of the same memory operand and emits one
// read-modify-write "shrl [dst], shift".
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
11865 
// Logical Shift Right by variable, int, in place. Used only when BMI2 is not
// available (shrI_rReg_rReg covers the BMI2 case). The count operand is
// pinned to the rcx_RegI class; the assembler call takes only dst.
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11879 
// Logical Shift Right by variable, int, memory destination
// (read-modify-write). Used only when BMI2 is not available. The count
// operand is pinned to the rcx_RegI class; the assembler call takes only the
// address.
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
11893 
// Logical Shift Right by variable, int, BMI2 form: dst = src >>> shift via
// shrxl. The count may be in any rRegI, and no flags operand is declared.
instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI src shift));

  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11905 
// Logical Shift Right by variable, int, BMI2 form with a memory source: the
// LoadI is folded into shrxl (dst = [src] >>> shift).
instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
11917 
11918 // Long Shift Instructions
// Shift Left by one, two, three (immI2 operand), long, in place.
// Two-operand form, only selected when UseAPX is off.
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
11932 
// Shift Left by one, two, three (immI2 operand), long, "ndd" form with a
// separate destination (dst = src << shift); used when UseAPX is on.
instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11947 
// Shift Left by 8-bit immediate, long, in place (dst = dst << shift).
// Two-operand form, only selected when UseAPX is off.
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
11961 
// Shift Left by 8-bit immediate, long, "ndd" form with a separate
// destination (dst = src << shift); used when UseAPX is on.
instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11976 
// Shift Left by 8-bit immediate, long, memory destination: matches a
// load / shift / store of the same memory operand and emits one
// read-modify-write "salq [dst], shift".
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
11989 
// Shift Left by variable, long, in place. Used only when BMI2 is not
// available (salL_rReg_rReg covers the BMI2 case). The count is an int
// operand pinned to the rcx_RegI class; the assembler call takes only dst.
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12003 
// Shift Left by variable, long, memory destination (read-modify-write).
// Used only when BMI2 is not available. The count operand is pinned to the
// rcx_RegI class; the assembler call takes only the address.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
12017 
// Shift Left by variable, long, BMI2 form: dst = src << shift via shlxq.
// The count is an rRegI in any register, and no flags operand is declared.
instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL src shift));

  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12029 
// Shift Left by variable, long, BMI2 form with a memory source: the LoadL is
// folded into shlxq (dst = [src] << shift).
instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12041 
// Arithmetic Shift Right (long register) by immediate.
// The count operand is a full immI; the encoder masks it to its low six
// bits before emitting, so only counts 0..63 reach the instruction.
// Used when UseAPX is off.
instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
  %}
  // Register destination with an immediate count: use the register ALU
  // pipeline class (as shrL_rReg_imm does), not the memory-immediate one.
  ins_pipe(ialu_reg);
%}
12055 
// Arithmetic Shift Right (long register) by immediate, NDD form with a
// destination separate from the source.
// The count operand is a full immI; the encoder masks it to its low six
// bits before emitting. Used when UseAPX is on.
instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  // Register operands with an immediate count: use the register ALU
  // pipeline class (as shrL_rReg_imm_ndd does), not the memory-immediate one.
  ins_pipe(ialu_reg);
%}
12070 
// Arithmetic Shift Right (long, in memory) by immediate.
// Matches a load/shift/store on a single address. The immI count is masked
// to its low six bits by the encoder.
instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr) %{
  effect(KILL cr);
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}
12083 
// Arithmetic Shift Right (long register) by a variable count.
// The count operand is of class rcx_RegI and is implicit in the encoding.
// Selected only when BMI2 is not supported.
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ sarq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12097 
// Arithmetic Shift Right (long, in memory) by a variable count.
// Matches a load/shift/store on a single address; the count operand is of
// class rcx_RegI. Selected only when BMI2 is not supported.
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
12111 
// Arithmetic Shift Right (long) by a variable count, three-operand form
// (sarxq). Selected when BMI2 is supported; no flags-register effect is
// declared.
instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift) %{
  match(Set dst (RShiftL src shift));
  predicate(VM_Version::supports_bmi2());

  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    const Register count  = $shift$$Register;
    __ sarxq(result, value, count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12123 
// Arithmetic Shift Right (long) of a value loaded from memory by a variable
// count, three-operand form (sarxq) with the load folded in.
// Selected when BMI2 is supported.
instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift) %{
  ins_cost(175);
  match(Set dst (RShiftL (LoadL src) shift));
  predicate(VM_Version::supports_bmi2());

  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register count  = $shift$$Register;
    __ sarxq(result, $src$$Address, count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12135 
// Logical Shift Right (long register) by 8-bit immediate, in place.
// Used when UseAPX is off.
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  predicate(!UseAPX);
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ shrq(value, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12149 
// Logical Shift Right (long register) by 8-bit immediate, NDD form with a
// destination separate from the source. Used when UseAPX is on.
instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr) %{
  match(Set dst (URShiftL src shift));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ eshrq(result, value, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12164 
// Logical Shift Right (long, in memory) by 8-bit immediate.
// Matches a load/shift/store on a single address.
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  effect(KILL cr);
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12177 
// Logical Shift Right (long register) by a variable count.
// The count operand is of class rcx_RegI and is implicit in the encoding.
// Selected only when BMI2 is not supported.
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ shrq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12191 
// Logical Shift Right (long, in memory) by a variable count.
// Matches a load/shift/store on a single address; the count operand is of
// class rcx_RegI. Selected only when BMI2 is not supported.
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  predicate(!VM_Version::supports_bmi2());
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
12205 
// Logical Shift Right (long) by a variable count, three-operand form
// (shrxq). Selected when BMI2 is supported; no flags-register effect is
// declared.
instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift) %{
  match(Set dst (URShiftL src shift));
  predicate(VM_Version::supports_bmi2());

  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    const Register count  = $shift$$Register;
    __ shrxq(result, value, count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12217 
// Logical Shift Right (long) of a value loaded from memory by a variable
// count, three-operand form (shrxq) with the load folded in.
// Selected when BMI2 is supported.
instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift) %{
  ins_cost(175);
  match(Set dst (URShiftL (LoadL src) shift));
  predicate(VM_Version::supports_bmi2());

  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register count  = $shift$$Register;
    __ shrxq(result, $src$$Address, count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12229 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode; it is emitted
// as a single sign-extending byte move.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl  $dst, $src\t# i2b" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ movsbl(result, value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12242 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode; it is emitted
// as a single sign-extending word move.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl  $dst, $src\t# i2s" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ movswl(result, value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12255 
12256 // ROL/ROR instructions
12257 
// Rotate Left (int register) by 8-bit immediate, in place.
// Selected for T_INT nodes only when BMI2 is not supported.
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (RotateLeft dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ roll(value, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12270 
// Rotate Left (int) by 8-bit immediate, three-operand form.
// A left rotate by k is emitted as rorxl with a count of 32 - (k & 31).
// Selected for T_INT nodes when UseAPX is off and BMI2 is supported.
instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift) %{
  match(Set dst (RotateLeft src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);

  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    const int ror_count = 32 - ($shift$$constant & 31);
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ rorxl(result, value, ror_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12282 
// Rotate Left (int) of a value loaded from memory by 8-bit immediate.
// A left rotate by k is emitted as rorxl with a count of 32 - (k & 31),
// with the load folded in. Selected for T_INT nodes when BMI2 is supported.
instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift) %{
  ins_cost(175);
  match(Set dst (RotateLeft (LoadI src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);

  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    const int ror_count = 32 - ($shift$$constant & 31);
    const Register result = $dst$$Register;
    __ rorxl(result, $src$$Address, ror_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12295 
// Rotate Left (int register) by a variable count, in place.
// The count operand is of class rcx_RegI and is implicit in the encoding.
// Selected for T_INT nodes when UseAPX is off.
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateLeft dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ roll(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12308 
// Rotate Left (int register) by a variable count, NDD form with a
// destination separate from the source. The count operand is of class
// rcx_RegI. Selected for T_INT nodes when UseAPX is on.
instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateLeft src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ eroll(result, value, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12323 
// Rotate Right (int register) by 8-bit immediate, in place.
// Selected for T_INT nodes only when BMI2 is not supported.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (RotateRight dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ rorl(value, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12336 
// Rotate Right (int) by 8-bit immediate, three-operand form (rorxl).
// Selected for T_INT nodes when UseAPX is off and BMI2 is supported.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift) %{
  match(Set dst (RotateRight src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);

  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ rorxl(result, value, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12348 
// Rotate Right (int) of a value loaded from memory by 8-bit immediate,
// three-operand form (rorxl) with the load folded in.
// Selected for T_INT nodes when BMI2 is supported.
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift) %{
  ins_cost(175);
  match(Set dst (RotateRight (LoadI src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);

  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ rorxl(result, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12360 
// Rotate Right (int register) by a variable count, in place.
// The count operand is of class rcx_RegI and is implicit in the encoding.
// Selected for T_INT nodes when UseAPX is off.
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateRight dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ rorl(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12373 
// Rotate Right (int register) by a variable count, NDD form with a
// destination separate from the source. The count operand is of class
// rcx_RegI. Selected for T_INT nodes when UseAPX is on.
instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateRight src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ erorl(result, value, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12388 
// Rotate Left (long register) by 8-bit immediate, in place.
// Selected for T_LONG nodes only when BMI2 is not supported.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (RotateLeft dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ rolq(value, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12401 
// Rotate Left (long) by 8-bit immediate, three-operand form.
// A left rotate by k is emitted as rorxq with a count of 64 - (k & 63).
// Selected for T_LONG nodes when UseAPX is off and BMI2 is supported.
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift) %{
  match(Set dst (RotateLeft src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);

  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    const int ror_count = 64 - ($shift$$constant & 63);
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ rorxq(result, value, ror_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12413 
// Rotate Left (long) of a value loaded from memory by 8-bit immediate.
// A left rotate by k is emitted as rorxq with a count of 64 - (k & 63),
// with the load folded in. Selected for T_LONG nodes when BMI2 is supported.
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift) %{
  ins_cost(175);
  match(Set dst (RotateLeft (LoadL src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);

  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    const int ror_count = 64 - ($shift$$constant & 63);
    const Register result = $dst$$Register;
    __ rorxq(result, $src$$Address, ror_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12426 
// Rotate Left (long register) by a variable count, in place.
// The count operand is of class rcx_RegI and is implicit in the encoding.
// Selected for T_LONG nodes when UseAPX is off.
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateLeft dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ rolq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12440 
// Rotate Left (long register) by a variable count, NDD form with a
// destination separate from the source. The count operand is of class
// rcx_RegI. Selected for T_LONG nodes when UseAPX is on.
instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateLeft src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ erolq(result, value, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12455 
// Rotate Right (long register) by 8-bit immediate, in place.
// Selected for T_LONG nodes only when BMI2 is not supported.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (RotateRight dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ rorq(value, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12468 
// Rotate Right (long) by 8-bit immediate, three-operand form (rorxq).
// Selected for T_LONG nodes when BMI2 is supported.
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift) %{
  match(Set dst (RotateRight src shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);

  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ rorxq(result, value, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12480 
// Rotate Right (long) of a value loaded from memory by 8-bit immediate,
// three-operand form (rorxq) with the load folded in.
// Selected for T_LONG nodes when BMI2 is supported.
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift) %{
  ins_cost(175);
  match(Set dst (RotateRight (LoadL src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);

  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ rorxq(result, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12492 
// Rotate Right (long register) by a variable count, in place.
// The count operand is of class rcx_RegI and is implicit in the encoding.
// Selected for T_LONG nodes when UseAPX is off.
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateRight dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ rorq(value);
  %}
  ins_pipe(ialu_reg_reg);
%}
12505 
// Rotate Right (long register) by a variable count, NDD form with a
// destination separate from the source. The count operand is of class
// rcx_RegI. Selected for T_LONG nodes when UseAPX is on.
instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateRight src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ erorq(result, value, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12520 
12521 //----------------------------- CompressBits/ExpandBits ------------------------
12522 
// CompressBits (long) with the mask in a register, emitted as pextq.
// Applies only to nodes whose bottom type is a long.
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask)
%{
  match(Set dst (CompressBits src mask));
  predicate(n->bottom_type()->isa_long());

  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    const Register bits   = $mask$$Register;
    __ pextq(result, value, bits);
  %}
  ins_pipe(pipe_slow);
%}
12532 
// ExpandBits (long) with the mask in a register, emitted as pdepq.
// Applies only to nodes whose bottom type is a long.
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask)
%{
  match(Set dst (ExpandBits src mask));
  predicate(n->bottom_type()->isa_long());

  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    const Register bits   = $mask$$Register;
    __ pdepq(result, value, bits);
  %}
  ins_pipe(pipe_slow);
%}
12542 
// CompressBits (long) with the mask loaded from memory, emitted as pextq
// with the load folded in. Applies only to nodes whose bottom type is a long.
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask)
%{
  match(Set dst (CompressBits src (LoadL mask)));
  predicate(n->bottom_type()->isa_long());

  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ pextq(result, value, $mask$$Address);
  %}
  ins_pipe(pipe_slow);
%}
12552 
// ExpandBits (long) with the mask loaded from memory, emitted as pdepq
// with the load folded in. Applies only to nodes whose bottom type is a long.
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask)
%{
  match(Set dst (ExpandBits src (LoadL mask)));
  predicate(n->bottom_type()->isa_long());

  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ pdepq(result, value, $mask$$Address);
  %}
  ins_pipe(pipe_slow);
%}
12562 
12563 
12564 // Logical Instructions
12565 
12566 // Integer Logical Instructions
12567 
12568 // And Instructions
// And (int): register with register, two-operand form that overwrites dst.
// Used when UseAPX is off.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    const Register other = $src$$Register;
    __ andl(accum, other);
  %}
  ins_pipe(ialu_reg_reg);
%}
12583 
// And (int): register with register using New Data Destination (NDD), so
// dst is separate from both sources. Used when UseAPX is on.
instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    const Register rhs    = $src2$$Register;
    __ eandl(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12599 
// And (int) with the constant 255, emitted as a zero-extending byte move
// instead of an AND. No flags-register effect is declared.
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask) %{
  match(Set dst (AndI src mask));

  format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ movzbl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
12611 
// And (int) with the constant 255 followed by int-to-long conversion,
// emitted as a single zero-extending byte move into a long register.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ movzbl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
12623 
// And (int) with the constant 65535, emitted as a zero-extending word move
// instead of an AND. No flags-register effect is declared.
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask) %{
  match(Set dst (AndI src mask));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ movzwl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
12635 
// And (int) with the constant 65535 followed by int-to-long conversion,
// emitted as a single zero-extending word move into a long register.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ movzwl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
12647 
// Int-to-long conversion of (src & mask) where mask is an immI_Pow2M1
// constant: the explicit conversion is skipped. The bit count
// exact_log2(mask + 1) is loaded into the TEMP register and bzhiq then
// produces the long result from src. Selected when BMI2 is supported.
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr) %{
  ins_cost(125);
  predicate(VM_Version::supports_bmi2());
  effect(TEMP tmp, KILL cr);
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
  ins_encode %{
    const Register bit_count = $tmp$$Register;
    __ movl(bit_count, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, bit_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12662 
// And (int): register with immediate, two-operand form that overwrites dst.
// Used when UseAPX is off.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ andl(accum, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12677 
// And (int): register with immediate, NDD form with a destination separate
// from the register source. Used when UseAPX is on.
instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src1$$Register;
    __ eandl(result, value, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12691 
// And (int): register with a value loaded from memory; the load is folded
// into the instruction and dst is overwritten.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  ins_cost(150);
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ andl(accum, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12706 
// And (byte, in memory) with register.
// Matches a byte load / AndI / byte store on a single address.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  ins_cost(150);
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andb    $dst, $src\t# byte" %}
  ins_encode %{
    const Register other = $src$$Register;
    __ andb($dst$$Address, other);
  %}
  ins_pipe(ialu_mem_reg);
%}
12721 
// And (int, in memory) with register.
// Matches an int load / AndI / int store on a single address.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  ins_cost(150);
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    const Register other = $src$$Register;
    __ andl($dst$$Address, other);
  %}
  ins_pipe(ialu_mem_reg);
%}
12735 
// And (int, in memory) with immediate.
// Matches an int load / AndI / int store on a single address.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  ins_cost(125);
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12750 
12751 // BMI1 instructions
// And-Not (int): (src1 ^ -1) & load(src2), emitted as andnl with the load
// folded in. Selected when both BMI1 and AVX are supported.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr)
%{
  ins_cost(125);
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl  $dst, $src1, $src2" %}
  ins_encode %{
    const Register result   = $dst$$Register;
    const Register inverted = $src1$$Register;
    __ andnl(result, inverted, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12766 
// And-Not (int): (src1 ^ -1) & src2 with both sources in registers, emitted
// as andnl. Selected when both BMI1 and AVX are supported.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl  $dst, $src1, $src2" %}
  ins_encode %{
    const Register result   = $dst$$Register;
    const Register inverted = $src1$$Register;
    const Register other    = $src2$$Register;
    __ andnl(result, inverted, other);
  %}
  ins_pipe(ialu_reg);
%}
12780 
// (0 - src) & src on an int register, emitted as blsil.
// Selected when both BMI1 and AVX are supported.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil  $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ blsil(result, value);
  %}
  ins_pipe(ialu_reg);
%}
12794 
// (0 - load(src)) & load(src) on an int in memory, emitted as blsil with
// the load folded in. Selected when both BMI1 and AVX are supported.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr)
%{
  ins_cost(125);
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil  $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ blsil(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12809 
// (load(src) + -1) ^ load(src) on an int in memory, emitted as blsmskl
// with the load folded in. Selected when both BMI1 and AVX are supported.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr) %{
  ins_cost(125);
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ blsmskl(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12825 
// (src + -1) ^ src on an int register, emitted as blsmskl.
// Selected when both BMI1 and AVX are supported.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src$$Register;
    __ blsmskl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
12841 
// (src + -1) & src on an int register, emitted as blsrl.
// Selected when both BMI1 and AVX are supported.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // Register source: use the register pipeline class, matching the other
  // register-source BMI1 rules (blsiI_rReg_rReg, blsmskI_rReg_rReg).
  ins_pipe(ialu_reg);
%}
12857 
// (load(src) + -1) & load(src) on an int in memory, emitted as blsrl with
// the load folded in. Selected when both BMI1 and AVX are supported.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // Memory source: use the register-from-memory pipeline class, matching
  // the other memory-source BMI1 rules (blsiI_rReg_mem, blsmskI_rReg_mem).
  ins_pipe(ialu_reg_mem);
%}
12874 
12875 // Or Instructions
// Or (int): register with register, two-operand form that overwrites dst.
// Used when UseAPX is off.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    const Register other = $src$$Register;
    __ orl(accum, other);
  %}
  ins_pipe(ialu_reg_reg);
%}
12890 
// Or (int): register with register using New Data Destination (NDD), so
// dst is separate from both sources. Used when UseAPX is on.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    const Register rhs    = $src2$$Register;
    __ eorl(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12905 
// Or (int): register with immediate, two-operand form that overwrites dst.
// Used when UseAPX is off.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ orl(accum, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12920 
// Or (int): register with immediate, NDD form with a destination separate
// from the register source. Used when UseAPX is on.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register value  = $src1$$Register;
    __ eorl(result, value, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12934 
// Or (int): immediate with register, NDD form for the operand order in
// which the constant is the first OrI input. The encoder swaps the operands
// so the register source comes first. Used when UseAPX is on.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  // The only register source is the second operand (src2); src1 is an
  // immediate, so the demotable-operand flag names operand 2.
  // NOTE(review): presumes the opr1/opr2 suffix is the operand position, as
  // on the register-register NDD rules - confirm against the flag's users.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12948 
// Or Register with Memory
// dst = dst | (32-bit load from src). The flags register is declared killed.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ orl(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12963 
// Or Memory with Register (byte)
// Matches a byte load from dst, an int OR with src, and a byte store back
// to the same memory operand. The flags register is declared killed.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orb    $dst, $src\t# byte" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ orb($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
12978 
// Or Memory with Register (int)
// Matches a 32-bit load from dst, an OR with src, and a store back to the
// same memory operand. The flags register is declared killed.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ orl($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
12992 
// Or Memory with Immediate
// Matches a 32-bit load from dst, an OR with the immediate src, and a store
// back to the same memory operand. The flags register is declared killed.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{ __ orl($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
13007 
13008 // Xor Instructions
// Xor Register with Register
// Two-operand form: dst = dst ^ src. Selected only when UseAPX is off;
// the flags register is declared killed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ xorl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13023 
// Xor Register with Register using New Data Destination (NDD)
// Three-operand form: dst = src1 ^ src2. Selected only when UseAPX is on;
// both source operands carry the NDD demotion hint.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ exorl(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13038 
// Xor Register with Immediate -1
// XOR with the all-ones constant is emitted as notl on dst.
// Selected only when UseAPX is off; no flags effect is declared.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
%{
  match(Set dst (XorI dst imm));
  predicate(!UseAPX);

  format %{ "notl    $dst" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ notl(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
13051 
// Xor Register with Immediate -1 using New Data Destination (NDD)
// dst = src ^ -1, emitted as enotl dst, src. Selected only when UseAPX is
// on; the source operand carries the NDD demotion hint.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl    $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ enotl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13064 
// Xor Register with Immediate
// Two-operand form: dst = dst ^ src. Selected only when UseAPX is off.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  // The constant -1 is rejected here so that xorI_rReg_im1 is picked for it
  // no matter how the instruction costs compare.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ xorl(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13080 
// Xor Register with Immediate using New Data Destination (NDD)
// Three-operand form: dst = src1 ^ src2. Selected only when UseAPX is on.
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 src2));
  // The constant -1 is rejected here so that xorI_rReg_im1_ndd is picked for
  // it no matter how the instruction costs compare.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    __ exorl(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13095 
// Xor Register with Memory
// dst = dst ^ (32-bit load from src). The flags register is declared killed.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorI dst (LoadI src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ xorl(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13110 
// Xor Memory with Register (byte)
// Matches a byte load from dst, an int XOR with src, and a byte store back
// to the same memory operand. The flags register is declared killed.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorb    $dst, $src\t# byte" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ xorb($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13125 
// Xor Memory with Register (int)
// Matches a 32-bit load from dst, an XOR with src, and a store back to the
// same memory operand. The flags register is declared killed.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ xorl($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13139 
// Xor Memory with Immediate
// Matches a 32-bit load from dst, an XOR with the immediate src, and a
// store back to the same memory operand. The flags register is declared killed.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{ __ xorl($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
13154 
13155 
13156 // Long Logical Instructions
13157 
13158 // And Instructions
// And Register with Register
// Two-operand 64-bit form: dst = dst & src. Selected only when UseAPX is
// off; the flags register is declared killed.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ andq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13173 
// And Register with Register using New Data Destination (NDD)
// Three-operand 64-bit form: dst = src1 & src2. Selected only when UseAPX
// is on; both source operands carry the NDD demotion hint.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ eandq(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13189 
// And Register with Immediate 255
// A long AND with 0xFF is emitted as a zero-extending byte move; no flags
// effect is declared.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask) %{
  match(Set dst (AndL src mask));

  format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    // The 32-bit movzbl already clears the upper 32 bits, so REX.W is not needed.
    __ movzbl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13202 
// And Register with Immediate 65535
// A long AND with 0xFFFF is emitted as a zero-extending word move; no flags
// effect is declared.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask) %{
  match(Set dst (AndL src mask));

  format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    // The 32-bit movzwl already clears the upper 32 bits, so REX.W is not needed.
    __ movzwl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13215 
// And Register with Immediate
// Two-operand 64-bit form: dst = dst & src, with src an immL32 constant.
// Selected only when UseAPX is off; the flags register is declared killed.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ andq(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13230 
// And Register with Immediate using New Data Destination (NDD)
// Three-operand 64-bit form: dst = src1 & src2, with src2 an immL32
// constant. Selected only when UseAPX is on.
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    __ eandq(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13244 
// And Register with Memory
// dst = dst & (64-bit load from src). The flags register is declared killed.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndL dst (LoadL src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ andq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13259 
// And Memory with Register
// Matches a 64-bit load from dst, an AND with src, and a store back to the
// same memory operand. The flags register is declared killed.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ andq($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13274 
// And Memory with Immediate
// Matches a 64-bit load from dst, an AND with the immL32 constant src, and
// a store back to the same memory operand. The flags register is declared killed.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{ __ andq($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
13289 
// And Memory with an immediate whose complement is a power of 2
// Matches a 64-bit load from dst, an AND with con, and a store back to the
// same memory operand; emitted as btrq with bit index log2(~con).
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  // Only taken when con is a genuine 64-bit immediate (not(con) being a
  // power of 2): plain AND/OR already handles the 8/32-bit values well enough.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
  effect(KILL cr);

  ins_cost(125);
  format %{ "btrq    $dst, log2(not($con))\t# long" %}
  ins_encode %{
    int bit_index = log2i_exact((julong)~$con$$constant);
    __ btrq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
13306 
13307 // BMI1 instructions
// And-not Register with Memory (long)
// dst = (src1 ^ -1) & (64-bit load from src2), emitted as a single andnq.
// Requires both BMI1 and AVX support; the flags register is declared killed.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq  $dst, $src1, $src2" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register inv_reg = $src1$$Register;
    __ andnq(dst_reg, inv_reg, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13322 
// And-not Register with Register (long)
// dst = (src1 ^ -1) & src2, emitted as a single andnq.
// Requires both BMI1 and AVX support; the flags register is declared killed.
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // All operands are registers, so use the register/register ALU pipe class
  // rather than the register/memory one.
  ins_pipe(ialu_reg_reg);
%}
13336 
// Extract lowest set bit, register source (long)
// dst = (0 - src) & src, emitted as a single blsiq.
// Requires both BMI1 and AVX support; the flags register is declared killed.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq  $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ blsiq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13350 
// Extract lowest set bit, memory source (long)
// dst = (0 - load(src)) & load(src), emitted as a single blsiq.
// Requires both BMI1 and AVX support; the flags register is declared killed.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq  $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ blsiq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13365 
// Mask up to lowest set bit, memory source (long)
// dst = (load(src) + -1) ^ load(src), emitted as a single blsmskq.
// Requires both BMI1 and AVX support; the flags register is declared killed.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ blsmskq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13381 
// Mask up to lowest set bit, register source (long)
// dst = (src + -1) ^ src, emitted as a single blsmskq.
// Requires both BMI1 and AVX support; the flags register is declared killed.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ blsmskq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13397 
// Reset lowest set bit, register source (long)
// dst = (src + -1) & src, emitted as a single blsrq.
// Requires both BMI1 and AVX support; the flags register is declared killed.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (AddL src minus_1) src) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq  $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ blsrq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13413 
// Reset lowest set bit, memory source (long)
// dst = (load(src) + -1) & load(src), emitted as a single blsrq.
// Requires both BMI1 and AVX support; the flags register is declared killed.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // The source is a memory operand, so use the register/memory pipe class,
  // matching blsiL_rReg_mem and blsmskL_rReg_mem.
  ins_pipe(ialu_reg_mem);
%}
13430 
13431 // Or Instructions
// Or Register with Register
// Two-operand 64-bit form: dst = dst | src. Selected only when UseAPX is
// off; the flags register is declared killed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ orq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13446 
// Or Register with Register using New Data Destination (NDD)
// Three-operand 64-bit form: dst = src1 | src2. Selected only when UseAPX
// is on; both source operands carry the NDD demotion hint.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ eorq(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13462 
// Or Register with a pointer reinterpreted as a long (CastP2X)
// src is typed any_RegP so that R15 (the TLS register) can be matched
// directly instead of being spilled first.
// Selected only when UseAPX is off; the flags register is declared killed.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register ptr_reg = $src$$Register;
    __ orq(dst_reg, ptr_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13476 
// Or Register with a pointer reinterpreted as a long (CastP2X), NDD form
// dst = src1 | CastP2X(src2). src2 is typed any_RegP so that R15 can be
// matched directly, as in orL_rReg_castP2X.
// src1 is the left input of OrL, i.e. a long value, so it is declared as
// rRegL (the NDD counterpart of dst in orL_rReg_castP2X); a pointer operand
// class there would keep the rule from matching a long input.
// NOTE(review): this makes the rule selectable under UseAPX - confirm with
// an APX-enabled test run.
instruct orL_rReg_castP2X_ndd(rRegL dst, rRegL src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13489 
// Or Register with Immediate
// Two-operand 64-bit form: dst = dst | src, with src an immL32 constant.
// Selected only when UseAPX is off; the flags register is declared killed.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ orq(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13504 
// Or Register with Immediate using New Data Destination (NDD)
// Three-operand 64-bit form: dst = src1 | src2, with src2 an immL32
// constant. Selected only when UseAPX is on.
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    __ eorq(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13518 
// Or Immediate with Register using New Data Destination (NDD)
// Commuted form of orL_rReg_rReg_imm_ndd: the immL32 constant is the first
// operand (src1) and the register is the second (src2). The encoding swaps
// them back, so the emitted instruction is "eorq dst, src2, src1".
// Selected only when UseAPX is on; the flags register is declared killed.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  // The only register source is operand 2 (src2); operand 1 is an immediate
  // and has no register that dst could share, so the demotion hint must name
  // operand 2.
  // NOTE(review): assumes Flag_ndd_demotable_oprN refers to the N-th source
  // operand, as in the register/register NDD forms - confirm against
  // Matcher::is_register_biasing_candidate.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13532 
// Or Register with Memory
// dst = dst | (64-bit load from src). The flags register is declared killed.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (LoadL src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ orq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13547 
// Or Memory with Register
// Matches a 64-bit load from dst, an OR with src, and a store back to the
// same memory operand. The flags register is declared killed.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ orq($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13562 
// Or Memory with Immediate
// Matches a 64-bit load from dst, an OR with the immL32 constant src, and a
// store back to the same memory operand. The flags register is declared killed.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{ __ orq($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
13577 
// Or Memory with a power-of-2 immediate
// Matches a 64-bit load from dst, an OR with con, and a store back to the
// same memory operand; emitted as btsq with bit index log2(con).
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  // Only taken when con is a genuine 64-bit power-of-2 immediate: plain
  // AND/OR already handles the 8/32-bit values well enough.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq    $dst, log2($con)\t# long" %}
  ins_encode %{
    int bit_index = log2i_exact((julong)$con$$constant);
    __ btsq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
13594 
13595 // Xor Instructions
// Xor Register with Register
// Two-operand 64-bit form: dst = dst ^ src. Selected only when UseAPX is
// off; the flags register is declared killed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ xorq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13610 
// Xor Register with Register using New Data Destination (NDD)
// Three-operand 64-bit form: dst = src1 ^ src2. Selected only when UseAPX
// is on; both source operands carry the NDD demotion hint.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (XorL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ exorq(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13625 
// Xor Register with Immediate -1
// A long XOR with the all-ones constant is emitted as notq on dst.
// Selected only when UseAPX is off; no flags effect is declared.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  match(Set dst (XorL dst imm));
  predicate(!UseAPX);

  format %{ "notq   $dst" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ notq(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
13638 
// Xor Register with Immediate -1 using New Data Destination (NDD)
// dst = src ^ -1, emitted as enotq dst, src. Selected only when UseAPX is
// on; the source operand carries the NDD demotion hint.
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
%{
  match(Set dst (XorL src imm));
  predicate(UseAPX);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq   $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ enotq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13651 
// Xor Register with Immediate
// Two-operand 64-bit form: dst = dst ^ src, with src an immL32 constant.
// Selected only when UseAPX is off.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  // The constant -1 is rejected here so that xorL_rReg_im1 is picked for it
  // no matter how the instruction costs compare.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ xorq(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13667 
// Xor Register with Immediate using New Data Destination (NDD)
// Three-operand 64-bit form: dst = src1 ^ src2, with src2 an immL32
// constant. Selected only when UseAPX is on.
instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  match(Set dst (XorL src1 src2));
  // The constant -1 is rejected here so that xorL_rReg_im1_ndd is picked for
  // it no matter how the instruction costs compare.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    __ exorq(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13682 
// Xor Register with Memory
// dst = dst ^ (64-bit load from src). The flags register is declared killed.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorL dst (LoadL src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ xorq(dst_reg, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13697 
// Xor Memory with Register
// Matches a 64-bit load from dst, an XOR with src, and a store back to the
// same memory operand. The flags register is declared killed.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    __ xorq($dst$$Address, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13712 
// Xor Memory with Immediate
// Matches a 64-bit load from dst, an XOR with the immL32 constant src, and
// a store back to the same memory operand. The flags register is declared killed.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  ins_cost(125);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{ __ xorq($dst$$Address, $src$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
13727 
// CmpLTMask of two registers
// Compares p with q, materializes the signed "less" condition in dst via
// setcc, then negates it. The flags register is declared killed.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  // Every line but the last ends in "\n\t" so that each emitted instruction
  // is printed on its own line of the listing.
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setcc   $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl    $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13744 
// CmpLTMask against zero
// Emitted as an arithmetic right shift of dst by 31, which replicates the
// sign bit across the register. The flags register is declared killed.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  ins_cost(100);
  effect(KILL cr);

  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ sarl(dst_reg, 31);
  %}
  ins_pipe(ialu_reg);
%}
13757 
/* Saving a register is preferred here over avoiding a branch. */
// Matches p = ((p <mask q) & y) + (p - q): subtract q from p, then add y
// only when the signed subtraction result is below zero.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  ins_cost(300);
  effect(KILL cr);

  format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
            "jge     done\n\t"
            "addl    $p,$y\n"
            "done:   " %}
  ins_encode %{
    Label skip_add;
    __ subl($p$$Register, $q$$Register);
    // Greater-or-equal after the subtract: leave p as the plain difference.
    __ jccb(Assembler::greaterEqual, skip_add);
    __ addl($p$$Register, $y$$Register);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
13780 
/* Saving a register is preferred here over avoiding a branch. */
// Matches y = (p <mask q) & y: y is kept when p is less than q (signed)
// and zeroed otherwise.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  ins_cost(300);
  effect(KILL cr);

  format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
            "jlt     done\n\t"
            "xorl    $y, $y\n"
            "done:   " %}
  ins_encode %{
    Label keep_y;
    __ cmpl($p$$Register, $q$$Register);
    // p < q: the mask is all ones, so y stays as it is.
    __ jccb(Assembler::less, keep_y);
    __ xorl($y$$Register, $y$$Register);
    __ bind(keep_y);
  %}
  ins_pipe(pipe_cmplt);
%}
13805 
13806 
13807 //---------- FP Instructions------------------------------------------------
13808 
13809 // Really expensive, avoid
13810 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)  // float register-register compare producing flags in an rFlagsRegU operand
13811 %{
13812   match(Set cr (CmpF src1 src2));
13813 
13814   ins_cost(500);  // five times the cost (100) of the rFlagsRegUCF/UCFE rules that match the same tree
13815   format %{ "ucomiss $src1, $src2\n\t"
13816             "jnp,s   exit\n\t"
13817             "pushfq\t# saw NaN, set CF\n\t"
13818             "andq    [rsp], #0xffffff2b\n\t"
13819             "popfq\n"
13820     "exit:" %}
13821   ins_encode %{
13822     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
13823     emit_cmpfp_fixup(masm);  // helper defined elsewhere; per the format text it rewrites the flags after an unordered (NaN) compare - NOTE(review): confirm against the helper
13824   %}
13825   ins_pipe(pipe_slow);
13826 %}
13827 
13828 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{  // same CmpF tree as cmpF_cc_reg but into rFlagsRegUCF: bare ucomiss, no fixup sequence
13829   match(Set cr (CmpF src1 src2));
13830 
13831   ins_cost(100);
13832   format %{ "ucomiss $src1, $src2" %}
13833   ins_encode %{
13834     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);  // NOTE(review): handling of the unordered case is left to users of rFlagsRegUCF - confirm at the operand definition
13835   %}
13836   ins_pipe(pipe_slow);
13837 %}
13838 
13839 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{  // CmpF of two float registers into an rFlagsRegUCFE operand
13840   match(Set cr (CmpF src1 src2));
13841 
13842   ins_cost(100);
13843   format %{ "evucomxss $src1, $src2" %}
13844   ins_encode %{
13845     __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);  // uses evucomxss where the UCF rule uses ucomiss
13846   %}
13847   ins_pipe(pipe_slow);
13848 %}
13849 
13850 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{  // CmpF of a float register against a float loaded from memory, into rFlagsRegUCF
13851   match(Set cr (CmpF src1 (LoadF src2)));  // the LoadF is folded into the compare
13852 
13853   ins_cost(100);
13854   format %{ "ucomiss $src1, $src2" %}
13855   ins_encode %{
13856     __ ucomiss($src1$$XMMRegister, $src2$$Address);  // second operand is read directly from memory
13857   %}
13858   ins_pipe(pipe_slow);
13859 %}
13860 
13861 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{  // CmpF of a float register against a float loaded from memory, into rFlagsRegUCFE
13862   match(Set cr (CmpF src1 (LoadF src2)));  // the LoadF is folded into the compare
13863 
13864   ins_cost(100);
13865   format %{ "evucomxss $src1, $src2" %}
13866   ins_encode %{
13867     __ evucomxss($src1$$XMMRegister, $src2$$Address);  // second operand is read directly from memory
13868   %}
13869   ins_pipe(pipe_slow);
13870 %}
13871 
13872 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{  // CmpF of a float register against a float constant, into rFlagsRegUCF
13873   match(Set cr (CmpF src con));
13874 
13875   ins_cost(100);
13876   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
13877   ins_encode %{
13878     __ ucomiss($src$$XMMRegister, $constantaddress($con));  // the constant is addressed in the constant table, not materialized in a register
13879   %}
13880   ins_pipe(pipe_slow);
13881 %}
13882 
13883 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{  // CmpF of a float register against a float constant, into rFlagsRegUCFE
13884   match(Set cr (CmpF src con));
13885 
13886   ins_cost(100);
13887   format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
13888   ins_encode %{
13889     __ evucomxss($src$$XMMRegister, $constantaddress($con));  // the constant is addressed in the constant table, not materialized in a register
13890   %}
13891   ins_pipe(pipe_slow);
13892 %}
13893 
13894 // Really expensive, avoid
13895 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)  // double register-register compare producing flags in an rFlagsRegU operand
13896 %{
13897   match(Set cr (CmpD src1 src2));
13898 
13899   ins_cost(500);  // five times the cost (100) of the rFlagsRegUCF/UCFE rules that match the same tree
13900   format %{ "ucomisd $src1, $src2\n\t"
13901             "jnp,s   exit\n\t"
13902             "pushfq\t# saw NaN, set CF\n\t"
13903             "andq    [rsp], #0xffffff2b\n\t"
13904             "popfq\n"
13905     "exit:" %}
13906   ins_encode %{
13907     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
13908     emit_cmpfp_fixup(masm);  // helper defined elsewhere; per the format text it rewrites the flags after an unordered (NaN) compare - NOTE(review): confirm against the helper
13909   %}
13910   ins_pipe(pipe_slow);
13911 %}
13912 
13913 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{  // same CmpD tree as cmpD_cc_reg but into rFlagsRegUCF: bare ucomisd, no fixup sequence
13914   match(Set cr (CmpD src1 src2));
13915 
13916   ins_cost(100);
13917   format %{ "ucomisd $src1, $src2" %}
13918   ins_encode %{
13919     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);  // NOTE(review): handling of the unordered case is left to users of rFlagsRegUCF - confirm at the operand definition
13920   %}
13921   ins_pipe(pipe_slow);
13922 %}
13923 
13924 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{  // CmpD of two double registers into an rFlagsRegUCFE operand
13925   match(Set cr (CmpD src1 src2));
13926 
13927   ins_cost(100);
13928   format %{ "evucomxsd $src1, $src2" %}
13929   ins_encode %{
13930     __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);  // uses evucomxsd where the UCF rule uses ucomisd
13931   %}
13932   ins_pipe(pipe_slow);
13933 %}
13934 
13935 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{  // CmpD of a double register against a double loaded from memory, into rFlagsRegUCF
13936   match(Set cr (CmpD src1 (LoadD src2)));  // the LoadD is folded into the compare
13937 
13938   ins_cost(100);
13939   format %{ "ucomisd $src1, $src2" %}
13940   ins_encode %{
13941     __ ucomisd($src1$$XMMRegister, $src2$$Address);  // second operand is read directly from memory
13942   %}
13943   ins_pipe(pipe_slow);
13944 %}
13945 
13946 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{  // CmpD of a double register against a double loaded from memory, into rFlagsRegUCFE
13947   match(Set cr (CmpD src1 (LoadD src2)));  // the LoadD is folded into the compare
13948 
13949   ins_cost(100);
13950   format %{ "evucomxsd $src1, $src2" %}
13951   ins_encode %{
13952     __ evucomxsd($src1$$XMMRegister, $src2$$Address);  // second operand is read directly from memory
13953   %}
13954   ins_pipe(pipe_slow);
13955 %}
13956 
13957 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{  // CmpD of a double register against a double constant, into rFlagsRegUCF
13958   match(Set cr (CmpD src con));
13959   ins_cost(100);
13960   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
13961   ins_encode %{
13962     __ ucomisd($src$$XMMRegister, $constantaddress($con));  // the constant is addressed in the constant table, not materialized in a register
13963   %}
13964   ins_pipe(pipe_slow);
13965 %}
13966 
13967 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{  // CmpD of a double register against a double constant, into rFlagsRegUCFE
13968   match(Set cr (CmpD src con));
13969 
13970   ins_cost(100);
13971   format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
13972   ins_encode %{
13973     __ evucomxsd($src$$XMMRegister, $constantaddress($con));  // the constant is addressed in the constant table, not materialized in a register
13974   %}
13975   ins_pipe(pipe_slow);
13976 %}
13977 
13978 // Compare into -1,0,1
13979 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)  // three-way float compare (CmpF3) of two registers into an int register
13980 %{
13981   match(Set dst (CmpF3 src1 src2));
13982   effect(KILL cr);  // flags register is declared clobbered
13983 
13984   ins_cost(275);
13985   format %{ "ucomiss $src1, $src2\n\t"
13986             "movl    $dst, #-1\n\t"
13987             "jp,s    done\n\t"
13988             "jb,s    done\n\t"
13989             "setne   $dst\n\t"
13990             "movzbl  $dst, $dst\n"
13991     "done:" %}
13992   ins_encode %{
13993     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
13994     emit_cmpfp3(masm, $dst$$Register);  // helper defined elsewhere; the format text shows the intended sequence: -1 on unordered or below, otherwise setne/movzbl yields 0 or 1
13995   %}
13996   ins_pipe(pipe_slow);
13997 %}
13998 
13999 // Compare into -1,0,1
14000 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)  // three-way float compare (CmpF3) of a register against a float loaded from memory
14001 %{
14002   match(Set dst (CmpF3 src1 (LoadF src2)));  // the LoadF is folded into the compare
14003   effect(KILL cr);  // flags register is declared clobbered
14004 
14005   ins_cost(275);
14006   format %{ "ucomiss $src1, $src2\n\t"
14007             "movl    $dst, #-1\n\t"
14008             "jp,s    done\n\t"
14009             "jb,s    done\n\t"
14010             "setne   $dst\n\t"
14011             "movzbl  $dst, $dst\n"
14012     "done:" %}
14013   ins_encode %{
14014     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14015     emit_cmpfp3(masm, $dst$$Register);  // helper defined elsewhere; the format text shows the intended sequence: -1 on unordered or below, otherwise setne/movzbl yields 0 or 1
14016   %}
14017   ins_pipe(pipe_slow);
14018 %}
14019 
14020 // Compare into -1,0,1
14021 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{  // three-way float compare (CmpF3) of a register against a float constant
14022   match(Set dst (CmpF3 src con));
14023   effect(KILL cr);  // flags register is declared clobbered
14024 
14025   ins_cost(275);
14026   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14027             "movl    $dst, #-1\n\t"
14028             "jp,s    done\n\t"
14029             "jb,s    done\n\t"
14030             "setne   $dst\n\t"
14031             "movzbl  $dst, $dst\n"
14032     "done:" %}
14033   ins_encode %{
14034     __ ucomiss($src$$XMMRegister, $constantaddress($con));  // the constant is addressed in the constant table
14035     emit_cmpfp3(masm, $dst$$Register);  // helper defined elsewhere; the format text shows the intended sequence: -1 on unordered or below, otherwise setne/movzbl yields 0 or 1
14036   %}
14037   ins_pipe(pipe_slow);
14038 %}
14039 
14040 // Compare into -1,0,1
14041 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)  // three-way double compare (CmpD3) of two registers into an int register
14042 %{
14043   match(Set dst (CmpD3 src1 src2));
14044   effect(KILL cr);  // flags register is declared clobbered
14045 
14046   ins_cost(275);
14047   format %{ "ucomisd $src1, $src2\n\t"
14048             "movl    $dst, #-1\n\t"
14049             "jp,s    done\n\t"
14050             "jb,s    done\n\t"
14051             "setne   $dst\n\t"
14052             "movzbl  $dst, $dst\n"
14053     "done:" %}
14054   ins_encode %{
14055     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14056     emit_cmpfp3(masm, $dst$$Register);  // helper defined elsewhere; the format text shows the intended sequence: -1 on unordered or below, otherwise setne/movzbl yields 0 or 1
14057   %}
14058   ins_pipe(pipe_slow);
14059 %}
14060 
14061 // Compare into -1,0,1
14062 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)  // three-way double compare (CmpD3) of a register against a double loaded from memory
14063 %{
14064   match(Set dst (CmpD3 src1 (LoadD src2)));  // the LoadD is folded into the compare
14065   effect(KILL cr);  // flags register is declared clobbered
14066 
14067   ins_cost(275);
14068   format %{ "ucomisd $src1, $src2\n\t"
14069             "movl    $dst, #-1\n\t"
14070             "jp,s    done\n\t"
14071             "jb,s    done\n\t"
14072             "setne   $dst\n\t"
14073             "movzbl  $dst, $dst\n"
14074     "done:" %}
14075   ins_encode %{
14076     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14077     emit_cmpfp3(masm, $dst$$Register);  // helper defined elsewhere; the format text shows the intended sequence: -1 on unordered or below, otherwise setne/movzbl yields 0 or 1
14078   %}
14079   ins_pipe(pipe_slow);
14080 %}
14081 
14082 // Compare into -1,0,1
14083 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{  // three-way double compare (CmpD3) of a register against a double constant
14084   match(Set dst (CmpD3 src con));
14085   effect(KILL cr);  // flags register is declared clobbered
14086 
14087   ins_cost(275);
14088   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14089             "movl    $dst, #-1\n\t"
14090             "jp,s    done\n\t"
14091             "jb,s    done\n\t"
14092             "setne   $dst\n\t"
14093             "movzbl  $dst, $dst\n"
14094     "done:" %}
14095   ins_encode %{
14096     __ ucomisd($src$$XMMRegister, $constantaddress($con));  // the constant is addressed in the constant table
14097     emit_cmpfp3(masm, $dst$$Register);  // helper defined elsewhere; the format text shows the intended sequence: -1 on unordered or below, otherwise setne/movzbl yields 0 or 1
14098   %}
14099   ins_pipe(pipe_slow);
14100 %}
14101 
14102 //----------Arithmetic Conversion Instructions---------------------------------
14103 
14104 instruct convF2D_reg_reg(regD dst, regF src)  // float to double conversion, register to register; no predicate and no flags effect
14105 %{
14106   match(Set dst (ConvF2D src));
14107 
14108   format %{ "cvtss2sd $dst, $src" %}
14109   ins_encode %{
14110     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14111   %}
14112   ins_pipe(pipe_slow); // XXX
14113 %}
14114 
14115 instruct convF2D_reg_mem(regD dst, memory src)  // float to double conversion with the float operand read from memory
14116 %{
14117   predicate(UseAVX == 0);  // only selectable without AVX; otherwise the load is not folded by this rule
14118   match(Set dst (ConvF2D (LoadF src)));  // the LoadF is folded into the conversion
14119 
14120   format %{ "cvtss2sd $dst, $src" %}
14121   ins_encode %{
14122     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14123   %}
14124   ins_pipe(pipe_slow); // XXX
14125 %}
14126 
14127 instruct convD2F_reg_reg(regF dst, regD src)  // double to float conversion, register to register; no predicate and no flags effect
14128 %{
14129   match(Set dst (ConvD2F src));
14130 
14131   format %{ "cvtsd2ss $dst, $src" %}
14132   ins_encode %{
14133     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14134   %}
14135   ins_pipe(pipe_slow); // XXX
14136 %}
14137 
14138 instruct convD2F_reg_mem(regF dst, memory src)  // double to float conversion with the double operand read from memory
14139 %{
14140   predicate(UseAVX == 0);  // only selectable without AVX; otherwise the load is not folded by this rule
14141   match(Set dst (ConvD2F (LoadD src)));  // the LoadD is folded into the conversion
14142 
14143   format %{ "cvtsd2ss $dst, $src" %}
14144   ins_encode %{
14145     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14146   %}
14147   ins_pipe(pipe_slow); // XXX
14148 %}
14149 
14150 // XXX do mem variants
14151 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)  // float to int conversion for targets without AVX10.2
14152 %{
14153   predicate(!VM_Version::supports_avx10_2());  // the *_avx10_2 rules take over when AVX10.2 is supported
14154   match(Set dst (ConvF2I src));
14155   effect(KILL cr);  // flags register is declared clobbered by the helper sequence
14156   format %{ "convert_f2i $dst, $src" %}
14157   ins_encode %{
14158     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);  // shared helper, parameterized by destination and source basic type
14159   %}
14160   ins_pipe(pipe_slow);
14161 %}
14162 
14163 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)  // float to int conversion as a single instruction when AVX10.2 is supported
14164 %{
14165   predicate(VM_Version::supports_avx10_2());
14166   match(Set dst (ConvF2I src));  // unlike convF2I_reg_reg, no flags effect is declared
14167   format %{ "evcvttss2sisl $dst, $src" %}
14168   ins_encode %{
14169     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14170   %}
14171   ins_pipe(pipe_slow);
14172 %}
14173 
14174 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)  // float to int conversion with the float read from memory, AVX10.2 only
14175 %{
14176   predicate(VM_Version::supports_avx10_2());
14177   match(Set dst (ConvF2I (LoadF src)));  // the LoadF is folded into the conversion
14178   format %{ "evcvttss2sisl $dst, $src" %}
14179   ins_encode %{
14180     __ evcvttss2sisl($dst$$Register, $src$$Address);
14181   %}
14182   ins_pipe(pipe_slow);
14183 %}
14184 
14185 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)  // float to long conversion for targets without AVX10.2
14186 %{
14187   predicate(!VM_Version::supports_avx10_2());  // the *_avx10_2 rules take over when AVX10.2 is supported
14188   match(Set dst (ConvF2L src));
14189   effect(KILL cr);  // flags register is declared clobbered by the helper sequence
14190   format %{ "convert_f2l $dst, $src"%}
14191   ins_encode %{
14192     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);  // same helper as the int case, with T_LONG as destination type
14193   %}
14194   ins_pipe(pipe_slow);
14195 %}
14196 
14197 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)  // float to long conversion as a single instruction when AVX10.2 is supported
14198 %{
14199   predicate(VM_Version::supports_avx10_2());
14200   match(Set dst (ConvF2L src));  // unlike convF2L_reg_reg, no flags effect is declared
14201   format %{ "evcvttss2sisq $dst, $src" %}
14202   ins_encode %{
14203     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14204   %}
14205   ins_pipe(pipe_slow);
14206 %}
14207 
14208 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)  // float to long conversion with the float read from memory, AVX10.2 only
14209 %{
14210   predicate(VM_Version::supports_avx10_2());
14211   match(Set dst (ConvF2L (LoadF src)));  // the LoadF is folded into the conversion
14212   format %{ "evcvttss2sisq $dst, $src" %}
14213   ins_encode %{
14214     __ evcvttss2sisq($dst$$Register, $src$$Address);
14215   %}
14216   ins_pipe(pipe_slow);
14217 %}
14218 
14219 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)  // double to int conversion for targets without AVX10.2
14220 %{
14221   predicate(!VM_Version::supports_avx10_2());  // the *_avx10_2 rules take over when AVX10.2 is supported
14222   match(Set dst (ConvD2I src));
14223   effect(KILL cr);  // flags register is declared clobbered by the helper sequence
14224   format %{ "convert_d2i $dst, $src"%}
14225   ins_encode %{
14226     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);  // same helper as the float cases, with T_DOUBLE as source type
14227   %}
14228   ins_pipe(pipe_slow);
14229 %}
14230 
14231 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)  // double to int conversion as a single instruction when AVX10.2 is supported
14232 %{
14233   predicate(VM_Version::supports_avx10_2());
14234   match(Set dst (ConvD2I src));  // unlike convD2I_reg_reg, no flags effect is declared
14235   format %{ "evcvttsd2sisl $dst, $src" %}
14236   ins_encode %{
14237     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14238   %}
14239   ins_pipe(pipe_slow);
14240 %}
14241 
14242 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)  // double to int conversion with the double read from memory, AVX10.2 only
14243 %{
14244   predicate(VM_Version::supports_avx10_2());
14245   match(Set dst (ConvD2I (LoadD src)));  // the LoadD is folded into the conversion
14246   format %{ "evcvttsd2sisl $dst, $src" %}
14247   ins_encode %{
14248     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14249   %}
14250   ins_pipe(pipe_slow);
14251 %}
14252 
14253 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)  // double to long conversion for targets without AVX10.2
14254 %{
14255   predicate(!VM_Version::supports_avx10_2());  // the *_avx10_2 rules take over when AVX10.2 is supported
14256   match(Set dst (ConvD2L src));
14257   effect(KILL cr);  // flags register is declared clobbered by the helper sequence
14258   format %{ "convert_d2l $dst, $src"%}
14259   ins_encode %{
14260     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);  // same helper, with T_LONG destination and T_DOUBLE source
14261   %}
14262   ins_pipe(pipe_slow);
14263 %}
14264 
14265 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)  // double to long conversion as a single instruction when AVX10.2 is supported
14266 %{
14267   predicate(VM_Version::supports_avx10_2());
14268   match(Set dst (ConvD2L src));  // unlike convD2L_reg_reg, no flags effect is declared
14269   format %{ "evcvttsd2sisq $dst, $src" %}
14270   ins_encode %{
14271     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14272   %}
14273   ins_pipe(pipe_slow);
14274 %}
14275 
14276 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)  // double to long conversion with the double read from memory, AVX10.2 only
14277 %{
14278   predicate(VM_Version::supports_avx10_2());
14279   match(Set dst (ConvD2L (LoadD src)));  // the LoadD is folded into the conversion
14280   format %{ "evcvttsd2sisq $dst, $src" %}
14281   ins_encode %{
14282     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14283   %}
14284   ins_pipe(pipe_slow);
14285 %}
14286 
14287 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)  // RoundD: double in an XMM register to a long result, via the round_double helper
14288 %{
14289   match(Set dst (RoundD src));
14290   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);  // dst, rtmp and rcx are all declared TEMP; flags are clobbered
14291   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14292   ins_encode %{
14293     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);  // rcx is passed explicitly as the second scratch register
14294   %}
14295   ins_pipe(pipe_slow);
14296 %}
14297 
14298 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)  // RoundF: float in an XMM register to an int result, via the round_float helper
14299 %{
14300   match(Set dst (RoundF src));
14301   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);  // dst, rtmp and rcx are all declared TEMP; flags are clobbered
14302   format %{ "round_float $dst,$src" %}
14303   ins_encode %{
14304     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);  // same scratch registers as round_double_reg, though the format text does not list them
14305   %}
14306   ins_pipe(pipe_slow);
14307 %}
14308 
14309 instruct convI2F_reg_reg(vlRegF dst, rRegI src)  // int to float conversion from a general register, used when UseXmmI2F is off
14310 %{
14311   predicate(!UseXmmI2F);  // convXI2F_reg handles the UseXmmI2F case
14312   match(Set dst (ConvI2F src));
14313 
14314   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14315   ins_encode %{
14316     if (UseAVX > 0) {
14317       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);  // zero dst before converting; presumably removes a dependency on its old contents - TODO confirm
14318     }
14319     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14320   %}
14321   ins_pipe(pipe_slow); // XXX
14322 %}
14323 
14324 instruct convI2F_reg_mem(regF dst, memory src)  // int to float conversion with the int read from memory
14325 %{
14326   predicate(UseAVX == 0);  // only selectable without AVX
14327   match(Set dst (ConvI2F (LoadI src)));  // the LoadI is folded into the conversion
14328 
14329   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14330   ins_encode %{
14331     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14332   %}
14333   ins_pipe(pipe_slow); // XXX
14334 %}
14335 
14336 instruct convI2D_reg_reg(vlRegD dst, rRegI src)  // int to double conversion from a general register, used when UseXmmI2D is off
14337 %{
14338   predicate(!UseXmmI2D);  // convXI2D_reg handles the UseXmmI2D case
14339   match(Set dst (ConvI2D src));
14340 
14341   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14342   ins_encode %{
14343     if (UseAVX > 0) {
14344       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);  // zero dst before converting; presumably removes a dependency on its old contents - TODO confirm
14345     }
14346     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14347   %}
14348   ins_pipe(pipe_slow); // XXX
14349 %}
14350 
14351 instruct convI2D_reg_mem(regD dst, memory src)  // int to double conversion with the int read from memory
14352 %{
14353   predicate(UseAVX == 0);  // only selectable without AVX
14354   match(Set dst (ConvI2D (LoadI src)));  // the LoadI is folded into the conversion
14355 
14356   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14357   ins_encode %{
14358     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14359   %}
14360   ins_pipe(pipe_slow); // XXX
14361 %}
14362 
14363 instruct convXI2F_reg(regF dst, rRegI src)  // int to float conversion done inside the XMM register, used when UseXmmI2F is on
14364 %{
14365   predicate(UseXmmI2F);  // convI2F_reg_reg handles the opposite case
14366   match(Set dst (ConvI2F src));
14367 
14368   format %{ "movdl $dst, $src\n\t"
14369             "cvtdq2psl $dst, $dst\t# i2f" %}
14370   ins_encode %{
14371     __ movdl($dst$$XMMRegister, $src$$Register);  // first move the int into the XMM register
14372     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);  // then convert it in place
14373   %}
14374   ins_pipe(pipe_slow); // XXX
14375 %}
14376 
14377 instruct convXI2D_reg(regD dst, rRegI src)  // int to double conversion done inside the XMM register, used when UseXmmI2D is on
14378 %{
14379   predicate(UseXmmI2D);  // convI2D_reg_reg handles the opposite case
14380   match(Set dst (ConvI2D src));
14381 
14382   format %{ "movdl $dst, $src\n\t"
14383             "cvtdq2pdl $dst, $dst\t# i2d" %}
14384   ins_encode %{
14385     __ movdl($dst$$XMMRegister, $src$$Register);  // first move the int into the XMM register
14386     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);  // then convert it in place
14387   %}
14388   ins_pipe(pipe_slow); // XXX
14389 %}
14390 
14391 instruct convL2F_reg_reg(vlRegF dst, rRegL src)  // long to float conversion from a general register; no predicate
14392 %{
14393   match(Set dst (ConvL2F src));
14394 
14395   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14396   ins_encode %{
14397     if (UseAVX > 0) {
14398       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);  // zero dst before converting; presumably removes a dependency on its old contents - TODO confirm
14399     }
14400     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14401   %}
14402   ins_pipe(pipe_slow); // XXX
14403 %}
14404 
14405 instruct convL2F_reg_mem(regF dst, memory src)  // long to float conversion with the long read from memory
14406 %{
14407   predicate(UseAVX == 0);  // only selectable without AVX
14408   match(Set dst (ConvL2F (LoadL src)));  // the LoadL is folded into the conversion
14409 
14410   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14411   ins_encode %{
14412     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14413   %}
14414   ins_pipe(pipe_slow); // XXX
14415 %}
14416 
14417 instruct convL2D_reg_reg(vlRegD dst, rRegL src)  // long to double conversion from a general register; no predicate
14418 %{
14419   match(Set dst (ConvL2D src));
14420 
14421   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14422   ins_encode %{
14423     if (UseAVX > 0) {
14424       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);  // zero dst before converting; presumably removes a dependency on its old contents - TODO confirm
14425     }
14426     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14427   %}
14428   ins_pipe(pipe_slow); // XXX
14429 %}
14430 
14431 instruct convL2D_reg_mem(regD dst, memory src)  // long to double conversion with the long read from memory
14432 %{
14433   predicate(UseAVX == 0);  // only selectable without AVX
14434   match(Set dst (ConvL2D (LoadL src)));  // the LoadL is folded into the conversion
14435 
14436   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14437   ins_encode %{
14438     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14439   %}
14440   ins_pipe(pipe_slow); // XXX
14441 %}
14442 
14443 instruct convI2L_reg_reg(rRegL dst, rRegI src)  // int to long conversion by sign extension
14444 %{
14445   match(Set dst (ConvI2L src));
14446 
14447   ins_cost(125);
14448   format %{ "movslq  $dst, $src\t# i2l" %}
14449   ins_encode %{
14450     __ movslq($dst$$Register, $src$$Register);  // sign-extending 32 to 64 bit move
14451   %}
14452   ins_pipe(ialu_reg_reg);
14453 %}
14454 
14455 // Zero-extend convert int to long
14456 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)  // (ConvI2L src) & mask collapsed into one zero-extending 32-bit move
14457 %{
14458   match(Set dst (AndL (ConvI2L src) mask));  // mask is restricted to the immL_32bits operand class (presumably the constant 0xFFFFFFFF - confirm at its definition)
14459 
14460   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
14461   ins_encode %{
14462     if ($dst$$reg != $src$$reg) {  // when both operands got the same register no instruction is emitted
14463       __ movl($dst$$Register, $src$$Register);
14464     }
14465   %}
14466   ins_pipe(ialu_reg_reg);
14467 %}
14468 
14469 // Zero-extend convert int to long
14470 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)  // (ConvI2L (LoadI mem)) & mask collapsed into one zero-extending 32-bit load
14471 %{
14472   match(Set dst (AndL (ConvI2L (LoadI src)) mask));  // load, conversion and mask are all folded into this rule
14473 
14474   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
14475   ins_encode %{
14476     __ movl($dst$$Register, $src$$Address);
14477   %}
14478   ins_pipe(ialu_reg_mem);
14479 %}
14480 
14481 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)  // long & mask (immL_32bits operand class) implemented as a 32-bit register move
14482 %{
14483   match(Set dst (AndL src mask));
14484 
14485   format %{ "movl    $dst, $src\t# zero-extend long" %}
14486   ins_encode %{
14487     __ movl($dst$$Register, $src$$Register);  // emitted unconditionally, unlike convI2L_reg_reg_zex which skips the move when dst and src coincide
14488   %}
14489   ins_pipe(ialu_reg_reg);
14490 %}
14491 
14492 instruct convL2I_reg_reg(rRegI dst, rRegL src)  // long to int conversion: keep the low 32 bits
14493 %{
14494   match(Set dst (ConvL2I src));
14495 
14496   format %{ "movl    $dst, $src\t# l2i" %}
14497   ins_encode %{
14498     __ movl($dst$$Register, $src$$Register);  // 32-bit move copies only the low half of src
14499   %}
14500   ins_pipe(ialu_reg_reg);
14501 %}
14502 
14503 
14504 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{  // MoveF2I where the float lives in a stack slot: load its 32 bits into a general register
14505   match(Set dst (MoveF2I src));
14506   effect(DEF dst, USE src);
14507 
14508   ins_cost(125);
14509   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
14510   ins_encode %{
14511     __ movl($dst$$Register, Address(rsp, $src$$disp));  // the slot is addressed as rsp plus the slot displacement
14512   %}
14513   ins_pipe(ialu_reg_mem);
14514 %}
14515 
14516 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{  // MoveI2F where the int lives in a stack slot: load its 32 bits into an XMM register
14517   match(Set dst (MoveI2F src));
14518   effect(DEF dst, USE src);
14519 
14520   ins_cost(125);
14521   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
14522   ins_encode %{
14523     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));  // the slot is addressed as rsp plus the slot displacement
14524   %}
14525   ins_pipe(pipe_slow);
14526 %}
14527 
14528 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{  // MoveD2L where the double lives in a stack slot: load its 64 bits into a general register
14529   match(Set dst (MoveD2L src));
14530   effect(DEF dst, USE src);
14531 
14532   ins_cost(125);
14533   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
14534   ins_encode %{
14535     __ movq($dst$$Register, Address(rsp, $src$$disp));  // the slot is addressed as rsp plus the slot displacement
14536   %}
14537   ins_pipe(ialu_reg_mem);
14538 %}
14539 
14540 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{  // MoveL2D from a stack slot into an XMM register, variant for !UseXmmLoadAndClearUpper
14541   predicate(!UseXmmLoadAndClearUpper);  // MoveL2D_stack_reg is the variant for the opposite setting
14542   match(Set dst (MoveL2D src));
14543   effect(DEF dst, USE src);
14544 
14545   ins_cost(125);
14546   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
14547   ins_encode %{
14548     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));  // same call as in MoveL2D_stack_reg; presumably movdbl picks movlpd or movsd from the flag - TODO confirm
14549   %}
14550   ins_pipe(pipe_slow);
14551 %}
14552 
14553 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{  // MoveL2D from a stack slot into an XMM register, variant for UseXmmLoadAndClearUpper
14554   predicate(UseXmmLoadAndClearUpper);  // MoveL2D_stack_reg_partial is the variant for the opposite setting
14555   match(Set dst (MoveL2D src));
14556   effect(DEF dst, USE src);
14557 
14558   ins_cost(125);
14559   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
14560   ins_encode %{
14561     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));  // same call as in the _partial variant; presumably movdbl picks movsd or movlpd from the flag - TODO confirm
14562   %}
14563   ins_pipe(pipe_slow);
14564 %}
14565 
14566 
14567 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{  // MoveF2I with the result in a stack slot: store the float register to the int slot
14568   match(Set dst (MoveF2I src));
14569   effect(DEF dst, USE src);
14570 
14571   ins_cost(95); // XXX
14572   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
14573   ins_encode %{
14574     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);  // the slot is addressed as rsp plus the slot displacement
14575   %}
14576   ins_pipe(pipe_slow);
14577 %}
14578 
14579 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{  // MoveI2F with the result in a stack slot: store the int register to the float slot
14580   match(Set dst (MoveI2F src));
14581   effect(DEF dst, USE src);
14582 
14583   ins_cost(100);
14584   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
14585   ins_encode %{
14586     __ movl(Address(rsp, $dst$$disp), $src$$Register);  // the slot is addressed as rsp plus the slot displacement
14587   %}
14588   ins_pipe( ialu_mem_reg );
14589 %}
14590 
14591 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{  // MoveD2L with the result in a stack slot: store the double register to the long slot
14592   match(Set dst (MoveD2L src));
14593   effect(DEF dst, USE src);
14594 
14595   ins_cost(95); // XXX
14596   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
14597   ins_encode %{
14598     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);  // the slot is addressed as rsp plus the slot displacement
14599   %}
14600   ins_pipe(pipe_slow);
14601 %}
14602 
14603 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{  // MoveL2D with the result in a stack slot: store the long register to the double slot
14604   match(Set dst (MoveL2D src));
14605   effect(DEF dst, USE src);
14606 
14607   ins_cost(100);
14608   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
14609   ins_encode %{
14610     __ movq(Address(rsp, $dst$$disp), $src$$Register);  // the slot is addressed as rsp plus the slot displacement
14611   %}
14612   ins_pipe(ialu_mem_reg);
14613 %}
14614 
14615 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{  // MoveF2I directly from an XMM register to a general register
14616   match(Set dst (MoveF2I src));
14617   effect(DEF dst, USE src);
14618   ins_cost(85);  // lower than the stack-slot variants of MoveF2I (95 and 125)
14619   format %{ "movd    $dst,$src\t# MoveF2I" %}
14620   ins_encode %{
14621     __ movdl($dst$$Register, $src$$XMMRegister);
14622   %}
14623   ins_pipe( pipe_slow );
14624 %}
14625 
14626 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{  // MoveD2L directly from an XMM register to a general register
14627   match(Set dst (MoveD2L src));
14628   effect(DEF dst, USE src);
14629   ins_cost(85);  // lower than the stack-slot variants of MoveD2L (95 and 125)
14630   format %{ "movd    $dst,$src\t# MoveD2L" %}
14631   ins_encode %{
14632     __ movdq($dst$$Register, $src$$XMMRegister);  // note: the format text prints movd while the assembler call is movdq
14633   %}
14634   ins_pipe( pipe_slow );
14635 %}
14636 
14637 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{  // MoveI2F directly from a general register to an XMM register
14638   match(Set dst (MoveI2F src));
14639   effect(DEF dst, USE src);
14640   ins_cost(100);
14641   format %{ "movd    $dst,$src\t# MoveI2F" %}
14642   ins_encode %{
14643     __ movdl($dst$$XMMRegister, $src$$Register);
14644   %}
14645   ins_pipe( pipe_slow );
14646 %}
14647 
14648 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{  // MoveL2D directly from a general register to an XMM register
14649   match(Set dst (MoveL2D src));
14650   effect(DEF dst, USE src);
14651   ins_cost(100);
14652   format %{ "movd    $dst,$src\t# MoveL2D" %}
14653   ins_encode %{
14654      __ movdq($dst$$XMMRegister, $src$$Register);  // note: the format text prints movd while the assembler call is movdq
14655   %}
14656   ins_pipe( pipe_slow );
14657 %}
14658 
14659 // Fast clearing of an array
14660 // Small non-constant length ClearArray for non-AVX512 targets.
14661 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14662                   Universe dummy, rFlagsReg cr)
14663 %{  // ClearArray for nodes not flagged large when UseAVX <= 2; count is pinned to rcx, base to rdi, zero to rax
14664   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));  // rep_stos_evex covers UseAVX > 2, rep_stos_large covers is_large() nodes
14665   match(Set dummy (ClearArray cnt base));
14666   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);  // cnt and base are consumed and clobbered; tmp is scratch; rax and flags are clobbered
14667 
14668   format %{ $$template
14669     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14670     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14671     $$emit$$"jg      LARGE\n\t"
14672     $$emit$$"dec     rcx\n\t"
14673     $$emit$$"js      DONE\t# Zero length\n\t"
14674     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14675     $$emit$$"dec     rcx\n\t"
14676     $$emit$$"jge     LOOP\n\t"
14677     $$emit$$"jmp     DONE\n\t"
14678     $$emit$$"# LARGE:\n\t"
14679     if (UseFastStosb) {
14680        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14681        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
14682     } else if (UseXMMForObjInit) {
14683        $$emit$$"mov     rdi,rax\n\t"
14684        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14685        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14686        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14687        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14688        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14689        $$emit$$"add     0x40,rax\n\t"
14690        $$emit$$"# L_zero_64_bytes:\n\t"
14691        $$emit$$"sub     0x8,rcx\n\t"
14692        $$emit$$"jge     L_loop\n\t"
14693        $$emit$$"add     0x4,rcx\n\t"
14694        $$emit$$"jl      L_tail\n\t"
14695        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14696        $$emit$$"add     0x20,rax\n\t"
14697        $$emit$$"sub     0x4,rcx\n\t"
14698        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14699        $$emit$$"add     0x4,rcx\n\t"
14700        $$emit$$"jle     L_end\n\t"
14701        $$emit$$"dec     rcx\n\t"
14702        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14703        $$emit$$"vmovq   xmm0,(rax)\n\t"
14704        $$emit$$"add     0x8,rax\n\t"
14705        $$emit$$"dec     rcx\n\t"
14706        $$emit$$"jge     L_sloop\n\t"
14707        $$emit$$"# L_end:\n\t"
14708     } else {
14709        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
14710     }
14711     $$emit$$"# DONE"
14712   %}
14713   ins_encode %{  // all code comes from clear_mem; the format template above is printed text only
14714     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14715                  $tmp$$XMMRegister, false, knoreg);  // false here versus true in the _large rules (presumably the is_large flag - confirm); knoreg since this rule has no mask register
14716   %}
14717   ins_pipe(pipe_slow);
14718 %}
14719 
14720 // Small non-constant length ClearArray for AVX512 targets.
14721 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
14722                        Universe dummy, rFlagsReg cr)
14723 %{  // ClearArray for nodes not flagged large when UseAVX > 2; like rep_stos but with an extra mask-register temp
14724   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));  // rep_stos covers UseAVX <= 2
14725   match(Set dummy (ClearArray cnt base));
14726   ins_cost(125);
14727   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);  // cnt and base are consumed and clobbered; tmp and ktmp are scratch; rax and flags are clobbered
14728 
14729   format %{ $$template
14730     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14731     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14732     $$emit$$"jg      LARGE\n\t"
14733     $$emit$$"dec     rcx\n\t"
14734     $$emit$$"js      DONE\t# Zero length\n\t"
14735     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
14736     $$emit$$"dec     rcx\n\t"
14737     $$emit$$"jge     LOOP\n\t"
14738     $$emit$$"jmp     DONE\n\t"
14739     $$emit$$"# LARGE:\n\t"
14740     if (UseFastStosb) {
14741        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14742        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
14743     } else if (UseXMMForObjInit) {
14744        $$emit$$"mov     rdi,rax\n\t"
14745        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14746        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14747        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14748        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14749        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14750        $$emit$$"add     0x40,rax\n\t"
14751        $$emit$$"# L_zero_64_bytes:\n\t"
14752        $$emit$$"sub     0x8,rcx\n\t"
14753        $$emit$$"jge     L_loop\n\t"
14754        $$emit$$"add     0x4,rcx\n\t"
14755        $$emit$$"jl      L_tail\n\t"
14756        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14757        $$emit$$"add     0x20,rax\n\t"
14758        $$emit$$"sub     0x4,rcx\n\t"
14759        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14760        $$emit$$"add     0x4,rcx\n\t"
14761        $$emit$$"jle     L_end\n\t"
14762        $$emit$$"dec     rcx\n\t"
14763        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14764        $$emit$$"vmovq   xmm0,(rax)\n\t"
14765        $$emit$$"add     0x8,rax\n\t"
14766        $$emit$$"dec     rcx\n\t"
14767        $$emit$$"jge     L_sloop\n\t"
14768        $$emit$$"# L_end:\n\t"
14769     } else {
14770        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
14771     }
14772     $$emit$$"# DONE"
14773   %}
14774   ins_encode %{  // all code comes from clear_mem; the format template above is printed text only
14775     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14776                  $tmp$$XMMRegister, false, $ktmp$$KRegister);  // false here versus true in the _large rules (presumably the is_large flag - confirm); the mask temp is passed where rep_stos passes knoreg
14777   %}
14778   ins_pipe(pipe_slow);
14779 %}
14780 
14781 // Large non-constant length ClearArray for non-AVX512 targets.
14782 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14783                         Universe dummy, rFlagsReg cr)
14784 %{  // ClearArray for nodes flagged large when UseAVX <= 2; the format text has no short-length loop, unlike rep_stos
14785   predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());  // rep_stos covers the nodes that are not is_large()
14786   match(Set dummy (ClearArray cnt base));
14787   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);  // cnt and base are consumed and clobbered; tmp is scratch; rax and flags are clobbered
14788 
14789   format %{ $$template
14790     if (UseFastStosb) {
14791        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14792        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14793        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
14794     } else if (UseXMMForObjInit) {
14795        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
14796        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14797        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14798        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14799        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14800        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14801        $$emit$$"add     0x40,rax\n\t"
14802        $$emit$$"# L_zero_64_bytes:\n\t"
14803        $$emit$$"sub     0x8,rcx\n\t"
14804        $$emit$$"jge     L_loop\n\t"
14805        $$emit$$"add     0x4,rcx\n\t"
14806        $$emit$$"jl      L_tail\n\t"
14807        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14808        $$emit$$"add     0x20,rax\n\t"
14809        $$emit$$"sub     0x4,rcx\n\t"
14810        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14811        $$emit$$"add     0x4,rcx\n\t"
14812        $$emit$$"jle     L_end\n\t"
14813        $$emit$$"dec     rcx\n\t"
14814        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14815        $$emit$$"vmovq   xmm0,(rax)\n\t"
14816        $$emit$$"add     0x8,rax\n\t"
14817        $$emit$$"dec     rcx\n\t"
14818        $$emit$$"jge     L_sloop\n\t"
14819        $$emit$$"# L_end:\n\t"
14820     } else {
14821        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14822        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
14823     }
14824   %}
14825   ins_encode %{  // all code comes from clear_mem; the format template above is printed text only
14826     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14827                  $tmp$$XMMRegister, true, knoreg);  // true here versus false in rep_stos (presumably the is_large flag - confirm); knoreg since this rule has no mask register
14828   %}
14829   ins_pipe(pipe_slow);
14830 %}
14831 
14832 // Large non-constant length ClearArray for AVX512 targets.
14833 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
14834                              Universe dummy, rFlagsReg cr)
14835 %{
        // ClearArray rule selected when UseAVX > 2 and the ClearArrayNode
        // reports is_large(). Compared with rep_stos_large it additionally
        // reserves a kReg TEMP (ktmp) and uses legRegD for the vector TEMP.
14836   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
14837   match(Set dummy (ClearArray cnt base));
14838   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
14839 
14840   format %{ $$template
14841     if (UseFastStosb) {
14842        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14843        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
14844        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
14845     } else if (UseXMMForObjInit) {
14846        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
14847        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
14848        $$emit$$"jmpq    L_zero_64_bytes\n\t"
14849        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
14850        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14851        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
14852        $$emit$$"add     0x40,rax\n\t"
14853        $$emit$$"# L_zero_64_bytes:\n\t"
14854        $$emit$$"sub     0x8,rcx\n\t"
14855        $$emit$$"jge     L_loop\n\t"
14856        $$emit$$"add     0x4,rcx\n\t"
14857        $$emit$$"jl      L_tail\n\t"
14858        $$emit$$"vmovdqu ymm0,(rax)\n\t"
14859        $$emit$$"add     0x20,rax\n\t"
14860        $$emit$$"sub     0x4,rcx\n\t"
14861        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
14862        $$emit$$"add     0x4,rcx\n\t"
14863        $$emit$$"jle     L_end\n\t"
14864        $$emit$$"dec     rcx\n\t"
14865        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
14866        $$emit$$"vmovq   xmm0,(rax)\n\t"
14867        $$emit$$"add     0x8,rax\n\t"
14868        $$emit$$"dec     rcx\n\t"
14869        $$emit$$"jge     L_sloop\n\t"
14870        $$emit$$"# L_end:\n\t"
14871     } else {
14872        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14873        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
14874     }
14875   %}
14876   ins_encode %{
          // Same clear_mem() entry point as rep_stos_large, but the allocated
          // ktmp register is handed over instead of knoreg. The `true`
          // argument is presumably the "is_large" flag -- TODO confirm.
14877     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
14878                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
14879   %}
14880   ins_pipe(pipe_slow);
14881 %}
14882 
14883 // Small constant length ClearArray for AVX512 targets.
14884 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
14885 %{
        // ClearArray rule for an immediate (immL) count. Selected only when the
        // node is not is_large(), MaxVectorSize >= 32 and AVX512VL is supported.
        // base is a plain input here (no USE_KILL); tmp, zero and ktmp are TEMPs.
14886   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
14887   match(Set dummy (ClearArray cnt base));
14888   ins_cost(100);
14889   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
14890   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
14891   ins_encode %{
          // Uses the clear_mem() form that takes the count as a compile-time
          // constant ($cnt$$constant) rather than in a register.
14892    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
14893   %}
14894   ins_pipe(pipe_slow);
14895 %}
14896 
14897 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14898                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
14899 %{
        // StrComp rule for encoding LL when AVX512VL+BW is NOT available.
        // All four string operands are USE_KILL; tmp1 is a vector TEMP.
14900   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
14901   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14902   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14903 
14904   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
14905   ins_encode %{
          // knoreg: this rule allocates no kReg operand.
14906     __ string_compare($str1$$Register, $str2$$Register,
14907                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
14908                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
14909   %}
14910   ins_pipe( pipe_slow );
14911 %}
14912 
14913 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14914                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
14915 %{
        // StrComp rule for encoding LL when AVX512VL+BW is available.
        // Differs from string_compareL only by the extra kReg TEMP (ktmp).
14916   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
14917   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14918   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14919 
14920   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
14921   ins_encode %{
          // Passes the allocated ktmp where the non-EVEX rule passes knoreg.
14922     __ string_compare($str1$$Register, $str2$$Register,
14923                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
14924                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
14925   %}
14926   ins_pipe( pipe_slow );
14927 %}
14928 
14929 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14930                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
14931 %{
        // StrComp rule for encoding UU when AVX512VL+BW is NOT available.
        // All four string operands are USE_KILL; tmp1 is a vector TEMP.
14932   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
14933   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14934   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14935 
14936   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
14937   ins_encode %{
          // knoreg: this rule allocates no kReg operand.
14938     __ string_compare($str1$$Register, $str2$$Register,
14939                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
14940                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
14941   %}
14942   ins_pipe( pipe_slow );
14943 %}
14944 
14945 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14946                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
14947 %{
        // StrComp rule for encoding UU when AVX512VL+BW is available.
        // Differs from string_compareU only by the extra kReg TEMP (ktmp).
14948   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
14949   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14950   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14951 
14952   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
14953   ins_encode %{
          // Passes the allocated ktmp where the non-EVEX rule passes knoreg.
14954     __ string_compare($str1$$Register, $str2$$Register,
14955                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
14956                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
14957   %}
14958   ins_pipe( pipe_slow );
14959 %}
14960 
14961 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14962                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
14963 %{
        // StrComp rule for the mixed encoding LU when AVX512VL+BW is NOT
        // available. Operands are forwarded in declaration order (contrast
        // with string_compareUL, which swaps them).
14964   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
14965   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14966   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14967 
14968   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
14969   ins_encode %{
          // knoreg: this rule allocates no kReg operand.
14970     __ string_compare($str1$$Register, $str2$$Register,
14971                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
14972                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
14973   %}
14974   ins_pipe( pipe_slow );
14975 %}
14976 
14977 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
14978                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
14979 %{
        // StrComp rule for the mixed encoding LU when AVX512VL+BW is available.
        // Differs from string_compareLU only by the extra kReg TEMP (ktmp).
14980   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
14981   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14982   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14983 
14984   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
14985   ins_encode %{
          // Passes the allocated ktmp where the non-EVEX rule passes knoreg.
14986     __ string_compare($str1$$Register, $str2$$Register,
14987                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
14988                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
14989   %}
14990   ins_pipe( pipe_slow );
14991 %}
14992 
14993 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
14994                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
14995 %{
        // StrComp rule for the mixed encoding UL when AVX512VL+BW is NOT
        // available. Note the operand classes: str1/cnt1 use the rsi/rdx
        // classes and str2/cnt2 the rdi/rcx classes, the reverse of the
        // other string_compare* rules.
14996   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
14997   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
14998   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
14999 
15000   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15001   ins_encode %{
          // str2/cnt2 are deliberately passed in the first-string slots and
          // str1/cnt1 in the second; the UL encoding constant tells the
          // assembler routine which case it is handling.
15002     __ string_compare($str2$$Register, $str1$$Register,
15003                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15004                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15005   %}
15006   ins_pipe( pipe_slow );
15007 %}
15008 
15009 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15010                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15011 %{
        // StrComp rule for the mixed encoding UL when AVX512VL+BW is available.
        // Same reversed operand classes as string_compareUL, plus a kReg TEMP.
15012   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15013   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15014   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15015 
15016   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15017   ins_encode %{
          // str2/cnt2 go first and str1/cnt1 second, as in string_compareUL;
          // ktmp replaces knoreg.
15018     __ string_compare($str2$$Register, $str1$$Register,
15019                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15020                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15021   %}
15022   ins_pipe( pipe_slow );
15023 %}
15024 
15025 // fast search of substring with known size.
15026 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15027                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15028 %{
        // StrIndexOf rule for encoding LL where the second count is an
        // immediate (int_cnt2). Requires UseSSE42Intrinsics. cnt2 is not a
        // match input here; it is only KILLed, as is tmp.
15029   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15030   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15031   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15032 
15033   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15034   ins_encode %{
          // The constant length picks the assembler routine: 16 elements or
          // more goes to string_indexofC8, anything shorter to string_indexof.
15035     int icnt2 = (int)$int_cnt2$$constant;
15036     if (icnt2 >= 16) {
15037       // IndexOf for constant substrings with size >= 16 elements
15038       // which don't need to be loaded through stack.
15039       __ string_indexofC8($str1$$Register, $str2$$Register,
15040                           $cnt1$$Register, $cnt2$$Register,
15041                           icnt2, $result$$Register,
15042                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15043     } else {
15044       // Small strings are loaded through stack if they cross page boundary.
15045       __ string_indexof($str1$$Register, $str2$$Register,
15046                         $cnt1$$Register, $cnt2$$Register,
15047                         icnt2, $result$$Register,
15048                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15049     }
15050   %}
15051   ins_pipe( pipe_slow );
15052 %}
15053 
15054 // fast search of substring with known size.
15055 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15056                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15057 %{
        // StrIndexOf rule for encoding UU where the second count is an
        // immediate (int_cnt2). Requires UseSSE42Intrinsics. cnt2 is not a
        // match input here; it is only KILLed, as is tmp.
15058   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15059   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15060   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15061 
15062   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15063   ins_encode %{
          // Threshold is 8 elements here (the LL rule uses 16): 8 or more
          // goes to string_indexofC8, anything shorter to string_indexof.
15064     int icnt2 = (int)$int_cnt2$$constant;
15065     if (icnt2 >= 8) {
15066       // IndexOf for constant substrings with size >= 8 elements
15067       // which don't need to be loaded through stack.
15068       __ string_indexofC8($str1$$Register, $str2$$Register,
15069                           $cnt1$$Register, $cnt2$$Register,
15070                           icnt2, $result$$Register,
15071                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15072     } else {
15073       // Small strings are loaded through stack if they cross page boundary.
15074       __ string_indexof($str1$$Register, $str2$$Register,
15075                         $cnt1$$Register, $cnt2$$Register,
15076                         icnt2, $result$$Register,
15077                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15078     }
15079   %}
15080   ins_pipe( pipe_slow );
15081 %}
15082 
15083 // fast search of substring with known size.
15084 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15085                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15086 %{
        // StrIndexOf rule for the mixed encoding UL where the second count is
        // an immediate (int_cnt2). Requires UseSSE42Intrinsics. cnt2 is not a
        // match input here; it is only KILLed, as is tmp.
15087   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15088   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15089   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15090 
15091   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15092   ins_encode %{
          // Same 8-element threshold as the UU rule: 8 or more goes to
          // string_indexofC8, anything shorter to string_indexof.
15093     int icnt2 = (int)$int_cnt2$$constant;
15094     if (icnt2 >= 8) {
15095       // IndexOf for constant substrings with size >= 8 elements
15096       // which don't need to be loaded through stack.
15097       __ string_indexofC8($str1$$Register, $str2$$Register,
15098                           $cnt1$$Register, $cnt2$$Register,
15099                           icnt2, $result$$Register,
15100                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15101     } else {
15102       // Small strings are loaded through stack if they cross page boundary.
15103       __ string_indexof($str1$$Register, $str2$$Register,
15104                         $cnt1$$Register, $cnt2$$Register,
15105                         icnt2, $result$$Register,
15106                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15107     }
15108   %}
15109   ins_pipe( pipe_slow );
15110 %}
15111 
15112 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15113                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15114 %{
        // StrIndexOf rule for encoding LL with both counts in registers.
        // Requires UseSSE42Intrinsics. Unlike the *_con* rules, cnt2 is a real
        // match input (USE_KILL).
15115   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15116   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15117   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15118 
15119   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15120   ins_encode %{
          // (-1) occupies the slot where the *_con* rules pass the constant
          // length; presumably it marks the length as unknown -- TODO confirm.
15121     __ string_indexof($str1$$Register, $str2$$Register,
15122                       $cnt1$$Register, $cnt2$$Register,
15123                       (-1), $result$$Register,
15124                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15125   %}
15126   ins_pipe( pipe_slow );
15127 %}
15128 
15129 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15130                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15131 %{
        // StrIndexOf rule for encoding UU with both counts in registers.
        // Requires UseSSE42Intrinsics. cnt2 is a real match input (USE_KILL).
15132   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15133   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15134   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15135 
15136   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15137   ins_encode %{
          // (-1) occupies the slot where the *_con* rules pass the constant
          // length; presumably it marks the length as unknown -- TODO confirm.
15138     __ string_indexof($str1$$Register, $str2$$Register,
15139                       $cnt1$$Register, $cnt2$$Register,
15140                       (-1), $result$$Register,
15141                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15142   %}
15143   ins_pipe( pipe_slow );
15144 %}
15145 
15146 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15147                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15148 %{
        // StrIndexOf rule for the mixed encoding UL with both counts in
        // registers. Requires UseSSE42Intrinsics. cnt2 is a real match input
        // (USE_KILL).
15149   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15150   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15151   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15152 
15153   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15154   ins_encode %{
          // (-1) occupies the slot where the *_con* rules pass the constant
          // length; presumably it marks the length as unknown -- TODO confirm.
15155     __ string_indexof($str1$$Register, $str2$$Register,
15156                       $cnt1$$Register, $cnt2$$Register,
15157                       (-1), $result$$Register,
15158                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15159   %}
15160   ins_pipe( pipe_slow );
15161 %}
15162 
15163 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15164                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15165 %{
        // StrIndexOfChar rule for encoding U (the format text labels it
        // StringUTF16). Requires UseSSE42Intrinsics. Uses three vector TEMPs
        // and one integer TEMP; str1, cnt1 and ch are USE_KILL.
15166   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15167   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15168   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15169   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15170   ins_encode %{
15171     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15172                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15173   %}
15174   ins_pipe( pipe_slow );
15175 %}
15176 
15177 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15178                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15179 %{
        // StrIndexOfChar rule for encoding L (the format text labels it
        // StringLatin1). Same operands and effects as string_indexof_char, but
        // dispatches to the stringL_indexof_char assembler routine.
15180   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15181   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15182   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15183   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15184   ins_encode %{
15185     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15186                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15187   %}
15188   ins_pipe( pipe_slow );
15189 %}
15190 
15191 // fast string equals
15192 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15193                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15194 %{
        // StrEquals rule used when AVX512VL+BW is NOT available. str1, str2
        // and cnt are USE_KILL; tmp1/tmp2 are vector TEMPs; tmp3 is KILLed.
15195   predicate(!VM_Version::supports_avx512vlbw());
15196   match(Set result (StrEquals (Binary str1 str2) cnt));
15197   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15198 
15199   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15200   ins_encode %{
          // Shares arrays_equals() with the AryEq rules; the leading `false`
          // distinguishes this call from them (they pass `true`) and the
          // explicit cnt register is supplied as the length operand.
15201     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15202                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15203                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15204   %}
15205   ins_pipe( pipe_slow );
15206 %}
15207 
15208 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15209                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15210 %{
        // StrEquals rule used when AVX512VL+BW is available. Differs from
        // string_equals only by the extra kReg TEMP (ktmp).
15211   predicate(VM_Version::supports_avx512vlbw());
15212   match(Set result (StrEquals (Binary str1 str2) cnt));
15213   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15214 
15215   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15216   ins_encode %{
          // Same arrays_equals(false, ...) call as string_equals, with ktmp in
          // place of knoreg.
15217     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15218                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15219                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15220   %}
15221   ins_pipe( pipe_slow );
15222 %}
15223 
15224 // fast array equals
15225 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15226                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15227 %{
        // AryEq rule for encoding LL (byte[]) when AVX512VL+BW is NOT
        // available. There is no count input in the match; tmp3 and tmp4 are
        // KILLed integer registers.
15228   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15229   match(Set result (AryEq ary1 ary2));
15230   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15231 
15232   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15233   ins_encode %{
          // Leading `true` marks the array form of arrays_equals(); tmp3 sits
          // in the slot where the StrEquals rules pass their cnt register.
15234     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15235                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15236                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15237   %}
15238   ins_pipe( pipe_slow );
15239 %}
15240 
15241 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15242                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15243 %{
        // AryEq rule for encoding LL (byte[]) when AVX512VL+BW is available.
        // Differs from array_equalsB only by the extra kReg TEMP (ktmp).
15244   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15245   match(Set result (AryEq ary1 ary2));
15246   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15247 
15248   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15249   ins_encode %{
          // Same arrays_equals(true, ...) call as array_equalsB, with ktmp in
          // place of knoreg.
15250     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15251                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15252                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15253   %}
15254   ins_pipe( pipe_slow );
15255 %}
15256 
15257 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15258                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15259 %{
        // AryEq rule for encoding UU (char[]) when AVX512VL+BW is NOT
        // available. Operands and effects mirror array_equalsB.
15260   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15261   match(Set result (AryEq ary1 ary2));
15262   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15263 
15264   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15265   ins_encode %{
          // The only difference from the byte[] rule is the `true /* char */`
          // flag passed to arrays_equals().
15266     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15267                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15268                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15269   %}
15270   ins_pipe( pipe_slow );
15271 %}
15272 
15273 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15274                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15275 %{
        // AryEq rule for encoding UU (char[]) when AVX512VL+BW is available.
        // Differs from array_equalsC only by the extra kReg TEMP (ktmp).
15276   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15277   match(Set result (AryEq ary1 ary2));
15278   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15279 
15280   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15281   ins_encode %{
          // Same arrays_equals(true, ..., true /* char */) call as
          // array_equalsC, with ktmp in place of knoreg.
15282     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15283                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15284                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15285   %}
15286   ins_pipe( pipe_slow );
15287 %}
15288 
15289 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15290                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15291                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15292                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15293                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15294 %{
        // VectorizedHashCode rule, enabled when UseAVX >= 2. `result` appears
        // on both sides of the match, so it is an input as well as the output.
        // Reserves 13 vector TEMPs and 3 integer TEMPs; ary1 and cnt1 are
        // USE_KILL; basic_type is an 8-bit immediate.
15295   predicate(UseAVX >= 2);
15296   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15297   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15298          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15299          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15300          USE basic_type, KILL cr);
15301 
15302   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15303   ins_encode %{
          // The immediate is reinterpreted as a BasicType and passed as the
          // final argument of arrays_hashcode().
15304     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15305                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15306                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15307                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15308                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15309                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15310                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15311   %}
15312   ins_pipe( pipe_slow );
15313 %}
15314 
15315 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15316                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15317 %{
        // CountPositives rule used when AVX512VL+BW or BMI2 is missing (the
        // EVEX variant requires both). ary1 and len are USE_KILL; tmp1/tmp2
        // are vector TEMPs; tmp3 is KILLed.
15318   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15319   match(Set result (CountPositives ary1 len));
15320   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15321 
15322   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15323   ins_encode %{
          // Both mask-register slots get knoreg: this rule has no kReg operands.
15324     __ count_positives($ary1$$Register, $len$$Register,
15325                        $result$$Register, $tmp3$$Register,
15326                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15327   %}
15328   ins_pipe( pipe_slow );
15329 %}
15330 
15331 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15332                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15333 %{
        // CountPositives rule used when both AVX512VL+BW and BMI2 are
        // supported. Adds two kReg TEMPs (ktmp1, ktmp2) to the operands of
        // count_positives.
15334   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15335   match(Set result (CountPositives ary1 len));
15336   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15337 
15338   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15339   ins_encode %{
          // ktmp1/ktmp2 fill the two slots where count_positives passes knoreg.
15340     __ count_positives($ary1$$Register, $len$$Register,
15341                        $result$$Register, $tmp3$$Register,
15342                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15343   %}
15344   ins_pipe( pipe_slow );
15345 %}
15346 
15347 // fast char[] to byte[] compression
15348 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15349                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // StrCompressedCopy rule used when AVX512VL+BW or BMI2 is missing (the
        // EVEX variant requires both). src, dst and len are USE_KILL; four
        // vector TEMPs; tmp5 is KILLed; the value is produced in `result`.
15350   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15351   match(Set result (StrCompressedCopy src (Binary dst len)));
15352   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15353          USE_KILL len, KILL tmp5, KILL cr);
15354 
15355   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15356   ins_encode %{
          // Both mask-register slots get knoreg: this rule has no kReg operands.
15357     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15358                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15359                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15360                            knoreg, knoreg);
15361   %}
15362   ins_pipe( pipe_slow );
15363 %}
15364 
15365 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15366                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // StrCompressedCopy rule used when both AVX512VL+BW and BMI2 are
        // supported. Adds two kReg TEMPs (ktmp1, ktmp2) to the operands of
        // string_compress.
15367   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15368   match(Set result (StrCompressedCopy src (Binary dst len)));
15369   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15370          USE_KILL len, KILL tmp5, KILL cr);
15371 
15372   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15373   ins_encode %{
          // ktmp1/ktmp2 fill the two slots where string_compress passes knoreg.
15374     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15375                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15376                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15377                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15378   %}
15379   ins_pipe( pipe_slow );
15380 %}
15381 // fast byte[] to char[] inflation
15382 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15383                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
        // Matches StrInflatedCopy when supports_avx512vlbw() or supports_bmi2()
        // is false. The node result is bound to the Universe operand `dummy`,
        // which the encoding never references. src, dst and len are USE_KILL.
15384   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15385   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15386   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15387 
15388   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15389   ins_encode %{
          // knoreg: this variant allocates no opmask temporary.
15390     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15391                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15392   %}
15393   ins_pipe( pipe_slow );
15394 %}
15395 
15396 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15397                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
        // Matches StrInflatedCopy when both supports_avx512vlbw() and
        // supports_bmi2() are true. Differs from string_inflate only by the
        // extra kReg temporary passed to byte_array_inflate.
15398   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15399   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15400   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15401 
15402   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15403   ins_encode %{
15404     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15405                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15406   %}
15407   ins_pipe( pipe_slow );
15408 %}
15409 
15410 // encode char[] to byte[] in ISO_8859_1
15411 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15412                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15413                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // Matches EncodeISOArray nodes whose is_ascii() is false.
        // src, dst and len are USE_KILL; tmp5 and the flags are KILLed.
15414   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15415   match(Set result (EncodeISOArray src (Binary dst len)));
15416   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15417 
15418   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15419   ins_encode %{
          // Trailing `false` presumably selects the non-ASCII (ISO-8859-1) mode of
          // the helper, matching the predicate -- confirm against its signature.
15420     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15421                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15422                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15423   %}
15424   ins_pipe( pipe_slow );
15425 %}
15426 
15427 // encode char[] to byte[] in ASCII
15428 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15429                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15430                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
        // Matches EncodeISOArray nodes whose is_ascii() is true. Uses the same
        // encode_iso_array helper as the ISO variant, with `true` as last argument.
15431   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15432   match(Set result (EncodeISOArray src (Binary dst len)));
15433   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15434 
15435   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15436   ins_encode %{
15437     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15438                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15439                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15440   %}
15441   ins_pipe( pipe_slow );
15442 %}
15443 
15444 //----------Overflow Math Instructions-----------------------------------------
15445 
15446 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15447 %{
        // Matches OverflowAddI (reg, reg). The flags written by addl are the
        // node's result (DEF cr); op1 (rax_RegI) is declared USE_KILL.
15448   match(Set cr (OverflowAddI op1 op2));
15449   effect(DEF cr, USE_KILL op1, USE op2);
15450 
15451   format %{ "addl    $op1, $op2\t# overflow check int" %}
15452 
15453   ins_encode %{
15454     __ addl($op1$$Register, $op2$$Register);
15455   %}
15456   ins_pipe(ialu_reg_reg);
15457 %}
15458 
15459 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15460 %{
        // Matches OverflowAddI (reg, int immediate). Emits addl with the constant;
        // op1 (rax_RegI) is declared USE_KILL and the flags are the result.
15461   match(Set cr (OverflowAddI op1 op2));
15462   effect(DEF cr, USE_KILL op1, USE op2);
15463 
15464   format %{ "addl    $op1, $op2\t# overflow check int" %}
15465 
15466   ins_encode %{
15467     __ addl($op1$$Register, $op2$$constant);
15468   %}
15469   ins_pipe(ialu_reg_reg);
15470 %}
15471 
15472 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15473 %{
        // Matches OverflowAddL (reg, reg). Emits addq; op1 (rax_RegL) is declared
        // USE_KILL and the flags are the node's result.
15474   match(Set cr (OverflowAddL op1 op2));
15475   effect(DEF cr, USE_KILL op1, USE op2);
15476 
15477   format %{ "addq    $op1, $op2\t# overflow check long" %}
15478   ins_encode %{
15479     __ addq($op1$$Register, $op2$$Register);
15480   %}
15481   ins_pipe(ialu_reg_reg);
15482 %}
15483 
15484 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15485 %{
        // Matches OverflowAddL (reg, immL32 constant). Emits addq with the
        // constant; op1 (rax_RegL) is declared USE_KILL.
15486   match(Set cr (OverflowAddL op1 op2));
15487   effect(DEF cr, USE_KILL op1, USE op2);
15488 
15489   format %{ "addq    $op1, $op2\t# overflow check long" %}
15490   ins_encode %{
15491     __ addq($op1$$Register, $op2$$constant);
15492   %}
15493   ins_pipe(ialu_reg_reg);
15494 %}
15495 
15496 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15497 %{
        // Matches OverflowSubI (reg, reg). Emits cmpl, so only the flags are
        // produced; no effect() clause is declared and op1 is an ordinary rRegI.
15498   match(Set cr (OverflowSubI op1 op2));
15499 
15500   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15501   ins_encode %{
15502     __ cmpl($op1$$Register, $op2$$Register);
15503   %}
15504   ins_pipe(ialu_reg_reg);
15505 %}
15506 
15507 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15508 %{
        // Matches OverflowSubI (reg, int immediate). Emits cmpl against the
        // constant; only the flags are produced.
15509   match(Set cr (OverflowSubI op1 op2));
15510 
15511   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15512   ins_encode %{
15513     __ cmpl($op1$$Register, $op2$$constant);
15514   %}
15515   ins_pipe(ialu_reg_reg);
15516 %}
15517 
15518 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15519 %{
        // Matches OverflowSubL (reg, reg). Emits cmpq; only the flags are produced.
15520   match(Set cr (OverflowSubL op1 op2));
15521 
15522   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15523   ins_encode %{
15524     __ cmpq($op1$$Register, $op2$$Register);
15525   %}
15526   ins_pipe(ialu_reg_reg);
15527 %}
15528 
15529 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15530 %{
        // Matches OverflowSubL (reg, immL32 constant). Emits cmpq against the
        // constant; only the flags are produced.
15531   match(Set cr (OverflowSubL op1 op2));
15532 
15533   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15534   ins_encode %{
15535     __ cmpq($op1$$Register, $op2$$constant);
15536   %}
15537   ins_pipe(ialu_reg_reg);
15538 %}
15539 
15540 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15541 %{
        // Matches OverflowSubI whose left input is the constant zero (immI_0),
        // i.e. 0 - op2. Emits negl on op2, which is declared USE_KILL.
15542   match(Set cr (OverflowSubI zero op2));
15543   effect(DEF cr, USE_KILL op2);
15544 
15545   format %{ "negl    $op2\t# overflow check int" %}
15546   ins_encode %{
15547     __ negl($op2$$Register);
15548   %}
15549   ins_pipe(ialu_reg_reg);
15550 %}
15551 
15552 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15553 %{
        // Matches OverflowSubL whose left input is the constant zero (immL0),
        // i.e. 0 - op2. Emits negq on op2, which is declared USE_KILL.
15554   match(Set cr (OverflowSubL zero op2));
15555   effect(DEF cr, USE_KILL op2);
15556 
15557   format %{ "negq    $op2\t# overflow check long" %}
15558   ins_encode %{
15559     __ negq($op2$$Register);
15560   %}
15561   ins_pipe(ialu_reg_reg);
15562 %}
15563 
15564 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15565 %{
        // Matches OverflowMulI (reg, reg). Emits two-operand imull; op1 (rax_RegI)
        // is declared USE_KILL and the flags are the node's result.
15566   match(Set cr (OverflowMulI op1 op2));
15567   effect(DEF cr, USE_KILL op1, USE op2);
15568 
15569   format %{ "imull    $op1, $op2\t# overflow check int" %}
15570   ins_encode %{
15571     __ imull($op1$$Register, $op2$$Register);
15572   %}
15573   ins_pipe(ialu_reg_reg_alu0);
15574 %}
15575 
15576 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
15577 %{
        // Matches OverflowMulI (reg, int immediate). The three-operand imull
        // targets the TEMP register tmp, so op1 is only USEd here.
15578   match(Set cr (OverflowMulI op1 op2));
15579   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15580 
15581   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
15582   ins_encode %{
15583     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
15584   %}
15585   ins_pipe(ialu_reg_reg_alu0);
15586 %}
15587 
15588 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15589 %{
        // Matches OverflowMulL (reg, reg). Emits two-operand imulq; op1 (rax_RegL)
        // is declared USE_KILL and the flags are the node's result.
15590   match(Set cr (OverflowMulL op1 op2));
15591   effect(DEF cr, USE_KILL op1, USE op2);
15592 
15593   format %{ "imulq    $op1, $op2\t# overflow check long" %}
15594   ins_encode %{
15595     __ imulq($op1$$Register, $op2$$Register);
15596   %}
15597   ins_pipe(ialu_reg_reg_alu0);
15598 %}
15599 
15600 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
15601 %{
        // Matches OverflowMulL (reg, immL32 constant). The three-operand imulq
        // targets the TEMP register tmp, so op1 is only USEd here.
15602   match(Set cr (OverflowMulL op1 op2));
15603   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15604 
15605   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
15606   ins_encode %{
15607     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
15608   %}
15609   ins_pipe(ialu_reg_reg_alu0);
15610 %}
15611 
15612 
15613 //----------Control Flow Instructions------------------------------------------
15614 // Signed compare Instructions
15615 
15616 // XXX more variants!!
15617 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15618 %{
        // Matches CmpI (reg, reg) and emits cmpl. Also invoked by name from the
        // expand blocks of minI_rReg / maxI_rReg and their _ndd variants.
15619   match(Set cr (CmpI op1 op2));
15620   effect(DEF cr, USE op1, USE op2);
15621 
15622   format %{ "cmpl    $op1, $op2" %}
15623   ins_encode %{
15624     __ cmpl($op1$$Register, $op2$$Register);
15625   %}
15626   ins_pipe(ialu_cr_reg_reg);
15627 %}
15628 
15629 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15630 %{
        // Matches CmpI (reg, int immediate) and emits cmpl with the constant.
15631   match(Set cr (CmpI op1 op2));
15632 
15633   format %{ "cmpl    $op1, $op2" %}
15634   ins_encode %{
15635     __ cmpl($op1$$Register, $op2$$constant);
15636   %}
15637   ins_pipe(ialu_cr_reg_imm);
15638 %}
15639 
15640 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
15641 %{
        // Matches CmpI of a register with a LoadI, folding the load into the
        // memory operand of cmpl. Given an explicit cost of 500.
15642   match(Set cr (CmpI op1 (LoadI op2)));
15643 
15644   ins_cost(500); // XXX
15645   format %{ "cmpl    $op1, $op2" %}
15646   ins_encode %{
15647     __ cmpl($op1$$Register, $op2$$Address);
15648   %}
15649   ins_pipe(ialu_cr_reg_mem);
15650 %}
15651 
15652 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
15653 %{
        // Matches CmpI of a register against constant zero (immI_0); emitted as
        // testl src, src instead of a compare with an immediate.
15654   match(Set cr (CmpI src zero));
15655 
15656   format %{ "testl   $src, $src" %}
15657   ins_encode %{
15658     __ testl($src$$Register, $src$$Register);
15659   %}
15660   ins_pipe(ialu_cr_reg_imm);
15661 %}
15662 
15663 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
15664 %{
        // Matches (src & con) compared with zero; the AndI and CmpI are covered
        // by a single testl with the constant mask.
15665   match(Set cr (CmpI (AndI src con) zero));
15666 
15667   format %{ "testl   $src, $con" %}
15668   ins_encode %{
15669     __ testl($src$$Register, $con$$constant);
15670   %}
15671   ins_pipe(ialu_cr_reg_imm);
15672 %}
15673 
15674 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
15675 %{
        // Matches (src1 & src2) compared with zero; emitted as one register-
        // register testl.
15676   match(Set cr (CmpI (AndI src1 src2) zero));
15677 
15678   format %{ "testl   $src1, $src2" %}
15679   ins_encode %{
15680     __ testl($src1$$Register, $src2$$Register);
15681   %}
15682   ins_pipe(ialu_cr_reg_imm);
15683 %}
15684 
15685 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
15686 %{
        // Matches (src & LoadI mem) compared with zero; the load is folded into
        // the memory operand of testl.
15687   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
15688 
15689   format %{ "testl   $src, $mem" %}
15690   ins_encode %{
15691     __ testl($src$$Register, $mem$$Address);
15692   %}
15693   ins_pipe(ialu_cr_reg_mem);
15694 %}
15695 
15696 // Unsigned compare Instructions; really, same as signed except they
15697 // produce an rFlagsRegU instead of rFlagsReg.
15698 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
15699 %{
        // Matches CmpU (reg, reg). Same cmpl encoding as compI_rReg; the result
        // operand is rFlagsRegU rather than rFlagsReg.
15700   match(Set cr (CmpU op1 op2));
15701 
15702   format %{ "cmpl    $op1, $op2\t# unsigned" %}
15703   ins_encode %{
15704     __ cmpl($op1$$Register, $op2$$Register);
15705   %}
15706   ins_pipe(ialu_cr_reg_reg);
15707 %}
15708 
15709 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
15710 %{
        // Matches CmpU (reg, int immediate); cmpl with the constant, result in
        // rFlagsRegU.
15711   match(Set cr (CmpU op1 op2));
15712 
15713   format %{ "cmpl    $op1, $op2\t# unsigned" %}
15714   ins_encode %{
15715     __ cmpl($op1$$Register, $op2$$constant);
15716   %}
15717   ins_pipe(ialu_cr_reg_imm);
15718 %}
15719 
15720 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
15721 %{
        // Matches CmpU of a register with a LoadI, folding the load into cmpl's
        // memory operand. Given an explicit cost of 500.
15722   match(Set cr (CmpU op1 (LoadI op2)));
15723 
15724   ins_cost(500); // XXX
15725   format %{ "cmpl    $op1, $op2\t# unsigned" %}
15726   ins_encode %{
15727     __ cmpl($op1$$Register, $op2$$Address);
15728   %}
15729   ins_pipe(ialu_cr_reg_mem);
15730 %}
15731 
15732 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
15733 %{
        // Matches CmpU of a register against constant zero; emitted as
        // testl src, src with the result in rFlagsRegU.
15734   match(Set cr (CmpU src zero));
15735 
15736   format %{ "testl   $src, $src\t# unsigned" %}
15737   ins_encode %{
15738     __ testl($src$$Register, $src$$Register);
15739   %}
15740   ins_pipe(ialu_cr_reg_imm);
15741 %}
15742 
15743 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
15744 %{
        // Matches CmpP (reg, reg); 64-bit cmpq with the result in rFlagsRegU.
15745   match(Set cr (CmpP op1 op2));
15746 
15747   format %{ "cmpq    $op1, $op2\t# ptr" %}
15748   ins_encode %{
15749     __ cmpq($op1$$Register, $op2$$Register);
15750   %}
15751   ins_pipe(ialu_cr_reg_reg);
15752 %}
15753 
15754 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
15755 %{
        // Matches CmpP of a register with a LoadP, folding the load into cmpq.
        // Only applies when the load (n->in(2)) has barrier_data() == 0.
        // Given an explicit cost of 500.
15756   match(Set cr (CmpP op1 (LoadP op2)));
15757   predicate(n->in(2)->as_Load()->barrier_data() == 0);
15758 
15759   ins_cost(500); // XXX
15760   format %{ "cmpq    $op1, $op2\t# ptr" %}
15761   ins_encode %{
15762     __ cmpq($op1$$Register, $op2$$Address);
15763   %}
15764   ins_pipe(ialu_cr_reg_mem);
15765 %}
15766 
15767 // XXX this is generalized by compP_rReg_mem???
15768 // Compare raw pointer (used in out-of-heap check).
15769 // Only works because non-oop pointers must be raw pointers
15770 // and raw pointers have no anti-dependencies.
15771 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
15772 %{
        // Same match rule and encoding as compP_rReg_mem, but restricted to loads
        // whose input n->in(2)->in(2) has a raw-pointer bottom type, and declared
        // without an explicit ins_cost.
15773   predicate(n->in(2)->in(2)->bottom_type()->isa_rawptr() != nullptr &&
15774             n->in(2)->as_Load()->barrier_data() == 0);
15775   match(Set cr (CmpP op1 (LoadP op2)));
15776 
15777   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
15778   ins_encode %{
15779     __ cmpq($op1$$Register, $op2$$Address);
15780   %}
15781   ins_pipe(ialu_cr_reg_mem);
15782 %}
15783 
15784 // This will generate a signed flags result. This should be OK since
15785 // any compare to a zero should be eq/neq.
15786 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
15787 %{
        // Matches CmpP of a register against the null constant (immP0); emitted
        // as testq src, src. The result operand is the signed rFlagsReg.
15788   match(Set cr (CmpP src zero));
15789 
15790   format %{ "testq   $src, $src\t# ptr" %}
15791   ins_encode %{
15792     __ testq($src$$Register, $src$$Register);
15793   %}
15794   ins_pipe(ialu_cr_reg_imm);
15795 %}
15796 
15797 // This will generate a signed flags result. This should be OK since
15798 // any compare to a zero should be eq/neq.
15799 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
15800 %{
        // Null check of a pointer loaded from memory, without a separate load.
        // Applies when compressed oops are off or CompressedOops::base() is
        // non-null (testP_mem_reg0 covers the other case), and only for loads
        // with barrier_data() == 0.
15801   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
15802             n->in(1)->as_Load()->barrier_data() == 0);
15803   match(Set cr (CmpP (LoadP op) zero));
15804 
15805   ins_cost(500); // XXX
15806   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
15807   ins_encode %{
          // NOTE(review): the literal passed is 0xFFFFFFFF while the format string
          // shows a 64-bit all-ones mask; presumably testq takes a 32-bit immediate
          // that is sign-extended -- confirm against the assembler.
15808     __ testq($op$$Address, 0xFFFFFFFF);
15809   %}
          // NOTE(review): uses ialu_cr_reg_imm although the operand is in memory
          // (testN_mem uses ialu_cr_reg_mem) -- confirm this is intended.
15810   ins_pipe(ialu_cr_reg_imm);
15811 %}
15812 
15813 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
15814 %{
        // Null check of a pointer loaded from memory when compressed oops are on
        // and CompressedOops::base() is null. Compares the memory word against
        // r12, which the format string describes as holding a zero heap base.
15815   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
15816             n->in(1)->as_Load()->barrier_data() == 0);
15817   match(Set cr (CmpP (LoadP mem) zero));
15818 
15819   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
15820   ins_encode %{
15821     __ cmpq(r12, $mem$$Address);
15822   %}
15823   ins_pipe(ialu_cr_reg_mem);
15824 %}
15825 
15826 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
15827 %{
        // Matches CmpN (narrow reg, narrow reg); 32-bit cmpl, result in rFlagsRegU.
15828   match(Set cr (CmpN op1 op2));
15829 
15830   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
15831   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
15832   ins_pipe(ialu_cr_reg_reg);
15833 %}
15834 
15835 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
15836 %{
        // Matches CmpN of a narrow register with a LoadN, folding the load into
        // cmpl. Only applies when the load (n->in(2)) has barrier_data() == 0.
15837   predicate(n->in(2)->as_Load()->barrier_data() == 0);
15838   match(Set cr (CmpN src (LoadN mem)));
15839 
15840   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
15841   ins_encode %{
15842     __ cmpl($src$$Register, $mem$$Address);
15843   %}
15844   ins_pipe(ialu_cr_reg_mem);
15845 %}
15846 
15847 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
        // Matches CmpN of a narrow register with a narrow-oop constant (immN).
        // The constant is passed to cmp_narrow_oop as a jobject.
15848   match(Set cr (CmpN op1 op2));
15849 
15850   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
15851   ins_encode %{
15852     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
15853   %}
15854   ins_pipe(ialu_cr_reg_imm);
15855 %}
15856 
15857 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
15858 %{
        // Matches CmpN whose first input is a narrow-oop constant and whose second
        // input is a LoadN; emitted via cmp_narrow_oop on the memory operand.
        // Only applies when the load (n->in(2)) has barrier_data() == 0.
15859   predicate(n->in(2)->as_Load()->barrier_data() == 0);
15860   match(Set cr (CmpN src (LoadN mem)));
15861 
15862   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
15863   ins_encode %{
15864     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
15865   %}
15866   ins_pipe(ialu_cr_reg_mem);
15867 %}
15868 
15869 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
        // Matches CmpN of a narrow register with a narrow-klass constant
        // (immNKlass). The constant is passed to cmp_narrow_klass as a Klass*.
15870   match(Set cr (CmpN op1 op2));
15871 
15872   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
15873   ins_encode %{
15874     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
15875   %}
15876   ins_pipe(ialu_cr_reg_imm);
15877 %}
15878 
15879 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
15880 %{
        // Matches CmpN of a narrow-klass constant with a LoadNKlass, comparing
        // directly against memory. Disabled when UseCompactObjectHeaders is set.
15881   predicate(!UseCompactObjectHeaders);
15882   match(Set cr (CmpN src (LoadNKlass mem)));
15883 
15884   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
15885   ins_encode %{
15886     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
15887   %}
15888   ins_pipe(ialu_cr_reg_mem);
15889 %}
15890 
15891 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
        // Matches CmpN of a narrow register against the narrow null constant
        // (immN0); emitted as testl src, src.
15892   match(Set cr (CmpN src zero));
15893 
15894   format %{ "testl   $src, $src\t# compressed ptr" %}
15895   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
15896   ins_pipe(ialu_cr_reg_imm);
15897 %}
15898 
15899 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
15900 %{
        // Null check of a narrow oop loaded from memory, without a separate load.
        // Applies when CompressedOops::base() is non-null (testN_mem_reg0 covers
        // the null-base case) and only for loads with barrier_data() == 0.
15901   predicate(CompressedOops::base() != nullptr &&
15902             n->in(1)->as_Load()->barrier_data() == 0);
15903   match(Set cr (CmpN (LoadN mem) zero));
15904 
15905   ins_cost(500); // XXX
15906   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
15907   ins_encode %{
          // Must be testl, as the format string says: ANDing with all-ones sets ZF
          // exactly when the loaded value is zero. A cmpl against -1 would set ZF
          // only for 0xFFFFFFFF, which is not a compare with zero.
15908     __ testl($mem$$Address, (int)0xFFFFFFFF);
15909   %}
15910   ins_pipe(ialu_cr_reg_mem);
15911 %}
15912 
15913 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
15914 %{
        // Null check of a narrow oop loaded from memory when
        // CompressedOops::base() is null. Compares the memory word against r12,
        // which the format string describes as holding a zero heap base.
15915   predicate(CompressedOops::base() == nullptr &&
15916             n->in(1)->as_Load()->barrier_data() == 0);
15917   match(Set cr (CmpN (LoadN mem) zero));
15918 
15919   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
15920   ins_encode %{
15921     __ cmpl(r12, $mem$$Address);
15922   %}
15923   ins_pipe(ialu_cr_reg_mem);
15924 %}
15925 
15926 // Yanked all unsigned pointer compare operations.
15927 // Pointer compares are done with CmpP which is already unsigned.
15928 
15929 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15930 %{
        // Matches CmpL (reg, reg); emits 64-bit cmpq.
15931   match(Set cr (CmpL op1 op2));
15932 
15933   format %{ "cmpq    $op1, $op2" %}
15934   ins_encode %{
15935     __ cmpq($op1$$Register, $op2$$Register);
15936   %}
15937   ins_pipe(ialu_cr_reg_reg);
15938 %}
15939 
15940 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15941 %{
        // Matches CmpL (reg, immL32 constant); emits cmpq with the constant.
15942   match(Set cr (CmpL op1 op2));
15943 
15944   format %{ "cmpq    $op1, $op2" %}
15945   ins_encode %{
15946     __ cmpq($op1$$Register, $op2$$constant);
15947   %}
15948   ins_pipe(ialu_cr_reg_imm);
15949 %}
15950 
15951 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
15952 %{
        // Matches CmpL of a register with a LoadL, folding the load into cmpq's
        // memory operand. No explicit ins_cost is declared here.
15953   match(Set cr (CmpL op1 (LoadL op2)));
15954 
15955   format %{ "cmpq    $op1, $op2" %}
15956   ins_encode %{
15957     __ cmpq($op1$$Register, $op2$$Address);
15958   %}
15959   ins_pipe(ialu_cr_reg_mem);
15960 %}
15961 
15962 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
15963 %{
        // Matches CmpL of a register against constant zero (immL0); emitted as
        // testq src, src.
15964   match(Set cr (CmpL src zero));
15965 
15966   format %{ "testq   $src, $src" %}
15967   ins_encode %{
15968     __ testq($src$$Register, $src$$Register);
15969   %}
15970   ins_pipe(ialu_cr_reg_imm);
15971 %}
15972 
15973 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
15974 %{
        // Matches (src & con) compared with zero for longs, where con is an
        // immL32 constant; emitted as one testq with the constant mask.
15975   match(Set cr (CmpL (AndL src con) zero));
15976 
15977   format %{ "testq   $src, $con\t# long" %}
15978   ins_encode %{
15979     __ testq($src$$Register, $con$$constant);
15980   %}
15981   ins_pipe(ialu_cr_reg_imm);
15982 %}
15983 
15984 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
15985 %{
        // Matches (src1 & src2) compared with zero for longs; emitted as one
        // register-register testq.
15986   match(Set cr (CmpL (AndL src1 src2) zero));
15987 
15988   format %{ "testq   $src1, $src2\t# long" %}
15989   ins_encode %{
15990     __ testq($src1$$Register, $src2$$Register);
15991   %}
15992   ins_pipe(ialu_cr_reg_imm);
15993 %}
15994 
15995 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
15996 %{
        // Matches (src & LoadL mem) compared with zero; the load is folded into
        // the memory operand of testq.
15997   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
15998 
15999   format %{ "testq   $src, $mem" %}
16000   ins_encode %{
16001     __ testq($src$$Register, $mem$$Address);
16002   %}
16003   ins_pipe(ialu_cr_reg_mem);
16004 %}
16005 
16006 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16007 %{
        // Variant of testL_reg_mem whose register input is a pointer viewed as a
        // long through CastP2X; the pointer register is used directly in testq.
16008   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16009 
16010   format %{ "testq   $src, $mem" %}
16011   ins_encode %{
16012     __ testq($src$$Register, $mem$$Address);
16013   %}
16014   ins_pipe(ialu_cr_reg_mem);
16015 %}
16016 
16017 // Manifest a CmpU result in an integer register.  Very painful.
16018 // This is the test to avoid.
16019 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16020 %{
        // Materializes the three-way unsigned int compare CmpU3 into dst:
        // dst is preloaded with -1 and kept if src1 is below src2; otherwise
        // setcc(notZero) overwrites it (per the format string: setne + movzbl, or
        // setzune for APX). The flags are KILLed.
16021   match(Set dst (CmpU3 src1 src2));
16022   effect(KILL flags);
16023 
16024   ins_cost(275); // XXX
16025   format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
16026             "movl    $dst, -1\n\t"
16027             "jb,u    done\n\t"
16028             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n\t"
16029             "done:" %}
16030   ins_encode %{
16031     Label done;
16032     __ cmpl($src1$$Register, $src2$$Register);
          // movl sits between cmpl and the branch, so it is relied on not to
          // disturb the flags set by cmpl.
16033     __ movl($dst$$Register, -1);
16034     __ jccb(Assembler::below, done);
16035     __ setcc(Assembler::notZero, $dst$$Register);
16036     __ bind(done);
16037   %}
16038   ins_pipe(pipe_slow);
16039 %}
16040 
16041 // Manifest a CmpL result in an integer register.  Very painful.
16042 // This is the test to avoid.
16043 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16044 %{
        // Materializes the three-way signed long compare CmpL3 into dst:
        // dst is preloaded with -1 and kept if src1 is less than src2; otherwise
        // setcc(notZero) overwrites it (per the format string: setne + movzbl, or
        // setzune for APX). The flags are KILLed.
16045   match(Set dst (CmpL3 src1 src2));
16046   effect(KILL flags);
16047 
16048   ins_cost(275); // XXX
16049   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16050             "movl    $dst, -1\n\t"
16051             "jl,s    done\n\t"
16052             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n\t"
16053             "done:" %}
16054   ins_encode %{
16055     Label done;
16056     __ cmpq($src1$$Register, $src2$$Register);
          // movl sits between cmpq and the branch, so it is relied on not to
          // disturb the flags set by cmpq.
16057     __ movl($dst$$Register, -1);
16058     __ jccb(Assembler::less, done);
16059     __ setcc(Assembler::notZero, $dst$$Register);
16060     __ bind(done);
16061   %}
16062   ins_pipe(pipe_slow);
16063 %}
16064 
16065 // Manifest a CmpUL result in an integer register.  Very painful.
16066 // This is the test to avoid.
16067 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16068 %{
        // Materializes the three-way unsigned long compare CmpUL3 into dst:
        // dst is preloaded with -1 and kept if src1 is below src2; otherwise
        // setcc(notZero) overwrites it (per the format string: setne + movzbl, or
        // setzune for APX). The flags are KILLed.
16069   match(Set dst (CmpUL3 src1 src2));
16070   effect(KILL flags);
16071 
16072   ins_cost(275); // XXX
16073   format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
16074             "movl    $dst, -1\n\t"
16075             "jb,u    done\n\t"
16076             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n\t"
16077             "done:" %}
16078   ins_encode %{
16079     Label done;
16080     __ cmpq($src1$$Register, $src2$$Register);
          // movl sits between cmpq and the branch, so it is relied on not to
          // disturb the flags set by cmpq.
16081     __ movl($dst$$Register, -1);
16082     __ jccb(Assembler::below, done);
16083     __ setcc(Assembler::notZero, $dst$$Register);
16084     __ bind(done);
16085   %}
16086   ins_pipe(pipe_slow);
16087 %}
16088 
16089 // Unsigned long compare Instructions; really, same as signed long except they
16090 // produce an rFlagsRegU instead of rFlagsReg.
16091 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16092 %{
        // Matches CmpUL (reg, reg). Same cmpq encoding as compL_rReg; the result
        // operand is rFlagsRegU.
16093   match(Set cr (CmpUL op1 op2));
16094 
16095   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16096   ins_encode %{
16097     __ cmpq($op1$$Register, $op2$$Register);
16098   %}
16099   ins_pipe(ialu_cr_reg_reg);
16100 %}
16101 
16102 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16103 %{
        // Matches CmpUL (reg, immL32 constant); cmpq with the constant, result in
        // rFlagsRegU.
16104   match(Set cr (CmpUL op1 op2));
16105 
16106   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16107   ins_encode %{
16108     __ cmpq($op1$$Register, $op2$$constant);
16109   %}
16110   ins_pipe(ialu_cr_reg_imm);
16111 %}
16112 
16113 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16114 %{
        // Matches CmpUL of a register with a LoadL, folding the load into cmpq's
        // memory operand; result in rFlagsRegU.
16115   match(Set cr (CmpUL op1 (LoadL op2)));
16116 
16117   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16118   ins_encode %{
16119     __ cmpq($op1$$Register, $op2$$Address);
16120   %}
16121   ins_pipe(ialu_cr_reg_mem);
16122 %}
16123 
16124 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16125 %{
        // Matches CmpUL of a register against constant zero; emitted as
        // testq src, src with the result in rFlagsRegU.
16126   match(Set cr (CmpUL src zero));
16127 
16128   format %{ "testq   $src, $src\t# unsigned" %}
16129   ins_encode %{
16130     __ testq($src$$Register, $src$$Register);
16131   %}
16132   ins_pipe(ialu_cr_reg_imm);
16133 %}
16134 
16135 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16136 %{
        // Matches CmpI of a LoadB with an immI8 constant; the byte is compared in
        // memory with cmpb instead of being loaded first.
16137   match(Set cr (CmpI (LoadB mem) imm));
16138 
16139   ins_cost(125);
16140   format %{ "cmpb    $mem, $imm" %}
16141   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16142   ins_pipe(ialu_cr_reg_mem);
16143 %}
16144 
16145 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16146 %{
        // Matches (LoadUB mem & imm) compared with zero, where imm is an immU7
        // constant; emitted as testb directly on memory.
16147   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16148 
16149   ins_cost(125);
16150   format %{ "testb   $mem, $imm\t# ubyte" %}
16151   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16152   ins_pipe(ialu_cr_reg_mem);
16153 %}
16154 
16155 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16156 %{
        // Matches (LoadB mem & imm) compared with zero, where imm is an immI8
        // constant; emitted as testb directly on memory.
16157   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16158 
16159   ins_cost(125);
16160   format %{ "testb   $mem, $imm\t# byte" %}
16161   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16162   ins_pipe(ialu_cr_reg_mem);
16163 %}
16164 
16165 //----------Max and Min--------------------------------------------------------
16166 // Min Instructions
16167 
16168 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16169 %{
        // Helper with no match rule; it is instantiated from minI_rReg's expand
        // block. Emits cmovl on the `greater` condition with dst as USE_DEF.
        // Non-APX only.
16170   predicate(!UseAPX);
16171   effect(USE_DEF dst, USE src, USE cr);
16172 
16173   format %{ "cmovlgt $dst, $src\t# min" %}
16174   ins_encode %{
16175     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16176   %}
16177   ins_pipe(pipe_cmov_reg);
16178 %}
16179 
16180 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16181 %{
        // APX helper with no match rule; instantiated from minI_rReg_ndd's expand
        // block. dst is a pure DEF, separate from both sources.
        // NOTE(review): presumably ecmovl yields src2 when `greater` holds and
        // src1 otherwise -- confirm against the assembler.
16182   predicate(UseAPX);
16183   effect(DEF dst, USE src1, USE src2, USE cr);
16184 
16185   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16186   ins_encode %{
16187     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16188   %}
16189   ins_pipe(pipe_cmov_reg);
16190 %}
16191 
16192 instruct minI_rReg(rRegI dst, rRegI src)
16193 %{
        // Matches MinI in two-address form (dst = min(dst, src)) when APX is off.
        // Expands to compI_rReg followed by cmovI_reg_g.
16194   predicate(!UseAPX);
16195   match(Set dst (MinI dst src));
16196 
16197   ins_cost(200);
16198   expand %{
16199     rFlagsReg cr;
          // Compare dst with src, then replace dst by src if dst was greater.
16200     compI_rReg(cr, dst, src);
16201     cmovI_reg_g(dst, src, cr);
16202   %}
16203 %}
16204 
16205 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16206 %{
        // Matches MinI in three-operand form (dst = min(src1, src2)) when APX is
        // on. Expands to compI_rReg followed by cmovI_reg_g_ndd and is tagged with
        // PD::Flag_ndd_demotable_opr1.
16207   predicate(UseAPX);
16208   match(Set dst (MinI src1 src2));
16209   effect(DEF dst, USE src1, USE src2);
16210   flag(PD::Flag_ndd_demotable_opr1);
16211 
16212   ins_cost(200);
16213   expand %{
16214     rFlagsReg cr;
16215     compI_rReg(cr, src1, src2);
16216     cmovI_reg_g_ndd(dst, src1, src2, cr);
16217   %}
16218 %}
16219 
16220 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16221 %{
        // Helper with no match rule; it is instantiated from maxI_rReg's expand
        // block. Emits cmovl on the `less` condition with dst as USE_DEF.
        // Non-APX only.
16222   predicate(!UseAPX);
16223   effect(USE_DEF dst, USE src, USE cr);
16224 
16225   format %{ "cmovllt $dst, $src\t# max" %}
16226   ins_encode %{
16227     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16228   %}
16229   ins_pipe(pipe_cmov_reg);
16230 %}
16231 
16232 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16233 %{
        // APX helper with no match rule; instantiated from maxI_rReg_ndd's expand
        // block. dst is a pure DEF, separate from both sources.
        // NOTE(review): presumably ecmovl yields src2 when `less` holds and src1
        // otherwise -- confirm against the assembler.
16234   predicate(UseAPX);
16235   effect(DEF dst, USE src1, USE src2, USE cr);
16236 
16237   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16238   ins_encode %{
16239     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16240   %}
16241   ins_pipe(pipe_cmov_reg);
16242 %}
16243 
16244 instruct maxI_rReg(rRegI dst, rRegI src)
16245 %{
        // Matches MaxI in two-address form (dst = max(dst, src)) when APX is off.
        // Expands to compI_rReg followed by cmovI_reg_l.
16246   predicate(!UseAPX);
16247   match(Set dst (MaxI dst src));
16248 
16249   ins_cost(200);
16250   expand %{
16251     rFlagsReg cr;
          // Compare dst with src, then replace dst by src if dst was less.
16252     compI_rReg(cr, dst, src);
16253     cmovI_reg_l(dst, src, cr);
16254   %}
16255 %}
16256 
16257 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16258 %{
        // Matches MaxI in three-operand form (dst = max(src1, src2)) when APX is
        // on. Expands to compI_rReg followed by cmovI_reg_l_ndd and is tagged with
        // PD::Flag_ndd_demotable_opr1.
16259   predicate(UseAPX);
16260   match(Set dst (MaxI src1 src2));
16261   effect(DEF dst, USE src1, USE src2);
16262   flag(PD::Flag_ndd_demotable_opr1);
16263 
16264   ins_cost(200);
16265   expand %{
16266     rFlagsReg cr;
16267     compI_rReg(cr, src1, src2);
16268     cmovI_reg_l_ndd(dst, src1, src2, cr);
16269   %}
16270 %}
16271 
16272 // ============================================================================
16273 // Branch Instructions
16274 
16275 // Jump Direct - Label defines a relative address from JMP+1
16276 instruct jmpDir(label labl)
16277 %{
        // Matches Goto. Emits an unconditional jmp to the label, always in its
        // long form; the size is declared as a fixed 5 bytes.
16278   match(Goto);
16279   effect(USE labl);
16280 
16281   ins_cost(300);
16282   format %{ "jmp     $labl" %}
16283   size(5);
16284   ins_encode %{
16285     Label* L = $labl$$label;
16286     __ jmp(*L, false); // Always long jump
16287   %}
16288   ins_pipe(pipe_jmp);
16289 %}
16290 
16291 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16292 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16293 %{
        // Matches If on signed flags (cmpOp / rFlagsReg). Emits a long-form jcc
        // using the condition code carried by cop; size declared as 6 bytes.
16294   match(If cop cr);
16295   effect(USE labl);
16296 
16297   ins_cost(300);
16298   format %{ "j$cop     $labl" %}
16299   size(6);
16300   ins_encode %{
16301     Label* L = $labl$$label;
16302     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16303   %}
16304   ins_pipe(pipe_jcc);
16305 %}
16306 
16307 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16308 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16309 %{
        // Matches CountedLoopEnd on signed flags. Same long-form jcc encoding and
        // 6-byte size as jmpCon.
16310   match(CountedLoopEnd cop cr);
16311   effect(USE labl);
16312 
16313   ins_cost(300);
16314   format %{ "j$cop     $labl\t# loop end" %}
16315   size(6);
16316   ins_encode %{
16317     Label* L = $labl$$label;
16318     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16319   %}
16320   ins_pipe(pipe_jcc);
16321 %}
16322 
16323 // Jump Direct Conditional - using unsigned comparison
16324 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
        // Matches If on unsigned flags (cmpOpU / rFlagsRegU). Emits a long-form
        // jcc using the condition code carried by cop; size declared as 6 bytes.
16325   match(If cop cmp);
16326   effect(USE labl);
16327 
16328   ins_cost(300);
16329   format %{ "j$cop,u   $labl" %}
16330   size(6);
16331   ins_encode %{
16332     Label* L = $labl$$label;
16333     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16334   %}
16335   ins_pipe(pipe_jcc);
16336 %}
16337 
16338 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
        // Matches If for the cmpOpUCF / rFlagsRegUCF operand pair. Emits a single
        // long-form jcc; declared with cost 200, lower than jmpConU's 300.
16339   match(If cop cmp);
16340   effect(USE labl);
16341 
16342   ins_cost(200);
16343   format %{ "j$cop,u   $labl" %}
16344   size(6);
16345   ins_encode %{
16346     Label* L = $labl$$label;
16347     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16348   %}
16349   ins_pipe(pipe_jcc);
16350 %}
16351 
// Conditional direct jump (long form) for cmpOpUCF2 conditions.  Only equal
// and notEqual are handled; each needs a parity branch next to the main one:
//   notEqual: jump to $labl on parity OR on notEqual.
//   equal:    skip over the jump on parity, otherwise jump to $labl on equal.
// NOTE(review): presumably parity marks an unordered FP compare - confirm.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u    $labl\n\t"
      $$emit$$"j$cop,u   $labl"
    } else {
      $$emit$$"jp,u    done\n\t"
      $$emit$$"j$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* dest = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual:
        // Both branches go to the real target (long form).
        __ jcc(Assembler::parity, *dest, false);
        __ jcc(Assembler::notEqual, *dest, false);
        break;
      case Assembler::equal: {
        // Parity set: fall through past the equal-branch.
        Label done;
        __ jccb(Assembler::parity, done);
        __ jcc(Assembler::equal, *dest, false);
        __ bind(done);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
16383 
// Conditional direct jump (long form) using signed and unsigned comparison
// (cmpOpUCFE / rFlagsRegUCFE operands).
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(6);
  ins_cost(200);
  format %{ "j$cop,su   $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    // The 'false' argument forces the long jump form every time.
    __ jcc(cc, *dest, false);
  %}
  ins_pipe(pipe_jcc);
%}
16398 
16399 // ============================================================================
16400 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
16401 // superklass array for an instance of the superklass.  Set a hidden
16402 // internal cache on a hit (cache is checked with exposed code in
16403 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
16404 // encoding ALSO sets flags.
16405 
// Linear-scan PartialSubtypeCheck, selected when UseSecondarySupersTable is
// off.  Operands are pinned to fixed registers: $sub in rsi, $super in rax,
// $result in rdi; rcx and the flags are killed.  On the fall-through (hit)
// path $result is zeroed; the miss path jumps past the xor.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(!UseSecondarySupersTable);
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than the next version
  // Listing text only: the instructions themselves are generated by
  // check_klass_subtype_slow_path_linear() in the encoding below.
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
            "miss:\t" %}

  ins_encode %{
    Label miss;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
                                            $rcx$$Register, $result$$Register,
                                            nullptr, &miss,
                                            /*set_cond_codes:*/ true);
    // Reached only on a hit: report success as zero.
    __ xorptr($result$$Register, $result$$Register);
    __ bind(miss);
  %}

  ins_pipe(pipe_slow);
%}
16439 
16440 // ============================================================================
16441 // Two versions of hashtable-based partialSubtypeCheck, both used when
16442 // we need to search for a super class in the secondary supers array.
16443 // The first is used when we don't know _a priori_ the class being
16444 // searched for. The second, far more common, is used when we do know:
16445 // this is used for instanceof, checkcast, and any case where C2 can
16446 // determine it by constant propagation.
16447 
// Hashtable-based PartialSubtypeCheck for a superclass held only in a
// register (selected when UseSecondarySupersTable is on).  All operands are
// pinned to fixed registers; temp1..temp4 are scratch and the flags are killed.
instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr) %{
  predicate(UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  format %{ "partialSubtypeCheck $result, $sub, $super" %}
  ins_cost(1000);

  ins_encode %{
    Register sub_klass   = $sub$$Register;
    Register super_klass = $super$$Register;
    __ lookup_secondary_supers_table_var(sub_klass, super_klass,
                                         $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register,
                                         $result$$Register);
  %}

  ins_pipe(pipe_slow);
%}
16466 
// Hashtable-based PartialSubtypeCheck for a superclass that is also known as
// a compile-time constant ($super_con).  The constant's hash slot is computed
// while emitting code; the lookup is then either inlined or delegated to a
// per-slot stub, depending on InlineSecondarySupersTest.
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr) %{
  predicate(UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
  ins_cost(700);  // lower than partialSubtypeCheckVarSuper (1000)

  ins_encode %{
    u1 slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (!InlineSecondarySupersTest) {
      // Out-of-line: the stub call takes no register arguments here.
      // NOTE(review): presumably it relies on the fixed operand registers - confirm.
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(slot)));
    } else {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register,
                                             $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register,
                                             $result$$Register,
                                             slot);
    }
  %}

  ins_pipe(pipe_slow);
%}
16491 
16492 // ============================================================================
16493 // Branch Instructions -- short offset versions
16494 //
16495 // These instructions are used to replace jumps of a long offset (the default
16496 // match) with jumps of a shorter offset.  These instructions are all tagged
16497 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16498 // match rules in general matching.  Instead, the ADLC generates a conversion
16499 // method in the MachNode which can be used to do in-place replacement of the
16500 // long variant with the shorter variant.  The compiler will determine if a
16501 // branch can be taken by the is_short_branch_offset() predicate in the machine
16502 // specific code section of the file.
16503 
// Short-offset replacement for jmpDir: unconditional jump to $labl.
instruct jmpDir_short(label labl)
%{
  match(Goto);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_short_branch(1);
  format %{ "jmp,s   $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    __ jmpb(*dest);
  %}
  ins_pipe(pipe_jmp);
%}
16519 
// Short-offset replacement for jmpCon: conditional jump to $labl on $cop.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_short_branch(1);
  format %{ "j$cop,s   $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, *dest);
  %}
  ins_pipe(pipe_jcc);
%}
16535 
// Short-offset replacement for jmpLoopEnd: conditional jump closing a
// counted loop.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_short_branch(1);
  format %{ "j$cop,s   $labl\t# loop end" %}
  ins_encode %{
    Label* dest = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, *dest);
  %}
  ins_pipe(pipe_jcc);
%}
16551 
// Short-offset replacement for jmpConU: conditional jump on the result of an
// unsigned comparison.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_short_branch(1);
  format %{ "j$cop,us  $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, *dest);
  %}
  ins_pipe(pipe_jcc);
%}
16567 
// Short-offset replacement for jmpConUCF: a single short jcc to $labl.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_short_branch(1);
  format %{ "j$cop,us  $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, *dest);
  %}
  ins_pipe(pipe_jcc);
%}
16582 
// Short-offset replacement for jmpConUCF2.  Only equal and notEqual are
// handled, each as a pair of short branches (hence the fixed size of 4):
//   notEqual: jump to $labl on parity OR on notEqual.
//   equal:    skip over the jump on parity, otherwise jump to $labl on equal.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(4);
  ins_cost(300);
  ins_short_branch(1);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s  $labl\n\t"
      $$emit$$"j$cop,u,s  $labl"
    } else {
      $$emit$$"jp,u,s  done\n\t"
      $$emit$$"j$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* dest = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual:
        __ jccb(Assembler::parity, *dest);
        __ jccb(Assembler::notEqual, *dest);
        break;
      case Assembler::equal: {
        // Parity set: fall through past the equal-branch.
        Label done;
        __ jccb(Assembler::parity, done);
        __ jccb(Assembler::equal, *dest);
        __ bind(done);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
16616 
// Short-offset replacement for jmpConUCFE: conditional jump using signed and
// unsigned comparison.
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_short_branch(1);
  format %{ "j$cop,sus  $labl" %}
  ins_encode %{
    Label* dest = $labl$$label;
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, *dest);
  %}
  ins_pipe(pipe_jcc);
%}
16632 
16633 // ============================================================================
16634 // inlined locking and unlocking
16635 
// Inlined monitor-enter fast path.  The outcome is delivered in the flags
// ($cr); $box (rbx) is consumed and clobbered, rax and $tmp are scratch.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp)
%{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);

  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_cost(300);
  ins_encode %{
    Register obj_reg = $object$$Register;
    Register box_reg = $box$$Register;
    __ fast_lock(obj_reg, box_reg, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16646 
// Inlined monitor-exit fast path.  The outcome is delivered in the flags
// ($cr); the rax operand is consumed and clobbered, $tmp is scratch.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp)
%{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);

  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_cost(300);
  ins_encode %{
    Register obj_reg = $object$$Register;
    __ fast_unlock(obj_reg, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16657 
16658 
16659 // ============================================================================
16660 // Safepoint Instructions
// Safepoint poll: a 'testl rax, [$poll]' tagged with a poll_type relocation.
// Kills the flags.  In debug builds the emitted bytes are checked to be
// recognized as a safepoint poll.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  ins_cost(125);
  format %{ "testl   rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_encode %{
    // The relocation must be recorded before the instruction it describes.
    __ relocate(relocInfo::poll_type);
    address poll_pc = __ pc();
    Address poll_word($poll$$Register, 0);
    __ testl(rax, poll_word);
    assert(nativeInstruction_at(poll_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
16677 
// MaskAll from a long source into an opmask (k) register; the vector length
// of this node is passed on to vector_maskall_operation.
instruct mask_all_evexL(kReg dst, rRegL src)
%{
  match(Set dst (MaskAll src));

  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    const int vlen = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, vlen);
  %}
  ins_pipe( pipe_slow );
%}
16687 
// MaskAll from an int source when the vector length exceeds 32: the int is
// first sign-extended into the long temporary $tmp, which then feeds
// vector_maskall_operation.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp)
%{
  match(Set dst (MaskAll src));
  predicate(Matcher::vector_length(n) > 32);
  effect(TEMP tmp);

  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    const int vlen = Matcher::vector_length(this);
    Register wide = $tmp$$Register;
    __ movslq(wide, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, wide, vlen);
  %}
  ins_pipe( pipe_slow );
%}
16700 
16701 // ============================================================================
16702 // Procedure Call/Return Instructions
// Direct call for CallStaticJava.
// Keep ret_addr_offset() and compute_padding() in sync with any change to
// this rule's encoding.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);
  effect(USE meth);

  ins_alignment(4);
  ins_cost(300);
  opcode(0xE8); /* E8 cd */
  format %{ "call,static " %}
  // Encoding classes run in the listed order.
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
%}
16717 
// Call for CallDynamicJava; per the listing format, rax is loaded with
// Universe::non_oop_word() ahead of the call.
// Keep ret_addr_offset() and compute_padding() in sync with any change to
// this rule's encoding.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_alignment(4);
  ins_cost(300);
  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  // Encoding classes run in the listed order.
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
%}
16733 
// Call into the runtime (CallRuntime node).
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);

  format %{ "call,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16745 
// Leaf runtime call (CallLeaf node): a runtime call without a safepoint.
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  format %{ "call_leaf,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16757 
// Leaf runtime call taking vector arguments (CallLeafVector node); no
// safepoint.  Unlike the other runtime-call rules in this section, the
// encoding does not run clear_avx first.
// NOTE(review): presumably because vector registers carry the arguments - confirm.
instruct CallLeafDirectVector(method meth) %{
  match(CallLeafVector);
  effect(USE meth);

  format %{ "call_leaf,vector " %}
  ins_cost(300);
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16769 
// Leaf runtime call for CallLeafNoFP nodes; no safepoint.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  format %{ "call_leaf_nofp,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16781 
// Return: pop the return address and jump to it.
// NOTE(review): the previous comment here said a nop is always emitted after
// the ret to leave room for safepoint patching; this encoding emits only
// ret(0), so any such padding must come from elsewhere - confirm.
instruct Ret() %{
  match(Return);

  format %{ "ret" %}
  ins_pipe(pipe_jmp);
  ins_encode %{
    __ ret(0);
  %}
%}
16796 
// Tail call ('interprocedural jump') from a runtime stub into Java code.
// The return address stays in place, so the jump target eventually returns
// to our caller (TailJump, by contrast, removes it).
// 'jump_target' must not be rbp: a MachEpilogNode emitted just above the
// TailCall has already reset rbp to the caller state.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr) %{
  match(TailCall jump_target method_ptr);

  format %{ "jmp     $jump_target\t# rbx holds method" %}
  ins_cost(300);
  ins_encode %{
    Register dest = $jump_target$$Register;
    __ jmp(dest);
  %}
  ins_pipe(pipe_jmp);
%}
16814 
// Tail jump: discard the return address (popped into rdx), then jump to the
// target.  TailCall, by contrast, leaves the return address in place.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop) %{
  match(TailJump jump_target ex_oop);

  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  ins_cost(300);
  ins_encode %{
    Register dest = $jump_target$$Register;
    __ popq(as_Register(RDX_enc));  // drop the return address
    __ jmp(dest);
  %}
  ins_pipe(pipe_jmp);
%}
16830 
// Forward a pending exception: jump (not call) to the forward-exception stub.
instruct ForwardExceptionjmp() %{
  match(ForwardException);

  ins_pipe(pipe_jmp);
  format %{ "jmp     forward_exception_stub" %}
  ins_encode %{
    const address stub_entry = StubRoutines::forward_exception_entry();
    __ jump(RuntimeAddress(stub_entry), noreg);
  %}
%}
16842 
// CreateEx: the exception oop is produced by stack-crawling runtime code and
// is already in rax when control reaches the handler, so this rule emits no
// code and only binds the value to rax.
instruct CreateException(rax_RegP ex_oop) %{
  match(Set ex_oop (CreateEx));

  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  size(0);
  ins_encode();
  ins_pipe(empty);
%}
16856 
// Rethrow: the exception oop arrives in the first argument position; control
// is transferred by a JUMP (not a call) to the rethrow stub.
instruct RethrowException() %{
  match(Rethrow);

  ins_pipe(pipe_jmp);
  // use the following format syntax
  format %{ "jmp     rethrow_stub" %}
  ins_encode %{
    const address stub_entry = OptoRuntime::rethrow_stub();
    __ jump(RuntimeAddress(stub_entry), noreg);
  %}
%}
16871 
16872 // ============================================================================
// ThreadLocal: the thread pointer lives permanently in r15, so nothing is
// emitted.  The rule name 'tlsLoadP' is KNOWN by the ADLC and must not be
// changed; the ADLC forces a 'TypeRawPtr::BOTTOM' output type for it.
instruct tlsLoadP(r15_RegP dst)
%{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  format %{ "# TLS is in R15" %}
  size(0);
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
16885 
// Float add, non-AVX path (UseAVX == 0): $dst = $dst + $src, both registers.
instruct addF_reg(regF dst, regF src)
%{
  match(Set dst (AddF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addss   $dst, $src" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // input and result
    __ addss(dst_reg, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
16897 
// Float add, non-AVX path (UseAVX == 0): $dst = $dst + LoadF($src), with the
// load folded into the instruction as a memory operand.
instruct addF_mem(regF dst, memory src)
%{
  match(Set dst (AddF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addss   $dst, $src" %}
  ins_encode %{
    Address mem = $src$$Address;
    __ addss($dst$$XMMRegister, mem);
  %}
  ins_pipe(pipe_slow);
%}
16909 
// Float add, non-AVX path (UseAVX == 0): $dst = $dst + $con, with the
// constant read from the constant table.
instruct addF_imm(regF dst, immF con)
%{
  match(Set dst (AddF dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // input and result
    __ addss(dst_reg, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16920 
// Float add, AVX path (UseAVX > 0): three-operand $dst = $src1 + $src2.
instruct addF_reg_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vaddss($dst$$XMMRegister, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
16932 
// Float add, AVX path (UseAVX > 0): $dst = $src1 + LoadF($src2), with the
// load folded into the instruction as a memory operand.
instruct addF_reg_mem(regF dst, regF src1, memory src2)
%{
  match(Set dst (AddF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_encode %{
    Address mem = $src2$$Address;
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, mem);
  %}
  ins_pipe(pipe_slow);
%}
16944 
// Float add, AVX path (UseAVX > 0): $dst = $src + $con, with the constant
// read from the constant table.
instruct addF_reg_imm(regF dst, regF src, immF con)
%{
  match(Set dst (AddF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ vaddss($dst$$XMMRegister, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16956 
// Double add, non-AVX path (UseAVX == 0): $dst = $dst + $src, both registers.
instruct addD_reg(regD dst, regD src)
%{
  match(Set dst (AddD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addsd   $dst, $src" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // input and result
    __ addsd(dst_reg, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
16968 
// Double add, non-AVX path (UseAVX == 0): $dst = $dst + LoadD($src), with the
// load folded into the instruction as a memory operand.
instruct addD_mem(regD dst, memory src)
%{
  match(Set dst (AddD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addsd   $dst, $src" %}
  ins_encode %{
    Address mem = $src$$Address;
    __ addsd($dst$$XMMRegister, mem);
  %}
  ins_pipe(pipe_slow);
%}
16980 
// Double add, non-AVX path (UseAVX == 0): $dst = $dst + $con, with the
// constant read from the constant table.
instruct addD_imm(regD dst, immD con)
%{
  match(Set dst (AddD dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // input and result
    __ addsd(dst_reg, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16991 
// Double add, AVX path (UseAVX > 0): three-operand $dst = $src1 + $src2.
instruct addD_reg_reg(regD dst, regD src1, regD src2)
%{
  match(Set dst (AddD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vaddsd($dst$$XMMRegister, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17003 
// Double add, AVX path (UseAVX > 0): $dst = $src1 + LoadD($src2), with the
// load folded into the instruction as a memory operand.
instruct addD_reg_mem(regD dst, regD src1, memory src2)
%{
  match(Set dst (AddD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_encode %{
    Address mem = $src2$$Address;
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, mem);
  %}
  ins_pipe(pipe_slow);
%}
17015 
// Double add, AVX path (UseAVX > 0): $dst = $src + $con, with the constant
// read from the constant table.
instruct addD_reg_imm(regD dst, regD src, immD con)
%{
  match(Set dst (AddD src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ vaddsd($dst$$XMMRegister, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17027 
// Float subtract, non-AVX path (UseAVX == 0): $dst = $dst - $src.
instruct subF_reg(regF dst, regF src)
%{
  match(Set dst (SubF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subss   $dst, $src" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // minuend and result
    __ subss(dst_reg, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17039 
// Float subtract, non-AVX path (UseAVX == 0): $dst = $dst - LoadF($src), with
// the load folded into the instruction as a memory operand.
instruct subF_mem(regF dst, memory src)
%{
  match(Set dst (SubF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subss   $dst, $src" %}
  ins_encode %{
    Address mem = $src$$Address;
    __ subss($dst$$XMMRegister, mem);
  %}
  ins_pipe(pipe_slow);
%}
17051 
// Float subtract, non-AVX path (UseAVX == 0): $dst = $dst - $con, with the
// constant read from the constant table.
instruct subF_imm(regF dst, immF con)
%{
  match(Set dst (SubF dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // minuend and result
    __ subss(dst_reg, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17062 
// Float subtract, AVX path (UseAVX > 0): three-operand $dst = $src1 - $src2.
instruct subF_reg_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (SubF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vsubss($dst$$XMMRegister, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17074 
// Float subtract, AVX path (UseAVX > 0): $dst = $src1 - LoadF($src2), with
// the load folded into the instruction as a memory operand.
instruct subF_reg_mem(regF dst, regF src1, memory src2)
%{
  match(Set dst (SubF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_encode %{
    Address mem = $src2$$Address;
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, mem);
  %}
  ins_pipe(pipe_slow);
%}
17086 
// Float subtract, AVX path (UseAVX > 0): $dst = $src - $con, with the
// constant read from the constant table.
instruct subF_reg_imm(regF dst, regF src, immF con)
%{
  match(Set dst (SubF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ vsubss($dst$$XMMRegister, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17098 
// Double subtract, non-AVX path (UseAVX == 0): $dst = $dst - $src.
instruct subD_reg(regD dst, regD src)
%{
  match(Set dst (SubD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subsd   $dst, $src" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // minuend and result
    __ subsd(dst_reg, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17110 
// Double subtract, non-AVX path (UseAVX == 0): $dst = $dst - LoadD($src),
// with the load folded into the instruction as a memory operand.
instruct subD_mem(regD dst, memory src)
%{
  match(Set dst (SubD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subsd   $dst, $src" %}
  ins_encode %{
    Address mem = $src$$Address;
    __ subsd($dst$$XMMRegister, mem);
  %}
  ins_pipe(pipe_slow);
%}
17122 
// Double subtract, non-AVX path (UseAVX == 0): $dst = $dst - $con, with the
// constant read from the constant table.
instruct subD_imm(regD dst, immD con)
%{
  match(Set dst (SubD dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // minuend and result
    __ subsd(dst_reg, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17133 
// Double subtract, AVX path (UseAVX > 0): three-operand $dst = $src1 - $src2.
instruct subD_reg_reg(regD dst, regD src1, regD src2)
%{
  match(Set dst (SubD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vsubsd($dst$$XMMRegister, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17145 
// Double subtract, AVX path (UseAVX > 0): $dst = $src1 - LoadD($src2), with
// the load folded into the instruction as a memory operand.
instruct subD_reg_mem(regD dst, regD src1, memory src2)
%{
  match(Set dst (SubD src1 (LoadD src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_encode %{
    Address mem = $src2$$Address;
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, mem);
  %}
  ins_pipe(pipe_slow);
%}
17157 
// Double subtract, AVX path (UseAVX > 0): $dst = $src - $con, with the
// constant read from the constant table.
instruct subD_reg_imm(regD dst, regD src, immD con)
%{
  match(Set dst (SubD src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ vsubsd($dst$$XMMRegister, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17169 
// Float multiply, non-AVX path (UseAVX == 0): $dst = $dst * $src.
instruct mulF_reg(regF dst, regF src)
%{
  match(Set dst (MulF dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulss   $dst, $src" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // input and result
    __ mulss(dst_reg, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17181 
// Float multiply, non-AVX path (UseAVX == 0): $dst = $dst * LoadF($src), with
// the load folded into the instruction as a memory operand.
instruct mulF_mem(regF dst, memory src)
%{
  match(Set dst (MulF dst (LoadF src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulss   $dst, $src" %}
  ins_encode %{
    Address mem = $src$$Address;
    __ mulss($dst$$XMMRegister, mem);
  %}
  ins_pipe(pipe_slow);
%}
17193 
// Float multiply, non-AVX path (UseAVX == 0): $dst = $dst * $con, with the
// constant read from the constant table.
instruct mulF_imm(regF dst, immF con)
%{
  match(Set dst (MulF dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // input and result
    __ mulss(dst_reg, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17204 
// Float multiply, AVX path (UseAVX > 0): three-operand $dst = $src1 * $src2.
instruct mulF_reg_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (MulF src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vmulss($dst$$XMMRegister, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17216 
// Float multiply, AVX path (UseAVX > 0): $dst = $src1 * LoadF($src2), with
// the load folded into the instruction as a memory operand.
instruct mulF_reg_mem(regF dst, regF src1, memory src2)
%{
  match(Set dst (MulF src1 (LoadF src2)));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_encode %{
    Address mem = $src2$$Address;
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, mem);
  %}
  ins_pipe(pipe_slow);
%}
17228 
// Float multiply, AVX path (UseAVX > 0): $dst = $src * $con, with the
// constant read from the constant table.
instruct mulF_reg_imm(regF dst, regF src, immF con)
%{
  match(Set dst (MulF src con));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ vmulss($dst$$XMMRegister, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17240 
// Double multiply, non-AVX path (UseAVX == 0): $dst = $dst * $src.
instruct mulD_reg(regD dst, regD src)
%{
  match(Set dst (MulD dst src));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulsd   $dst, $src" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // input and result
    __ mulsd(dst_reg, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17252 
// Double multiply, non-AVX path (UseAVX == 0): $dst = $dst * LoadD($src),
// with the load folded into the instruction as a memory operand.
instruct mulD_mem(regD dst, memory src)
%{
  match(Set dst (MulD dst (LoadD src)));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulsd   $dst, $src" %}
  ins_encode %{
    Address mem = $src$$Address;
    __ mulsd($dst$$XMMRegister, mem);
  %}
  ins_pipe(pipe_slow);
%}
17264 
// Double multiply, non-AVX path (UseAVX == 0): $dst = $dst * $con, with the
// constant read from the constant table.
instruct mulD_imm(regD dst, immD con)
%{
  match(Set dst (MulD dst con));
  predicate(UseAVX == 0);

  ins_cost(150);
  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister dst_reg = $dst$$XMMRegister;  // input and result
    __ mulsd(dst_reg, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17275 
// Double multiply, AVX path (UseAVX > 0): three-operand $dst = $src1 * $src2.
instruct mulD_reg_reg(regD dst, regD src1, regD src2)
%{
  match(Set dst (MulD src1 src2));
  predicate(UseAVX > 0);

  ins_cost(150);
  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vmulsd($dst$$XMMRegister, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17287 
// AVX double multiply whose second factor is loaded from memory:
// dst = src1 * LoadD(src2).
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  ins_cost(150);
  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister product = $dst$$XMMRegister;
    XMMRegister lhs     = $src1$$XMMRegister;
    __ vmulsd(product, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17299 
// AVX double multiply by a double constant from the constant table:
// dst = src * con.
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  ins_cost(150);
  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister product = $dst$$XMMRegister;
    XMMRegister lhs     = $src$$XMMRegister;
    __ vmulsd(product, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17311 
// Non-AVX float divide, two-operand form: dst = dst / src.
instruct divF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst src));

  ins_cost(150);
  format %{ "divss   $dst, $src" %}
  ins_encode %{
    XMMRegister dividend = $dst$$XMMRegister;
    XMMRegister divisor  = $src$$XMMRegister;
    __ divss(dividend, divisor);
  %}
  ins_pipe(pipe_slow);
%}
17323 
// Non-AVX float divide with the divisor loaded from memory:
// dst = dst / LoadF(src).
instruct divF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst (LoadF src)));

  ins_cost(150);
  format %{ "divss   $dst, $src" %}
  ins_encode %{
    XMMRegister dividend = $dst$$XMMRegister;
    __ divss(dividend, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17335 
// Non-AVX float divide by a float constant from the constant table:
// dst = dst / con.
instruct divF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst con));

  ins_cost(150);
  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister dividend = $dst$$XMMRegister;
    __ divss(dividend, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17346 
// AVX float divide, three-operand form: dst = src1 / src2.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  ins_cost(150);
  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister quotient = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    XMMRegister divisor  = $src2$$XMMRegister;
    __ vdivss(quotient, dividend, divisor);
  %}
  ins_pipe(pipe_slow);
%}
17358 
// AVX float divide with the divisor loaded from memory:
// dst = src1 / LoadF(src2).
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  ins_cost(150);
  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister quotient = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    __ vdivss(quotient, dividend, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17370 
// AVX float divide by a float constant from the constant table:
// dst = src / con.
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  ins_cost(150);
  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister quotient = $dst$$XMMRegister;
    XMMRegister dividend = $src$$XMMRegister;
    __ vdivss(quotient, dividend, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17382 
// Non-AVX double divide, two-operand form: dst = dst / src.
instruct divD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst src));

  ins_cost(150);
  format %{ "divsd   $dst, $src" %}
  ins_encode %{
    XMMRegister dividend = $dst$$XMMRegister;
    XMMRegister divisor  = $src$$XMMRegister;
    __ divsd(dividend, divisor);
  %}
  ins_pipe(pipe_slow);
%}
17394 
// Non-AVX double divide with the divisor loaded from memory:
// dst = dst / LoadD(src).
instruct divD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst (LoadD src)));

  ins_cost(150);
  format %{ "divsd   $dst, $src" %}
  ins_encode %{
    XMMRegister dividend = $dst$$XMMRegister;
    __ divsd(dividend, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17406 
// Non-AVX double divide by a double constant from the constant table:
// dst = dst / con.
instruct divD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst con));

  ins_cost(150);
  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister dividend = $dst$$XMMRegister;
    __ divsd(dividend, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17417 
// AVX double divide, three-operand form: dst = src1 / src2.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  ins_cost(150);
  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister quotient = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    XMMRegister divisor  = $src2$$XMMRegister;
    __ vdivsd(quotient, dividend, divisor);
  %}
  ins_pipe(pipe_slow);
%}
17429 
// AVX double divide with the divisor loaded from memory:
// dst = src1 / LoadD(src2).
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  ins_cost(150);
  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister quotient = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    __ vdivsd(quotient, dividend, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17441 
// AVX double divide by a double constant from the constant table:
// dst = src / con.
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  ins_cost(150);
  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister quotient = $dst$$XMMRegister;
    XMMRegister dividend = $src$$XMMRegister;
    __ vdivsd(quotient, dividend, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17453 
// Non-AVX float absolute value, in place: ANDs dst with the float_signmask()
// constant (sign masking, per the format string).
instruct absF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));

  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ andps(value, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
17464 
// AVX float absolute value: dst = src AND float_signmask(), issued with the
// 128-bit vector length encoding.
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));

  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vandps(result, input, ExternalAddress(float_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
17477 
// Non-AVX double absolute value, in place: ANDs dst with the
// double_signmask() constant (sign masking, per the format string).
instruct absD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));

  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ andpd(value, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
17489 
// AVX double absolute value: dst = src AND double_signmask(), issued with the
// 128-bit vector length encoding.
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));

  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vandpd(result, input, ExternalAddress(double_signmask()), Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
17503 
// Non-AVX float negation, in place: XORs dst with the float_signflip()
// constant (sign flipping, per the format string).
instruct negF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));

  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ xorps(value, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17514 
// AVX float negation: dst = negate(src), delegated to the vnegatess helper
// with the float_signflip() constant.
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));

  format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatess(result, input, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17526 
// Non-AVX double negation, in place: XORs dst with the double_signflip()
// constant (sign flipping, per the format string).
instruct negD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));

  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ xorpd(value, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17538 
// AVX double negation: dst = negate(src), delegated to the vnegatesd helper
// with the double_signflip() constant.
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));

  format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatesd(result, input, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
17551 
// Float square root, in place.
// sqrtss performs best when its destination register is already initialized,
// so the only rule provided is the one where the input has been pre-loaded
// into dst (dst is both source and destination).
instruct sqrtF_reg(regF dst) %{
  match(Set dst (SqrtF dst));

  format %{ "sqrtss  $dst, $dst" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ sqrtss(value, value);
  %}
  ins_pipe(pipe_slow);
%}
17562 
// Double square root, in place.
// sqrtsd performs best when its destination register is already initialized,
// so the only rule provided is the one where the input has been pre-loaded
// into dst (dst is both source and destination).
instruct sqrtD_reg(regD dst) %{
  match(Set dst (SqrtD dst));

  format %{ "sqrtsd  $dst, $dst" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ sqrtsd(value, value);
  %}
  ins_pipe(pipe_slow);
%}
17573 
// Scalar float -> half-float conversion (ConvF2HF) into a general-purpose
// register; tmp is an XMM scratch register handed to flt_to_flt16.
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  match(Set dst (ConvF2HF src));
  effect(TEMP tmp);

  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
  ins_encode %{
    Register    half    = $dst$$Register;
    XMMRegister input   = $src$$XMMRegister;
    XMMRegister scratch = $tmp$$XMMRegister;
    __ flt_to_flt16(half, input, scratch);
  %}
  ins_pipe( pipe_slow );
%}
17584 
// Float -> half-float conversion fused with the 16-bit store
// (StoreC of ConvF2HF). Requires AVX-512 with VL support.
// The opmask ktmp is loaded with the value 1 (via rtmp) and passed to the
// masked evcvtps2ph store.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  match(Set mem (StoreC mem (ConvF2HF src)));
  effect(TEMP ktmp, TEMP rtmp);

  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    Register  mask_bits = $rtmp$$Register;
    KRegister lane_mask = $ktmp$$KRegister;
    __ movl(mask_bits, 0x1);
    __ kmovwl(lane_mask, mask_bits);
    __ evcvtps2ph($mem$$Address, lane_mask, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
17597 
// Vector float -> half-float cast (VectorCastF2HF), register to register.
// The vector length encoding is derived from the source operand.
instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));

  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    XMMRegister halves = $dst$$XMMRegister;
    XMMRegister floats = $src$$XMMRegister;
    __ vcvtps2ph(halves, floats, 0x04, vector_length_encoding(this, $src));
  %}
  ins_pipe( pipe_slow );
%}
17607 
// Vector float -> half-float cast fused with the vector store. Only selected
// when the StoreVector writes at least 16 bytes.
instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  predicate(n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem (VectorCastF2HF src)));

  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    XMMRegister floats = $src$$XMMRegister;
    __ vcvtps2ph($mem$$Address, floats, 0x04, vector_length_encoding(this, $src));
  %}
  ins_pipe( pipe_slow );
%}
17618 
// Scalar half-float -> float conversion (ConvHF2F) from a general-purpose
// register, delegated to flt16_to_flt.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));

  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    XMMRegister widened = $dst$$XMMRegister;
    Register    half    = $src$$Register;
    __ flt16_to_flt(widened, half);
  %}
  ins_pipe( pipe_slow );
%}
17627 
// Vector half-float -> float cast fused with the vector load
// (VectorCastHF2F of LoadVector).
instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));

  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    XMMRegister floats = $dst$$XMMRegister;
    __ vcvtph2ps(floats, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
17637 
// Vector half-float -> float cast (VectorCastHF2F), register to register.
instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));

  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister floats = $dst$$XMMRegister;
    XMMRegister halves = $src$$XMMRegister;
    __ vcvtph2ps(floats, halves, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
17648 
17649 // ---------------------------------------- VectorReinterpret ------------------------------------
// Reinterpret of a predicate (opmask) vector whose lane count is unchanged.
// dst doubles as the source, so no code is emitted.
instruct reinterpret_mask(kReg dst) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // same lane count in and out
  match(Set dst (VectorReinterpret dst));

  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // nothing to emit
  %}
  ins_pipe( pipe_slow );
%}
17661 
// Reinterpret a T_SHORT opmask as a T_BYTE opmask when the lane counts differ.
// The mask is materialized into the vector xtmp with evpmovm2w and converted
// back to an opmask at byte granularity with evpmovb2m.
instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // short mask -> byte mask
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    // Both sides must describe the same number of bytes.
    int in_bytes  = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int out_bytes = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(in_bytes == out_bytes , "src and dst size mismatch");

    int         vlen_enc = vector_length_encoding(in_bytes);
    XMMRegister expanded = $xtmp$$XMMRegister;
    __ evpmovm2w(expanded, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, expanded, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
17681 
// Reinterpret a T_INT/T_FLOAT opmask as a T_BYTE opmask when the lane counts
// differ. The mask is materialized into the vector xtmp with evpmovm2d and
// converted back to an opmask at byte granularity with evpmovb2m.
instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // int/float mask -> byte mask
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    // Both sides must describe the same number of bytes.
    int in_bytes  = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int out_bytes = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(in_bytes == out_bytes , "src and dst size mismatch");

    int         vlen_enc = vector_length_encoding(in_bytes);
    XMMRegister expanded = $xtmp$$XMMRegister;
    __ evpmovm2d(expanded, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, expanded, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
17702 
// Reinterpret a T_LONG/T_DOUBLE opmask as a T_BYTE opmask when the lane counts
// differ. The mask is materialized into the vector xtmp with evpmovm2q and
// converted back to an opmask at byte granularity with evpmovb2m.
instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // long/double mask -> byte mask
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    // Both sides must describe the same number of bytes.
    int in_bytes  = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int out_bytes = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(in_bytes == out_bytes , "src and dst size mismatch");

    int         vlen_enc = vector_length_encoding(in_bytes);
    XMMRegister expanded = $xtmp$$XMMRegister;
    __ evpmovm2q(expanded, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, expanded, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
17723 
// Reinterpret of a non-mask vector whose size in bytes is unchanged.
// dst doubles as the source, so no code is emitted.
instruct reinterpret(vec dst) %{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // same byte size in and out
  match(Set dst (VectorReinterpret dst));

  format %{ "vector_reinterpret $dst\t!" %}
  ins_cost(125);
  ins_encode %{
    // nothing to emit
  %}
  ins_pipe( pipe_slow );
%}
17735 
// Non-AVX widening reinterpret (source smaller than destination).
// dst is first loaded with a 32-bit or 64-bit mask constant, chosen by the
// source size, and then ANDed with src, so only the source-sized part of src
// is kept. dst is a TEMP because it is written before src is read.
instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  effect(TEMP dst);

  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");

    XMMRegister widened   = $dst$$XMMRegister;
    int         src_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_bytes != 4) {
      assert(src_bytes == 8, "");
      __ movdqu(widened, ExternalAddress(vector_64_bit_mask()), noreg);
    } else {
      __ movdqu(widened, ExternalAddress(vector_32_bit_mask()), noreg);
    }
    __ pand(widened, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
17758 
// AVX widening reinterpret of a 4-byte non-mask vector: dst = src AND the
// 32-bit mask constant.
instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src is exactly 4 bytes
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));

  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister widened = $dst$$XMMRegister;
    XMMRegister narrow  = $src$$XMMRegister;
    __ vpand(widened, narrow, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}
17772 
17773 
// AVX widening reinterpret of a non-mask vector larger than 4 bytes.
// Emits a register move whose width is the source size (8, 16 or 32 bytes).
instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src is wider than 4 bytes
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));

  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister widened = $dst$$XMMRegister;
    XMMRegister narrow  = $src$$XMMRegister;
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  8:
        __ movq(widened, narrow);
        break;
      case 16:
        __ movdqu(widened, narrow);
        break;
      case 32:
        __ vmovdqu(widened, narrow);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
17792 
// Narrowing reinterpret of a non-mask vector (source larger than destination).
// Emits a register move whose width is the destination size
// (4, 8, 16 or 32 bytes).
instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
  match(Set dst (VectorReinterpret src));

  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister narrow = $dst$$XMMRegister;
    XMMRegister wide   = $src$$XMMRegister;
    switch (Matcher::vector_length_in_bytes(this)) {
      case  4:
        __ movfltz(narrow, wide);
        break;
      case  8:
        __ movq(narrow, wide);
        break;
      case 16:
        __ movdqu(narrow, wide);
        break;
      case 32:
        __ vmovdqu(narrow, wide);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
17810 
17811 // ----------------------------------------------------------------------------------------------------
17812 
// Scalar double rounding with an immediate rounding mode (RoundDoubleMode).
// Without AVX, dst is zeroed first whenever it is a different register
// from src.
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));

  ins_cost(150);
  format %{ "roundsd $dst,$src" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    if ((UseAVX == 0) && (rounded != input)) {
      __ pxor(rounded, rounded);
    }
    __ roundsd(rounded, input, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}
17826 
// Scalar double rounding of a double constant read from the constant table.
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));

  ins_cost(150);
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    __ roundsd(rounded, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe(pipe_slow);
%}
17837 
// Packed double rounding for vectors with fewer than 8 lanes, register
// source, using vroundpd.
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV src rmode));

  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    __ vroundpd(rounded, input, $rmode$$constant, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
17849 
// Packed double rounding for 8-lane vectors, register source, using
// vrndscalepd with the 512-bit encoding.
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV src rmode));

  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    __ vrndscalepd(rounded, input, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
17860 
// Packed double rounding for vectors with fewer than 8 lanes, with the
// source vector loaded from memory, using vroundpd.
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));

  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    __ vroundpd(rounded, $mem$$Address, $rmode$$constant, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
17872 
// Packed double rounding for 8-lane vectors, with the source vector loaded
// from memory, using vrndscalepd with the 512-bit encoding.
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));

  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    __ vrndscalepd(rounded, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
17883 
// OnSpinWait node: emits a single pause instruction.
instruct onspinwait() %{
  match(OnSpinWait);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_cost(200);
  ins_encode %{
    __ pause();
  %}
  ins_pipe( pipe_slow );
%}
17897 
// Fused multiply-add on doubles: c = a * b + c.
// c is both the addend input and the destination.
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD  c (Binary a b)));

  ins_cost(150);
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister accum = $c$$XMMRegister;
    __ fmad(accum, $a$$XMMRegister, $b$$XMMRegister, accum);
  %}
  ins_pipe( pipe_slow );
%}
17909 
// Fused multiply-add on floats: c = a * b + c.
// c is both the addend input and the destination.
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF  c (Binary a b)));

  ins_cost(150);
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister accum = $c$$XMMRegister;
    __ fmaf(accum, $a$$XMMRegister, $b$$XMMRegister, accum);
  %}
  ins_pipe( pipe_slow );
%}
17921 
17922 // ====================VECTOR INSTRUCTIONS=====================================
17923 
// Placeholder vec -> legVec register move. These dummy moves are removed
// during post-selection cleanup, so the encoding must never be reached.
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);

  format %{ "" %}
  ins_encode %{
    // Must have been eliminated before code emission.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
17933 
// Placeholder legVec -> vec register move; its encoding is not expected to be
// emitted (it traps via ShouldNotReachHere if it ever is).
instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);

  format %{ "" %}
  ins_encode %{
    // Must have been eliminated before code emission.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
17942 
17943 // ============================================================================
17944 
// Generic vector load: covers every LoadVector by delegating to load_vector
// with the node's element type and size in bytes.
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));

  format %{ "load_vector $dst,$mem" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister loaded  = $dst$$XMMRegister;
    BasicType   elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector(elem_bt, loaded, $mem$$Address, Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe( pipe_slow );
%}
17956 
// Generic vector store: covers every StoreVector and picks the move
// instruction from the size in bytes of the stored vector
// (4, 8, 16, 32 or 64); any other size is a matcher bug.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  // The format string used to end in "\n\t", which left an empty continuation
  // line in the disassembly listing; it now ends right after the operands.
  format %{ "store_vector $mem,$src" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
      case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
      case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
      case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
      case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
17974 
17975 // ---------------------------------------- Gather ------------------------------------
17976 
17977 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
17978 
// Unmasked gather of non-subword elements for targets without AVX-512 VL,
// limited to vectors of at most 32 bytes.
// The vector mask is produced by comparing the mask register with itself
// (vpcmpeqd), and the base address is materialized into tmp with lea before
// vgather is called.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);

  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    BasicType elem_bt  = Matcher::vector_element_basic_type(this);
    int       vlen_enc = vector_length_encoding(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    XMMRegister lane_mask = $mask$$XMMRegister;
    Register    base      = $tmp$$Register;
    __ vpcmpeqd(lane_mask, lane_mask, lane_mask, vlen_enc);
    __ lea(base, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, base, $idx$$XMMRegister, lane_mask, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
17995 
17996 
// Unmasked gather of non-subword elements using an opmask register.
// Selected when AVX-512 VL is available or the vector is 64 bytes long.
// ktmp is initialized with kxnorwl of itself before the gather, and the base
// address is materialized into tmp with lea.
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  // "$ktmp" (with the '$') so the allocated opmask register is printed
  // instead of the literal text "ktmp".
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18012 
// Masked gather of non-subword elements using an opmask register.
// Selected when AVX-512 VL is available or the vector is 64 bytes long.
// dst is cleared with vpxor before the gather, and the base address is
// materialized into tmp with lea.
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  // "$ktmp" (with the '$') so the allocated opmask register is printed
  // instead of the literal text "ktmp".
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18033 
// Unmasked gather of subword elements for vectors of at most 8 bytes.
// The base address is materialized into tmp with lea and the work is
// delegated to vgather8b; idx_base is passed as a general-purpose register.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP tmp, TEMP rtmp);

  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt  = Matcher::vector_element_basic_type(this);
    int       vlen_enc = vector_length_encoding(this);
    Register  base     = $tmp$$Register;
    __ lea(base, $mem$$Address);
    __ vgather8b(elem_bt, $dst$$XMMRegister, base, $idx_base$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18047 
// Unmasked gather of subword elements for vectors larger than 8 bytes.
// idx_base is copied into idx_base_temp and the copy is what vgather_subword
// receives; the base address is materialized into tmp with lea.
// The two mask-related arguments of vgather_subword are passed as noreg.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);

  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    BasicType elem_bt    = Matcher::vector_element_basic_type(this);
    int       lane_count = Matcher::vector_length(this);
    int       vlen_enc   = vector_length_encoding(this);
    Register  base       = $tmp$$Register;
    Register  index_copy = $idx_base_temp$$Register;

    __ lea(base, $mem$$Address);
    __ movptr(index_copy, $idx_base$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, base, index_copy, noreg,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, noreg, $length$$Register, lane_count, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18065 
18066 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18067   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18068   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18069   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18070   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18071   ins_encode %{
18072     int vlen_enc = vector_length_encoding(this);
18073     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18074     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18075     __ lea($tmp$$Register, $mem$$Address);
18076     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18077     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18078   %}
18079   ins_pipe( pipe_slow );
18080 %}
18081 
18082 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18083                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18084   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18085   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18086   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18087   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18088   ins_encode %{
18089     int vlen_enc = vector_length_encoding(this);
18090     int vector_len = Matcher::vector_length(this);
18091     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18092     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18093     __ lea($tmp$$Register, $mem$$Address);
18094     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18095     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18096     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18097                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18098   %}
18099   ins_pipe( pipe_slow );
18100 %}
18101 
18102 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18103   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18104   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18105   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18106   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18107   ins_encode %{
18108     int vlen_enc = vector_length_encoding(this);
18109     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18110     __ lea($tmp$$Register, $mem$$Address);
18111     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18112     if (elem_bt == T_SHORT) {
18113       __ movl($mask_idx$$Register, 0x55555555);
18114       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18115     }
18116     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18117     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18118   %}
18119   ins_pipe( pipe_slow );
18120 %}
18121 
18122 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18123                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18124   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18125   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18126   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18127   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18128   ins_encode %{
18129     int vlen_enc = vector_length_encoding(this);
18130     int vector_len = Matcher::vector_length(this);
18131     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18132     __ lea($tmp$$Register, $mem$$Address);
18133     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18134     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18135     if (elem_bt == T_SHORT) {
18136       __ movl($mask_idx$$Register, 0x55555555);
18137       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18138     }
18139     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18140     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18141                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18142   %}
18143   ins_pipe( pipe_slow );
18144 %}
18145 
18146 // ====================Scatter=======================================
18147 
18148 // Scatter INT, LONG, FLOAT, DOUBLE
18149 
18150 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18151   predicate(UseAVX > 2);
18152   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18153   effect(TEMP tmp, TEMP ktmp);
18154   format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
18155   ins_encode %{
18156     int vlen_enc = vector_length_encoding(this, $src);
18157     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18158 
18159     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18160     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18161 
18162     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18163     __ lea($tmp$$Register, $mem$$Address);
18164     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18165   %}
18166   ins_pipe( pipe_slow );
18167 %}
18168 
18169 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18170   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18171   effect(TEMP tmp, TEMP ktmp);
18172   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18173   ins_encode %{
18174     int vlen_enc = vector_length_encoding(this, $src);
18175     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18176     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18177     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18178     // Note: Since scatter instruction partially updates the opmask register used
18179     // for predication hense moving mask operand to a temporary.
18180     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18181     __ lea($tmp$$Register, $mem$$Address);
18182     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18183   %}
18184   ins_pipe( pipe_slow );
18185 %}
18186 
18187 // ====================REPLICATE=======================================
18188 
18189 // Replicate byte scalar to be vector
18190 instruct vReplB_reg(vec dst, rRegI src) %{
18191   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18192   match(Set dst (Replicate src));
18193   format %{ "replicateB $dst,$src" %}
18194   ins_encode %{
18195     uint vlen = Matcher::vector_length(this);
18196     if (UseAVX >= 2) {
18197       int vlen_enc = vector_length_encoding(this);
18198       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18199         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18200         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18201       } else {
18202         __ movdl($dst$$XMMRegister, $src$$Register);
18203         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18204       }
18205     } else {
18206        assert(UseAVX < 2, "");
18207       __ movdl($dst$$XMMRegister, $src$$Register);
18208       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18209       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18210       if (vlen >= 16) {
18211         assert(vlen == 16, "");
18212         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18213       }
18214     }
18215   %}
18216   ins_pipe( pipe_slow );
18217 %}
18218 
18219 instruct ReplB_mem(vec dst, memory mem) %{
18220   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18221   match(Set dst (Replicate (LoadB mem)));
18222   format %{ "replicateB $dst,$mem" %}
18223   ins_encode %{
18224     int vlen_enc = vector_length_encoding(this);
18225     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18226   %}
18227   ins_pipe( pipe_slow );
18228 %}
18229 
18230 // ====================ReplicateS=======================================
18231 
18232 instruct vReplS_reg(vec dst, rRegI src) %{
18233   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18234   match(Set dst (Replicate src));
18235   format %{ "replicateS $dst,$src" %}
18236   ins_encode %{
18237     uint vlen = Matcher::vector_length(this);
18238     int vlen_enc = vector_length_encoding(this);
18239     if (UseAVX >= 2) {
18240       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18241         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18242         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18243       } else {
18244         __ movdl($dst$$XMMRegister, $src$$Register);
18245         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18246       }
18247     } else {
18248       assert(UseAVX < 2, "");
18249       __ movdl($dst$$XMMRegister, $src$$Register);
18250       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18251       if (vlen >= 8) {
18252         assert(vlen == 8, "");
18253         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18254       }
18255     }
18256   %}
18257   ins_pipe( pipe_slow );
18258 %}
18259 
18260 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18261   match(Set dst (Replicate con));
18262   effect(TEMP rtmp);
18263   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18264   ins_encode %{
18265     int vlen_enc = vector_length_encoding(this);
18266     BasicType bt = Matcher::vector_element_basic_type(this);
18267     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18268     __ movl($rtmp$$Register, $con$$constant);
18269     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18270   %}
18271   ins_pipe( pipe_slow );
18272 %}
18273 
18274 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18275   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18276   match(Set dst (Replicate src));
18277   effect(TEMP rtmp);
18278   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18279   ins_encode %{
18280     int vlen_enc = vector_length_encoding(this);
18281     __ evmovw($rtmp$$Register, $src$$XMMRegister);
18282     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18283   %}
18284   ins_pipe( pipe_slow );
18285 %}
18286 
18287 instruct ReplS_mem(vec dst, memory mem) %{
18288   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18289   match(Set dst (Replicate (LoadS mem)));
18290   format %{ "replicateS $dst,$mem" %}
18291   ins_encode %{
18292     int vlen_enc = vector_length_encoding(this);
18293     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18294   %}
18295   ins_pipe( pipe_slow );
18296 %}
18297 
18298 // ====================ReplicateI=======================================
18299 
18300 instruct ReplI_reg(vec dst, rRegI src) %{
18301   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18302   match(Set dst (Replicate src));
18303   format %{ "replicateI $dst,$src" %}
18304   ins_encode %{
18305     uint vlen = Matcher::vector_length(this);
18306     int vlen_enc = vector_length_encoding(this);
18307     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18308       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18309     } else if (VM_Version::supports_avx2()) {
18310       __ movdl($dst$$XMMRegister, $src$$Register);
18311       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18312     } else {
18313       __ movdl($dst$$XMMRegister, $src$$Register);
18314       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18315     }
18316   %}
18317   ins_pipe( pipe_slow );
18318 %}
18319 
18320 instruct ReplI_mem(vec dst, memory mem) %{
18321   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18322   match(Set dst (Replicate (LoadI mem)));
18323   format %{ "replicateI $dst,$mem" %}
18324   ins_encode %{
18325     int vlen_enc = vector_length_encoding(this);
18326     if (VM_Version::supports_avx2()) {
18327       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18328     } else if (VM_Version::supports_avx()) {
18329       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18330     } else {
18331       __ movdl($dst$$XMMRegister, $mem$$Address);
18332       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18333     }
18334   %}
18335   ins_pipe( pipe_slow );
18336 %}
18337 
18338 instruct ReplI_imm(vec dst, immI con) %{
18339   predicate(Matcher::is_non_long_integral_vector(n));
18340   match(Set dst (Replicate con));
18341   format %{ "replicateI $dst,$con" %}
18342   ins_encode %{
18343     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18344                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18345                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18346     BasicType bt = Matcher::vector_element_basic_type(this);
18347     int vlen = Matcher::vector_length_in_bytes(this);
18348     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18349   %}
18350   ins_pipe( pipe_slow );
18351 %}
18352 
18353 // Replicate scalar zero to be vector
18354 instruct ReplI_zero(vec dst, immI_0 zero) %{
18355   predicate(Matcher::is_non_long_integral_vector(n));
18356   match(Set dst (Replicate zero));
18357   format %{ "replicateI $dst,$zero" %}
18358   ins_encode %{
18359     int vlen_enc = vector_length_encoding(this);
18360     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18361       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18362     } else {
18363       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18364     }
18365   %}
18366   ins_pipe( fpu_reg_reg );
18367 %}
18368 
18369 instruct ReplI_M1(vec dst, immI_M1 con) %{
18370   predicate(Matcher::is_non_long_integral_vector(n));
18371   match(Set dst (Replicate con));
18372   format %{ "vallones $dst" %}
18373   ins_encode %{
18374     int vector_len = vector_length_encoding(this);
18375     __ vallones($dst$$XMMRegister, vector_len);
18376   %}
18377   ins_pipe( pipe_slow );
18378 %}
18379 
18380 // ====================ReplicateL=======================================
18381 
18382 // Replicate long (8 byte) scalar to be vector
18383 instruct ReplL_reg(vec dst, rRegL src) %{
18384   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18385   match(Set dst (Replicate src));
18386   format %{ "replicateL $dst,$src" %}
18387   ins_encode %{
18388     int vlen = Matcher::vector_length(this);
18389     int vlen_enc = vector_length_encoding(this);
18390     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18391       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18392     } else if (VM_Version::supports_avx2()) {
18393       __ movdq($dst$$XMMRegister, $src$$Register);
18394       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18395     } else {
18396       __ movdq($dst$$XMMRegister, $src$$Register);
18397       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18398     }
18399   %}
18400   ins_pipe( pipe_slow );
18401 %}
18402 
18403 instruct ReplL_mem(vec dst, memory mem) %{
18404   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18405   match(Set dst (Replicate (LoadL mem)));
18406   format %{ "replicateL $dst,$mem" %}
18407   ins_encode %{
18408     int vlen_enc = vector_length_encoding(this);
18409     if (VM_Version::supports_avx2()) {
18410       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18411     } else if (VM_Version::supports_sse3()) {
18412       __ movddup($dst$$XMMRegister, $mem$$Address);
18413     } else {
18414       __ movq($dst$$XMMRegister, $mem$$Address);
18415       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18416     }
18417   %}
18418   ins_pipe( pipe_slow );
18419 %}
18420 
18421 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18422 instruct ReplL_imm(vec dst, immL con) %{
18423   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18424   match(Set dst (Replicate con));
18425   format %{ "replicateL $dst,$con" %}
18426   ins_encode %{
18427     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18428     int vlen = Matcher::vector_length_in_bytes(this);
18429     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18430   %}
18431   ins_pipe( pipe_slow );
18432 %}
18433 
18434 instruct ReplL_zero(vec dst, immL0 zero) %{
18435   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18436   match(Set dst (Replicate zero));
18437   format %{ "replicateL $dst,$zero" %}
18438   ins_encode %{
18439     int vlen_enc = vector_length_encoding(this);
18440     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18441       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18442     } else {
18443       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18444     }
18445   %}
18446   ins_pipe( fpu_reg_reg );
18447 %}
18448 
18449 instruct ReplL_M1(vec dst, immL_M1 con) %{
18450   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18451   match(Set dst (Replicate con));
18452   format %{ "vallones $dst" %}
18453   ins_encode %{
18454     int vector_len = vector_length_encoding(this);
18455     __ vallones($dst$$XMMRegister, vector_len);
18456   %}
18457   ins_pipe( pipe_slow );
18458 %}
18459 
18460 // ====================ReplicateF=======================================
18461 
18462 instruct vReplF_reg(vec dst, vlRegF src) %{
18463   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18464   match(Set dst (Replicate src));
18465   format %{ "replicateF $dst,$src" %}
18466   ins_encode %{
18467     uint vlen = Matcher::vector_length(this);
18468     int vlen_enc = vector_length_encoding(this);
18469     if (vlen <= 4) {
18470       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18471     } else if (VM_Version::supports_avx2()) {
18472       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18473     } else {
18474       assert(vlen == 8, "sanity");
18475       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18476       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18477     }
18478   %}
18479   ins_pipe( pipe_slow );
18480 %}
18481 
18482 instruct ReplF_reg(vec dst, vlRegF src) %{
18483   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18484   match(Set dst (Replicate src));
18485   format %{ "replicateF $dst,$src" %}
18486   ins_encode %{
18487     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18488   %}
18489   ins_pipe( pipe_slow );
18490 %}
18491 
18492 instruct ReplF_mem(vec dst, memory mem) %{
18493   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18494   match(Set dst (Replicate (LoadF mem)));
18495   format %{ "replicateF $dst,$mem" %}
18496   ins_encode %{
18497     int vlen_enc = vector_length_encoding(this);
18498     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18499   %}
18500   ins_pipe( pipe_slow );
18501 %}
18502 
18503 // Replicate float scalar immediate to be vector by loading from const table.
18504 instruct ReplF_imm(vec dst, immF con) %{
18505   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18506   match(Set dst (Replicate con));
18507   format %{ "replicateF $dst,$con" %}
18508   ins_encode %{
18509     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18510                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18511     int vlen = Matcher::vector_length_in_bytes(this);
18512     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18513   %}
18514   ins_pipe( pipe_slow );
18515 %}
18516 
18517 instruct ReplF_zero(vec dst, immF0 zero) %{
18518   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18519   match(Set dst (Replicate zero));
18520   format %{ "replicateF $dst,$zero" %}
18521   ins_encode %{
18522     int vlen_enc = vector_length_encoding(this);
18523     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18524       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18525     } else {
18526       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18527     }
18528   %}
18529   ins_pipe( fpu_reg_reg );
18530 %}
18531 
18532 // ====================ReplicateD=======================================
18533 
18534 // Replicate double (8 bytes) scalar to be vector
18535 instruct vReplD_reg(vec dst, vlRegD src) %{
18536   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18537   match(Set dst (Replicate src));
18538   format %{ "replicateD $dst,$src" %}
18539   ins_encode %{
18540     uint vlen = Matcher::vector_length(this);
18541     int vlen_enc = vector_length_encoding(this);
18542     if (vlen <= 2) {
18543       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18544     } else if (VM_Version::supports_avx2()) {
18545       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18546     } else {
18547       assert(vlen == 4, "sanity");
18548       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18549       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18550     }
18551   %}
18552   ins_pipe( pipe_slow );
18553 %}
18554 
18555 instruct ReplD_reg(vec dst, vlRegD src) %{
18556   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18557   match(Set dst (Replicate src));
18558   format %{ "replicateD $dst,$src" %}
18559   ins_encode %{
18560     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18561   %}
18562   ins_pipe( pipe_slow );
18563 %}
18564 
18565 instruct ReplD_mem(vec dst, memory mem) %{
18566   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18567   match(Set dst (Replicate (LoadD mem)));
18568   format %{ "replicateD $dst,$mem" %}
18569   ins_encode %{
18570     if (Matcher::vector_length(this) >= 4) {
18571       int vlen_enc = vector_length_encoding(this);
18572       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18573     } else {
18574       __ movddup($dst$$XMMRegister, $mem$$Address);
18575     }
18576   %}
18577   ins_pipe( pipe_slow );
18578 %}
18579 
18580 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
18581 instruct ReplD_imm(vec dst, immD con) %{
18582   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18583   match(Set dst (Replicate con));
18584   format %{ "replicateD $dst,$con" %}
18585   ins_encode %{
18586     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18587     int vlen = Matcher::vector_length_in_bytes(this);
18588     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
18589   %}
18590   ins_pipe( pipe_slow );
18591 %}
18592 
18593 instruct ReplD_zero(vec dst, immD0 zero) %{
18594   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18595   match(Set dst (Replicate zero));
18596   format %{ "replicateD $dst,$zero" %}
18597   ins_encode %{
18598     int vlen_enc = vector_length_encoding(this);
18599     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18600       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18601     } else {
18602       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18603     }
18604   %}
18605   ins_pipe( fpu_reg_reg );
18606 %}
18607 
18608 // ====================VECTOR INSERT=======================================
18609 
18610 instruct insert(vec dst, rRegI val, immU8 idx) %{
18611   predicate(Matcher::vector_length_in_bytes(n) < 32);
18612   match(Set dst (VectorInsert (Binary dst val) idx));
18613   format %{ "vector_insert $dst,$val,$idx" %}
18614   ins_encode %{
18615     assert(UseSSE >= 4, "required");
18616     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
18617 
18618     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18619 
18620     assert(is_integral_type(elem_bt), "");
18621     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18622 
18623     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
18624   %}
18625   ins_pipe( pipe_slow );
18626 %}
18627 
18628 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
18629   predicate(Matcher::vector_length_in_bytes(n) == 32);
18630   match(Set dst (VectorInsert (Binary src val) idx));
18631   effect(TEMP vtmp);
18632   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18633   ins_encode %{
18634     int vlen_enc = Assembler::AVX_256bit;
18635     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18636     int elem_per_lane = 16/type2aelembytes(elem_bt);
18637     int log2epr = log2(elem_per_lane);
18638 
18639     assert(is_integral_type(elem_bt), "sanity");
18640     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18641 
18642     uint x_idx = $idx$$constant & right_n_bits(log2epr);
18643     uint y_idx = ($idx$$constant >> log2epr) & 1;
18644     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18645     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18646     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18647   %}
18648   ins_pipe( pipe_slow );
18649 %}
18650 
18651 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
18652   predicate(Matcher::vector_length_in_bytes(n) == 64);
18653   match(Set dst (VectorInsert (Binary src val) idx));
18654   effect(TEMP vtmp);
18655   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18656   ins_encode %{
18657     assert(UseAVX > 2, "sanity");
18658 
18659     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18660     int elem_per_lane = 16/type2aelembytes(elem_bt);
18661     int log2epr = log2(elem_per_lane);
18662 
18663     assert(is_integral_type(elem_bt), "");
18664     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18665 
18666     uint x_idx = $idx$$constant & right_n_bits(log2epr);
18667     uint y_idx = ($idx$$constant >> log2epr) & 3;
18668     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18669     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18670     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18671   %}
18672   ins_pipe( pipe_slow );
18673 %}
18674 
18675 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
18676   predicate(Matcher::vector_length(n) == 2);
18677   match(Set dst (VectorInsert (Binary dst val) idx));
18678   format %{ "vector_insert $dst,$val,$idx" %}
18679   ins_encode %{
18680     assert(UseSSE >= 4, "required");
18681     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
18682     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18683 
18684     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
18685   %}
18686   ins_pipe( pipe_slow );
18687 %}
18688 
18689 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
18690   predicate(Matcher::vector_length(n) == 4);
18691   match(Set dst (VectorInsert (Binary src val) idx));
18692   effect(TEMP vtmp);
18693   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18694   ins_encode %{
18695     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
18696     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18697 
18698     uint x_idx = $idx$$constant & right_n_bits(1);
18699     uint y_idx = ($idx$$constant >> 1) & 1;
18700     int vlen_enc = Assembler::AVX_256bit;
18701     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18702     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18703     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18704   %}
18705   ins_pipe( pipe_slow );
18706 %}
18707 
18708 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
18709   predicate(Matcher::vector_length(n) == 8);
18710   match(Set dst (VectorInsert (Binary src val) idx));
18711   effect(TEMP vtmp);
18712   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18713   ins_encode %{
18714     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
18715     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18716 
18717     uint x_idx = $idx$$constant & right_n_bits(1);
18718     uint y_idx = ($idx$$constant >> 1) & 3;
18719     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18720     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18721     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18722   %}
18723   ins_pipe( pipe_slow );
18724 %}
18725 
// Insert a float into one element of a float vector with fewer than eight
// elements. The vector is updated in place ($dst is both input and result)
// with a single insertps.
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Only the low two index bits are meaningful; they are shifted into
    // bits 5:4 of the insertps immediate.
    const uint elem_slot = $idx$$constant & right_n_bits(2);
    const uint imm       = elem_slot << 4;
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, imm);
  %}
  ins_pipe( pipe_slow );
%}
18741 
// Insert a float into one element of a float vector with eight or more
// elements (the encoding handles exactly 8 or 16). The 128-bit lane that
// holds the target element is copied from $src into $vtmp, patched with
// vinsertps, and merged over $src to form $dst.
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    // Low two index bits: element within a 128-bit lane (four floats per lane).
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      // Two lanes: one further index bit selects the lane.
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      // Four lanes: two further index bits select the lane.
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
18769 
// Insert a double into one element of a 2-element double vector, in place
// ($dst is both input and result). The value is routed through the
// general-purpose TEMP $tmp because pinsrq is given a GPR source here.
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    Register bits = $tmp$$Register;
    // Copy the 64-bit pattern of the double into the GPR, then drop it
    // into the selected quadword of the vector.
    __ movq(bits, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, bits, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
18785 
// Insert a double into one element of a 4-element double vector. The value
// is first moved into the general-purpose TEMP $tmp; the 128-bit lane that
// holds the target element is copied from $src into $vtmp, patched with
// vpinsrq, and merged over $src to form $dst.
instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Bit 0 of the index: quadword within the 128-bit lane.
    uint x_idx = $idx$$constant & right_n_bits(1);
    // Bit 1 of the index: which of the two 128-bit lanes.
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18805 
// Insert a double into one element of an 8-element double vector. The value
// is first moved into the general-purpose TEMP $tmp; the 128-bit lane that
// holds the target element is copied from $src into $vtmp, patched with
// vpinsrq, and merged over $src to form $dst.
// NOTE(review): idx is declared immI here while the sibling insert rules use
// immU8 - confirm whether that difference is intentional.
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  // Both TEMPs are listed so the printed form matches the declared effects.
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Bit 0 of the index: quadword within the 128-bit lane.
    uint x_idx = $idx$$constant & right_n_bits(1);
    // Bits 1..2 of the index: which of the four 128-bit lanes.
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18824 
18825 // ====================REDUCTION ARITHMETIC=======================================
18826 
18827 // =======================Int Reduction==========================================
18828 
// Reduction of an int vector ($src2) combined with a scalar input ($src1)
// into the scalar $dst. Applies to every listed reduction node whose vector
// input has T_INT elements; code generation is handled by reduceI().
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is taken from the vector operand, not from the node itself.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ reduceI(ideal_op, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18849 
18850 // =======================Long Reduction==========================================
18851 
// Reduction of a long vector ($src2) combined with a scalar input ($src1)
// into the scalar $dst, selected when AVX512DQ is not available. Uses legVec
// operands; code generation is handled by reduceL().
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is taken from the vector operand.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ reduceL(ideal_op, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18872 
// Reduction of a long vector ($src2) combined with a scalar input ($src1)
// into the scalar $dst, selected when AVX512DQ is available. Same encoding
// as the non-AVX512DQ rule but with vec instead of legVec operands.
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is taken from the vector operand.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ reduceL(ideal_op, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18893 
18894 // =======================Float Reduction==========================================
18895 
// Strictly ordered float add/mul reduction for vectors of at most four
// elements. $dst is both the scalar input and the result (it appears as the
// first input in the match rules); code generation is handled by reduce_fp().
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op  = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18909 
// Strictly ordered float add/mul reduction for 8-element vectors. $dst is
// both the scalar input and the result; two vector TEMPs are handed to
// reduce_fp().
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op  = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18923 
// Strictly ordered float add/mul reduction for 16-element vectors. $dst is
// both the scalar input and the result; the vector operands are legVec and
// two vector TEMPs are handed to reduce_fp().
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op  = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18937 
18938 
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Float add/mul reduction of a 2-element vector where strict ordering is
  // not required. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the encoding.
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18955 
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Float add/mul reduction of a 4-element vector where strict ordering is
  // not required. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the encoding.
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18972 
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Float add/mul reduction of an 8-element vector where strict ordering is
  // not required. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the encoding.
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18989 
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Float add/mul reduction of a 16-element vector where strict ordering is
  // not required. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the encoding.
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19006 
19007 // =======================Double Reduction==========================================
19008 
// Strictly ordered double add/mul reduction for 2-element vectors. $dst is
// both the scalar input and the result; code generation is handled by
// reduce_fp().
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op  = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19022 
// Strictly ordered double add/mul reduction for 4-element vectors. $dst is
// both the scalar input and the result; two vector TEMPs are handed to
// reduce_fp().
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op  = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19036 
// Strictly ordered double add/mul reduction for 8-element vectors. $dst is
// both the scalar input and the result; the vector operands are legVec and
// two vector TEMPs are handed to reduce_fp().
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op  = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_elems,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19050 
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Double add/mul reduction of a 2-element vector where strict ordering is
  // not required. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the encoding.
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19067 
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Double add/mul reduction of a 4-element vector where strict ordering is
  // not required. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the encoding.
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19084 
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Double add/mul reduction of an 8-element vector where strict ordering is
  // not required. Intended for the VectorAPI, which permits such reductions.
  // src1 holds the reduction identity and is not passed to the encoding.
  predicate(!n->as_Reduction()->requires_strict_order() &&
            Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_elems,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19101 
19102 // =======================Byte Reduction==========================================
19103 
// Reduction of a byte vector ($src2) combined with a scalar input ($src1)
// into the scalar $dst, selected when AVX512BW is not available. Multiply
// reduction is not among the matched nodes. Code generation is handled by
// reduceB().
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is taken from the vector operand.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ reduceB(ideal_op, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19123 
// Reduction of a byte vector ($src2) combined with a scalar input ($src1)
// into the scalar $dst, selected when AVX512BW is available. Same encoding
// as the non-AVX512BW rule but with vec instead of legVec operands.
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is taken from the vector operand.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ reduceB(ideal_op, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19143 
19144 // =======================Short Reduction==========================================
19145 
// Reduction of a short vector ($src2) combined with a scalar input ($src1)
// into the scalar $dst. Applies to every listed reduction node whose vector
// input has T_SHORT elements; code generation is handled by reduceS().
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is taken from the vector operand.
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ reduceS(ideal_op, num_elems,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19166 
19167 // =======================Mul Reduction==========================================
19168 
// Multiply reduction of a byte vector with at most 32 elements ($src2)
// combined with the scalar $src1 into $dst. $dst is declared TEMP in
// addition to the two vector TEMPs; code generation is handled by
// mulreduceB().
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ mulreduceB(ideal_op, num_elems,
                  $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19182 
// Multiply reduction of a 64-element byte vector ($src2) combined with the
// scalar $src1 into $dst. Same encoding as the rule for shorter byte vectors
// but with legVec operands.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ mulreduceB(ideal_op, num_elems,
                  $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19196 
19197 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
// Float min/max reduction of a 2-element vector for CPUs without AVX10.2.
// The scalar input $src1 must be the constant +Inf (for Min) or -Inf (for
// Max), as the predicate requires, and is not passed to the encoding.
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    // The boolean is false in this rule and true in the rules that match
    // $dst as an input; presumably it says whether $dst carries a valid
    // incoming value - confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(ideal_op, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19219 
// Float min/max reduction of a vector with four or more elements for CPUs
// without AVX10.2. The scalar input $src1 must be the constant +Inf (for
// Min) or -Inf (for Max), as the predicate requires, and is not passed to
// the encoding.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    // The boolean is false in this rule and true in the rules that match
    // $dst as an input; presumably it says whether $dst carries a valid
    // incoming value - confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(ideal_op, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19240 
// Float min/max reduction of a 2-element vector for CPUs without AVX10.2,
// where $dst is both the scalar input and the result (it appears as the
// first input in the match rules).
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op  = this->ideal_Opcode();
    // The boolean is true here because $dst is matched as an input;
    // presumably it marks $dst as holding a valid incoming value -
    // confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(ideal_op, num_elems, true,
                         $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19259 
19260 
// Float min/max reduction of a vector with four or more elements for CPUs
// without AVX10.2, where $dst is both the scalar input and the result (it
// appears as the first input in the match rules).
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op  = this->ideal_Opcode();
    // The boolean is true here because $dst is matched as an input;
    // presumably it marks $dst as holding a valid incoming value -
    // confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(ideal_op, num_elems, true,
                         $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19279 
// Float min/max reduction of a 2-element vector for CPUs with AVX10.2.
// The scalar input $src1 must be the constant +Inf (for Min) or -Inf (for
// Max), as the predicate requires, and is not passed to the encoding.
// Only one vector TEMP is used; three register arguments are xnoreg.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19297 
// Float min/max reduction of a vector with four or more elements for CPUs
// with AVX10.2. The scalar input $src1 must be the constant +Inf (for Min)
// or -Inf (for Max), as the predicate requires, and is not passed to the
// encoding. Two vector TEMPs are used; three register arguments are xnoreg.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src2);
    const int ideal_op  = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, num_elems, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19315 
// Float min/max reduction of a 2-element vector for CPUs with AVX10.2,
// where $dst is both the scalar input and the result (it appears as the
// first input in the match rules). Only one vector TEMP is used; three
// register arguments are xnoreg.
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int num_elems = Matcher::vector_length(this, $src);
    const int ideal_op  = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, num_elems, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19331 
// Float min/max reduction of a vector with four or more elements for CPUs
// with AVX10.2, where $dst is both the scalar input and the result (it
// appears as the first input in the match rules). Two vector TEMPs are
// used; three register arguments are xnoreg.
instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  // Printed name says "F", not "2F": this rule covers lengths >= 4, while
  // the 2-element case is handled by a separate rule.
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19347 
//--------------------Min/Max Double Reduction --------------------
// Double min/max reduction of a 2-element vector for CPUs without AVX10.2.
// Selected only when the scalar input is the constant +Inf (MinReductionV) or
// -Inf (MaxReductionV); src1 is matched but not handed to the helper.
// Needs four scratch vector registers and declares the flags register killed.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    // Third argument is false: dst carries no input value in this form.
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19369 
// Double min/max reduction for vectors of at least 4 elements on CPUs without
// AVX10.2. Selected only when the scalar input is the constant +Inf
// (MinReductionV) or -Inf (MaxReductionV); src1 is matched but not handed to
// the helper. Needs five scratch vector registers and kills the flags register.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    // Third argument is false: dst carries no input value in this form.
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister,
                          $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19390 
19391 
// Double min/max reduction of a 2-element vector on CPUs without AVX10.2,
// where dst is both the scalar input and the result. Needs four scratch
// vector registers and declares the flags register killed.
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    // Third argument is true: dst also supplies an input value here.
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19410 
// Double min/max reduction for vectors of at least 4 elements on CPUs without
// AVX10.2, where dst is both the scalar input and the result. Needs five
// scratch vector registers and declares the flags register killed.
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    // Third argument is true: dst also supplies an input value here.
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister,
                          $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19429 
// AVX10.2 double min/max reduction of a 2-element vector. Selected only when
// the scalar input is the constant +Inf (MinReductionV) or -Inf
// (MaxReductionV); src1 is matched but not handed to the helper.
// One scratch vector register is needed.
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    // Third argument is false: dst carries no input value in this form.
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19447 
// AVX10.2 double min/max reduction for vectors of at least 4 elements.
// Selected only when the scalar input is the constant +Inf (MinReductionV) or
// -Inf (MaxReductionV); src1 is matched but not handed to the helper.
// Two scratch vector registers are needed.
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src2);
    // Third argument is false: dst carries no input value in this form.
    __ reduceDoubleMinMax(ideal_op, num_elems, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19465 
19466 
// AVX10.2 double min/max reduction of a 2-element vector where dst is both
// the scalar input and the result. One scratch vector register is needed.
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    // Third argument is true: dst also supplies an input value here.
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19482 
// AVX10.2 double min/max reduction for vectors of at least 4 elements where
// dst is both the scalar input and the result. Two scratch vector registers
// are needed.
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op  = this->ideal_Opcode();
    const int num_elems = Matcher::vector_length(this, $src);
    // Third argument is true: dst also supplies an input value here.
    __ reduceDoubleMinMax(ideal_op, num_elems, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19498 
19499 // ====================VECTOR ARITHMETIC=======================================
19500 
19501 // --------------------------------- ADD --------------------------------------
19502 
19503 // Bytes vector add
// Non-AVX (UseAVX == 0) packed-byte add. Two-operand form: dst is the first
// input and receives the result.
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packedB" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // first input, overwritten with the sum
    const XMMRegister rhs = $src$$XMMRegister;
    __ paddb(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19513 
// AVX (UseAVX > 0) packed-byte add, three-operand register form.
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19524 
// AVX packed-byte add with the second operand read from memory (a LoadVector
// folded into the instruction). Only selected when the first input vector
// is wider than 8 bytes.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19536 
19537 // Shorts/Chars vector add
// Non-AVX (UseAVX == 0) packed-short add. Two-operand form: dst is the first
// input and receives the result.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packedS" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // first input, overwritten with the sum
    const XMMRegister rhs = $src$$XMMRegister;
    __ paddw(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19547 
// AVX (UseAVX > 0) packed-short add, three-operand register form.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19558 
// AVX packed-short add with the second operand read from memory (a LoadVector
// folded into the instruction). Only selected when the first input vector
// is wider than 8 bytes.
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19570 
19571 // Integers vector add
// Non-AVX (UseAVX == 0) packed-int add. Two-operand form: dst is the first
// input and receives the result.
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packedI" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // first input, overwritten with the sum
    const XMMRegister rhs = $src$$XMMRegister;
    __ paddd(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19581 
// AVX (UseAVX > 0) packed-int add, three-operand register form.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19592 
19593 
// AVX packed-int add with the second operand read from memory (a LoadVector
// folded into the instruction). Only selected when the first input vector
// is wider than 8 bytes.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19605 
19606 // Longs vector add
// Non-AVX (UseAVX == 0) packed-long add. Two-operand form: dst is the first
// input and receives the result.
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq   $dst,$src\t! add packedL" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // first input, overwritten with the sum
    const XMMRegister rhs = $src$$XMMRegister;
    __ paddq(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19616 
// AVX (UseAVX > 0) packed-long add, three-operand register form.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19627 
// AVX packed-long add with the second operand read from memory (a LoadVector
// folded into the instruction). Only selected when the first input vector
// is wider than 8 bytes.
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19639 
19640 // Floats vector add
// Non-AVX (UseAVX == 0) packed-float add. Two-operand form: dst is the first
// input and receives the result.
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packedF" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // first input, overwritten with the sum
    const XMMRegister rhs = $src$$XMMRegister;
    __ addps(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19650 
// AVX (UseAVX > 0) packed-float add, three-operand register form.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19661 
// AVX packed-float add with the second operand read from memory (a LoadVector
// folded into the instruction). Only selected when the first input vector
// is wider than 8 bytes.
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19673 
19674 // Doubles vector add
// Non-AVX (UseAVX == 0) packed-double add. Two-operand form: dst is the first
// input and receives the result.
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd   $dst,$src\t! add packedD" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // first input, overwritten with the sum
    const XMMRegister rhs = $src$$XMMRegister;
    __ addpd(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19684 
// AVX (UseAVX > 0) packed-double add, three-operand register form.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19695 
// AVX packed-double add with the second operand read from memory (a
// LoadVector folded into the instruction). Only selected when the first
// input vector is wider than 8 bytes.
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19707 
19708 // --------------------------------- SUB --------------------------------------
19709 
19710 // Bytes vector sub
// Non-AVX (UseAVX == 0) packed-byte subtract. Two-operand form: dst is the
// first input (minuend) and receives the result.
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packedB" %}
  ins_encode %{
    const XMMRegister lhs = $dst$$XMMRegister;  // minuend, overwritten with the difference
    const XMMRegister rhs = $src$$XMMRegister;
    __ psubb(lhs, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19720 
// AVX (UseAVX > 0) packed-byte subtract, three-operand register form.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19731 
// AVX packed-byte subtract with the subtrahend read from memory (a LoadVector
// folded into the instruction). Only selected when the first input vector
// is wider than 8 bytes.
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19743 
19744 // Shorts/Chars vector sub
// Non-AVX (UseAVX == 0) packed-short subtract. Two-operand form: dst is the
// first input (minuend) and receives the result.
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw   $dst,$src\t! sub packedS" %}
  ins_encode %{
    const XMMRegister lhs = $dst$$XMMRegister;  // minuend, overwritten with the difference
    const XMMRegister rhs = $src$$XMMRegister;
    __ psubw(lhs, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19754 
19755 
// AVX (UseAVX > 0) packed-short subtract, three-operand register form.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19766 
// AVX packed-short subtract with the subtrahend read from memory (a
// LoadVector folded into the instruction). Only selected when the first
// input vector is wider than 8 bytes.
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19778 
19779 // Integers vector sub
// Non-AVX (UseAVX == 0) packed-int subtract. Two-operand form: dst is the
// first input (minuend) and receives the result.
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd   $dst,$src\t! sub packedI" %}
  ins_encode %{
    const XMMRegister lhs = $dst$$XMMRegister;  // minuend, overwritten with the difference
    const XMMRegister rhs = $src$$XMMRegister;
    __ psubd(lhs, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19789 
// AVX (UseAVX > 0) packed-int subtract, three-operand register form.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19800 
// AVX packed-int subtract with the subtrahend read from memory (a LoadVector
// folded into the instruction). Only selected when the first input vector
// is wider than 8 bytes.
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19812 
19813 // Longs vector sub
// Non-AVX (UseAVX == 0) packed-long subtract. Two-operand form: dst is the
// first input (minuend) and receives the result.
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq   $dst,$src\t! sub packedL" %}
  ins_encode %{
    const XMMRegister lhs = $dst$$XMMRegister;  // minuend, overwritten with the difference
    const XMMRegister rhs = $src$$XMMRegister;
    __ psubq(lhs, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19823 
// AVX (UseAVX > 0) packed-long subtract, three-operand register form.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19834 
19835 
// AVX packed-long subtract with the subtrahend read from memory (a LoadVector
// folded into the instruction). Only selected when the first input vector
// is wider than 8 bytes.
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19847 
19848 // Floats vector sub
// Non-AVX (UseAVX == 0) packed-float subtract. Two-operand form: dst is the
// first input (minuend) and receives the result.
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps   $dst,$src\t! sub packedF" %}
  ins_encode %{
    const XMMRegister lhs = $dst$$XMMRegister;  // minuend, overwritten with the difference
    const XMMRegister rhs = $src$$XMMRegister;
    __ subps(lhs, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19858 
// AVX (UseAVX > 0) packed-float subtract, three-operand register form.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19869 
// AVX packed-float subtract with the subtrahend read from memory (a
// LoadVector folded into the instruction). Only selected when the first
// input vector is wider than 8 bytes.
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19881 
19882 // Doubles vector sub
// Non-AVX (UseAVX == 0) packed-double subtract. Two-operand form: dst is the
// first input (minuend) and receives the result.
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd   $dst,$src\t! sub packedD" %}
  ins_encode %{
    const XMMRegister lhs = $dst$$XMMRegister;  // minuend, overwritten with the difference
    const XMMRegister rhs = $src$$XMMRegister;
    __ subpd(lhs, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19892 
// AVX (UseAVX > 0) packed-double subtract, three-operand register form.
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19903 
// AVX packed-double subtract with the subtrahend read from memory (a
// LoadVector folded into the instruction). Only selected when the first
// input vector is wider than 8 bytes.
instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              enc);
  %}
  ins_pipe( pipe_slow );
%}
19915 
19916 // --------------------------------- MUL --------------------------------------
19917 
19918 // Byte vector mul
// Byte vector multiply for vectors of at most 8 bytes. The bytes of both
// inputs are widened to 16-bit words, multiplied as words, and the low byte
// of every product is packed back into dst. dst and xtmp are TEMP because
// both are written before the last input has been read.
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const XMMRegister prod    = $dst$$XMMRegister;
    const XMMRegister widened = $xtmp$$XMMRegister;
    // Sign-extend each byte of both inputs to a word.
    __ pmovsxbw(prod, $src1$$XMMRegister);
    __ pmovsxbw(widened, $src2$$XMMRegister);
    // Word-wise multiply; only the low byte of every word is wanted.
    __ pmullw(prod, widened);
    // Shift left then right by 8 to clear the high byte of every word.
    __ psllw(prod, 8);
    __ psrlw(prod, 8);
    // Narrow the words back to bytes.
    __ packuswb(prod, prod);
  %}
  ins_pipe( pipe_slow );
%}
19935 
// Non-AVX byte vector multiply for vectors wider than 8 bytes. Odd- and
// even-indexed bytes are multiplied separately as 16-bit words and the two
// partial results are OR-ed together. dst and xtmp are TEMP because they are
// written while src1/src2 are still needed.
instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister scratch = $xtmp$$XMMRegister;
    const XMMRegister lhs     = $src1$$XMMRegister;
    const XMMRegister rhs     = $src2$$XMMRegister;

    // Odd-indexed bytes: move the high byte of every word down, multiply,
    // then move the low byte of the product back up into the high byte.
    __ movdqu(result, lhs);
    __ psrlw(result, 8);
    __ movdqu(scratch, rhs);
    __ psrlw(scratch, 8);
    __ pmullw(result, scratch);
    __ psllw(result, 8);

    // Even-indexed bytes: multiply the words as they are and keep only the
    // low byte of every product (shift left then right by 8).
    __ movdqu(scratch, lhs);
    __ pmullw(scratch, rhs);
    __ psllw(scratch, 8);
    __ psrlw(scratch, 8);

    // Merge the odd and even halves.
    __ por(result, scratch);
  %}
  ins_pipe( pipe_slow );
%}
19960 
// AVX byte vector multiply for vectors wider than 8 bytes. Odd- and
// even-indexed bytes are multiplied separately as 16-bit words in the two
// scratch registers; dst is written only by the final OR.
instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const XMMRegister even = $xtmp1$$XMMRegister;
    const XMMRegister odd  = $xtmp2$$XMMRegister;
    const XMMRegister lhs  = $src1$$XMMRegister;
    const XMMRegister rhs  = $src2$$XMMRegister;

    // Odd-indexed bytes: move the high byte of every word down, multiply,
    // then move the low byte of the product back up into the high byte.
    __ vpsrlw(odd, lhs, 8, enc);
    __ vpsrlw(even, rhs, 8, enc);
    __ vpmullw(odd, even, odd, enc);
    __ vpsllw(odd, odd, 8, enc);

    // Even-indexed bytes: multiply the words as they are and keep only the
    // low byte of every product (shift left then right by 8).
    __ vpmullw(even, lhs, rhs, enc);
    __ vpsllw(even, even, 8, enc);
    __ vpsrlw(even, even, 8, enc);

    // Merge the odd and even halves.
    __ vpor($dst$$XMMRegister, even, odd, enc);
  %}
  ins_pipe( pipe_slow );
%}
19982 
19983 // Shorts/Chars vector mul
// Non-AVX (UseAVX == 0) packed-short multiply. Two-operand form: dst is the
// first input and receives the result.
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw  $dst,$src\t! mul packedS" %}
  ins_encode %{
    const XMMRegister acc = $dst$$XMMRegister;  // first input, overwritten with the product
    const XMMRegister rhs = $src$$XMMRegister;
    __ pmullw(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
19993 
// AVX (UseAVX > 0) packed-short multiply, three-operand register form.
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister,
               $src1$$XMMRegister, $src2$$XMMRegister,
               enc);
  %}
  ins_pipe( pipe_slow );
%}
20004 
// AVX packed-short multiply with the second operand read from memory (a
// LoadVector folded into the instruction). Only selected when the first
// input vector is wider than 8 bytes.
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister,
               $src$$XMMRegister, $mem$$Address,
               enc);
  %}
  ins_pipe( pipe_slow );
%}
20016 
20017 // Integers vector mul
// Non-AVX (UseAVX == 0) packed-int multiply. Two-operand form: dst is the
// first input and receives the result. The encoder asserts UseSSE > 3.
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld  $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const XMMRegister acc = $dst$$XMMRegister;  // first input, overwritten with the product
    const XMMRegister rhs = $src$$XMMRegister;
    __ pmulld(acc, rhs);
  %}
  ins_pipe( pipe_slow );
%}
20028 
// AVX (UseAVX > 0) packed-int multiply, three-operand register form.
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister,
               $src1$$XMMRegister, $src2$$XMMRegister,
               enc);
  %}
  ins_pipe( pipe_slow );
%}
20039 
// AVX packed-int multiply with the second operand read from memory (a
// LoadVector folded into the instruction). Only selected when the first
// input vector is wider than 8 bytes.
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister,
               $src$$XMMRegister, $mem$$Address,
               enc);
  %}
  ins_pipe( pipe_slow );
%}
20051 
20052 // Longs vector mul
// Packed-long multiply using a single evpmullq. Selected for 64-byte vectors
// when AVX512DQ is supported, or for any vector length when AVX512VL+DQ
// (supports_avx512vldq) is supported.
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    // Vector length encoding is derived from this node.
    const int enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister,
                $src1$$XMMRegister, $src2$$XMMRegister,
                enc);
  %}
  ins_pipe( pipe_slow );
%}
20067 
// Packed-long multiply using a single evpmullq with the second operand read
// from memory (a LoadVector folded into the instruction). Selected for
// 64-byte vectors when AVX512DQ is supported, or for vectors wider than
// 8 bytes when supports_avx512vldq() holds.
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_cost(500);
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister,
                $src$$XMMRegister, $mem$$Address,
                enc);
  %}
  ins_pipe( pipe_slow );
%}
20083 
// Non-AVX (UseAVX == 0) packed-long multiply built from 32-bit multiplies.
// Per 64-bit lane, with a = src1 and b = src2 split into 32-bit halves:
//   dst = ((a_lo * b_hi + a_hi * b_lo) << 32) + a_lo * b_lo
// dst and xtmp are TEMP because they are written while src1/src2 are still
// needed.
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister scratch = $xtmp$$XMMRegister;
    const XMMRegister lhs     = $src1$$XMMRegister;
    const XMMRegister rhs     = $src2$$XMMRegister;

    // Cross (lo*hi) products: only their low 32 bits contribute.
    // Shuffle 0xB1 swaps the two 32-bit halves of every 64-bit lane.
    __ pshufd(scratch, rhs, 0xB1);
    __ pmulld(scratch, lhs);
    __ pshufd(result, scratch, 0xB1);
    __ paddd(result, scratch);
    // Move the summed cross products into the upper half of each lane.
    __ psllq(result, 32);

    // Full 64-bit lo*lo products, added on top.
    __ movdqu(scratch, lhs);
    __ pmuludq(scratch, rhs);
    __ paddq(result, scratch);
  %}
  ins_pipe( pipe_slow );
%}
20105 
// Long vector multiply (MulVL) for UseAVX > 0 in the cases the predicate
// spells out: 64-byte vectors without VM_Version::supports_avx512dq(), or
// shorter vectors without VM_Version::supports_avx512vldq().
// Composes the product from 32-bit multiplies using two TEMP vectors; dst is
// only written by the final vpaddq.
20106 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20107   predicate(UseAVX > 0 &&
20108             ((Matcher::vector_length_in_bytes(n) == 64 &&
20109               !VM_Version::supports_avx512dq()) ||
20110              (Matcher::vector_length_in_bytes(n) < 64 &&
20111               !VM_Version::supports_avx512vldq())));
20112   match(Set dst (MulVL src1 src2));
20113   effect(TEMP xtmp1, TEMP xtmp2);
20114   ins_cost(500);
20115   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20116   ins_encode %{
20117     int vlen_enc = vector_length_encoding(this);
20118     // Get the lo-hi products; only their lower 32 bits are of concern
20119     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20120     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20121     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20122     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20123     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc); // move the sum into the upper 32 bits
20124     // Get the lo-lo products
20125     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20126     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); // dst = lo-lo + (lo-hi sum << 32)
20127   %}
20128   ins_pipe( pipe_slow );
20129 %}
20130 
// Long vector multiply (MulVL) for nodes whose MulVLNode reports
// has_uint_inputs(); emits a single vpmuludq. Its ins_cost(100) is lower than
// the 500 used by the general MulVL rules in this section.
20131 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20132   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20133   match(Set dst (MulVL src1 src2));
20134   ins_cost(100);
20135   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20136   ins_encode %{
20137     int vlen_enc = vector_length_encoding(this);
20138     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20139   %}
20140   ins_pipe( pipe_slow );
20141 %}
20142 
// Long vector multiply (MulVL) for nodes whose MulVLNode reports
// has_int_inputs(); emits a single vpmuldq. Its ins_cost(100) is lower than
// the 500 used by the general MulVL rules in this section.
20143 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20144   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20145   match(Set dst (MulVL src1 src2));
20146   ins_cost(100);
20147   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20148   ins_encode %{
20149     int vlen_enc = vector_length_encoding(this);
20150     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20151   %}
20152   ins_pipe( pipe_slow );
20153 %}
20154 
20155 // Floats vector mul
// Float vector multiply (MulVF) for UseAVX == 0. Two-operand form: dst is
// both the first input and the result, as expressed by the match rule.
20156 instruct vmulF(vec dst, vec src) %{
20157   predicate(UseAVX == 0);
20158   match(Set dst (MulVF dst src));
20159   format %{ "mulps   $dst,$src\t! mul packedF" %}
20160   ins_encode %{
20161     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20162   %}
20163   ins_pipe( pipe_slow );
20164 %}
20165 
// Float vector multiply (MulVF) for UseAVX > 0. Three-operand form emitting
// vmulps at the node's vector length encoding.
20166 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20167   predicate(UseAVX > 0);
20168   match(Set dst (MulVF src1 src2));
20169   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20170   ins_encode %{
20171     int vlen_enc = vector_length_encoding(this);
20172     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20173   %}
20174   ins_pipe( pipe_slow );
20175 %}
20176 
// Float vector multiply (MulVF) with the second input folded from memory
// (LoadVector mem). Requires UseAVX > 0 and that the first input vector is
// wider than 8 bytes.
20177 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20178   predicate((UseAVX > 0) &&
20179             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20180   match(Set dst (MulVF src (LoadVector mem)));
20181   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20182   ins_encode %{
20183     int vlen_enc = vector_length_encoding(this);
20184     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20185   %}
20186   ins_pipe( pipe_slow );
20187 %}
20188 
20189 // Doubles vector mul
// Double vector multiply (MulVD) for UseAVX == 0. Two-operand form: dst is
// both the first input and the result, as expressed by the match rule.
20190 instruct vmulD(vec dst, vec src) %{
20191   predicate(UseAVX == 0);
20192   match(Set dst (MulVD dst src));
20193   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20194   ins_encode %{
20195     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20196   %}
20197   ins_pipe( pipe_slow );
20198 %}
20199 
// Double vector multiply (MulVD) for UseAVX > 0. Three-operand form emitting
// vmulpd at the node's vector length encoding.
20200 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20201   predicate(UseAVX > 0);
20202   match(Set dst (MulVD src1 src2));
20203   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20204   ins_encode %{
20205     int vlen_enc = vector_length_encoding(this);
20206     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20207   %}
20208   ins_pipe( pipe_slow );
20209 %}
20210 
// Double vector multiply (MulVD) with the second input folded from memory
// (LoadVector mem). Requires UseAVX > 0 and that the first input vector is
// wider than 8 bytes.
20211 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20212   predicate((UseAVX > 0) &&
20213             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20214   match(Set dst (MulVD src (LoadVector mem)));
20215   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20216   ins_encode %{
20217     int vlen_enc = vector_length_encoding(this);
20218     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20219   %}
20220   ins_pipe( pipe_slow );
20221 %}
20222 
20223 // --------------------------------- DIV --------------------------------------
20224 
20225 // Floats vector div
// Float vector divide (DivVF) for UseAVX == 0. Two-operand form: dst is the
// dividend and receives the result; src is the divisor.
20226 instruct vdivF(vec dst, vec src) %{
20227   predicate(UseAVX == 0);
20228   match(Set dst (DivVF dst src));
20229   format %{ "divps   $dst,$src\t! div packedF" %}
20230   ins_encode %{
20231     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20232   %}
20233   ins_pipe( pipe_slow );
20234 %}
20235 
// Float vector divide (DivVF) for UseAVX > 0. Three-operand form emitting
// vdivps at the node's vector length encoding.
20236 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20237   predicate(UseAVX > 0);
20238   match(Set dst (DivVF src1 src2));
20239   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20240   ins_encode %{
20241     int vlen_enc = vector_length_encoding(this);
20242     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20243   %}
20244   ins_pipe( pipe_slow );
20245 %}
20246 
// Float vector divide (DivVF) with the divisor folded from memory
// (LoadVector mem). Requires UseAVX > 0 and that the first input vector is
// wider than 8 bytes.
20247 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20248   predicate((UseAVX > 0) &&
20249             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20250   match(Set dst (DivVF src (LoadVector mem)));
20251   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20252   ins_encode %{
20253     int vlen_enc = vector_length_encoding(this);
20254     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20255   %}
20256   ins_pipe( pipe_slow );
20257 %}
20258 
20259 // Doubles vector div
// Double vector divide (DivVD) for UseAVX == 0. Two-operand form: dst is the
// dividend and receives the result; src is the divisor.
20260 instruct vdivD(vec dst, vec src) %{
20261   predicate(UseAVX == 0);
20262   match(Set dst (DivVD dst src));
20263   format %{ "divpd   $dst,$src\t! div packedD" %}
20264   ins_encode %{
20265     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20266   %}
20267   ins_pipe( pipe_slow );
20268 %}
20269 
// Double vector divide (DivVD) for UseAVX > 0. Three-operand form emitting
// vdivpd at the node's vector length encoding.
20270 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20271   predicate(UseAVX > 0);
20272   match(Set dst (DivVD src1 src2));
20273   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20274   ins_encode %{
20275     int vlen_enc = vector_length_encoding(this);
20276     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20277   %}
20278   ins_pipe( pipe_slow );
20279 %}
20280 
// Double vector divide (DivVD) with the divisor folded from memory
// (LoadVector mem). Requires UseAVX > 0 and that the first input vector is
// wider than 8 bytes.
20281 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20282   predicate((UseAVX > 0) &&
20283             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20284   match(Set dst (DivVD src (LoadVector mem)));
20285   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20286   ins_encode %{
20287     int vlen_enc = vector_length_encoding(this);
20288     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20289   %}
20290   ins_pipe( pipe_slow );
20291 %}
20292 
20293 // ------------------------------ MinMax ---------------------------------------
20294 
20295 // Byte, Short, Int vector Min/Max
// Integral (non-long) vector Min/Max for UseAVX == 0. One rule serves both
// MinV and MaxV: the ideal opcode and the element type are handed to pminmax,
// which picks the instruction. Two-operand form with dst as first input.
20296 instruct minmax_reg_sse(vec dst, vec src) %{
20297   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20298             UseAVX == 0);
20299   match(Set dst (MinV dst src));
20300   match(Set dst (MaxV dst src));
20301   format %{ "vector_minmax  $dst,$src\t!  " %}
20302   ins_encode %{
20303     assert(UseSSE >= 4, "required");
20304
20305     int opcode = this->ideal_Opcode(); // distinguishes the MinV match from the MaxV match
20306     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20307     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20308   %}
20309   ins_pipe( pipe_slow );
20310 %}
20311 
// Integral (non-long) vector Min/Max for UseAVX > 0. One rule serves both
// MinV and MaxV: the ideal opcode and the element type are handed to
// vpminmax together with the vector length encoding.
20312 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20313   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20314             UseAVX > 0);
20315   match(Set dst (MinV src1 src2));
20316   match(Set dst (MaxV src1 src2));
20317   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20318   ins_encode %{
20319     int opcode = this->ideal_Opcode(); // distinguishes the MinV match from the MaxV match
20320     int vlen_enc = vector_length_encoding(this);
20321     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20322
20323     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20324   %}
20325   ins_pipe( pipe_slow );
20326 %}
20327 
20328 // Long vector Min/Max
// Long vector Min/Max for 16-byte vectors when UseAVX == 0.
// tmp is constrained to the rxmm0 operand class and, like dst, is declared
// TEMP. Note the two match rules list their inputs in opposite order
// (MinV dst src vs. MaxV src dst).
20329 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20330   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20331             UseAVX == 0);
20332   match(Set dst (MinV dst src));
20333   match(Set dst (MaxV src dst));
20334   effect(TEMP dst, TEMP tmp);
20335   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20336   ins_encode %{
20337     assert(UseSSE >= 4, "required");
20338
20339     int opcode = this->ideal_Opcode(); // distinguishes the MinV match from the MaxV match
20340     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20341     assert(elem_bt == T_LONG, "sanity");
20342
20343     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20344   %}
20345   ins_pipe( pipe_slow );
20346 %}
20347 
// Long vector Min/Max for vectors of at most 32 bytes when UseAVX > 0 and
// VM_Version::supports_avx512vl() is false. Operands use the legVec class and
// dst is declared TEMP. The opcode-specific work is delegated to vpminmax.
20348 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20349   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20350             UseAVX > 0 && !VM_Version::supports_avx512vl());
20351   match(Set dst (MinV src1 src2));
20352   match(Set dst (MaxV src1 src2));
20353   effect(TEMP dst);
20354   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20355   ins_encode %{
20356     int vlen_enc = vector_length_encoding(this);
20357     int opcode = this->ideal_Opcode(); // distinguishes the MinV match from the MaxV match
20358     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20359     assert(elem_bt == T_LONG, "sanity");
20360
20361     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20362   %}
20363   ins_pipe( pipe_slow );
20364 %}
20365 
// Long vector Min/Max for 64-byte vectors, or for any vector length when
// VM_Version::supports_avx512vl() holds. One rule serves both MinV and MaxV;
// the ideal opcode is handed to vpminmax, which picks the instruction.
20366 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20367   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20368             Matcher::vector_element_basic_type(n) == T_LONG);
20369   match(Set dst (MinV src1 src2));
20370   match(Set dst (MaxV src1 src2));
20371   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20372   ins_encode %{
20373     assert(UseAVX > 2, "required");
20374
20375     int vlen_enc = vector_length_encoding(this);
20376     int opcode = this->ideal_Opcode(); // distinguishes the MinV match from the MaxV match
20377     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20378     assert(elem_bt == T_LONG, "sanity");
20379
20380     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20381   %}
20382   ins_pipe( pipe_slow );
20383 %}
20384 
20385 // Float/Double vector Min/Max
// Float/Double vector Min/Max when VM_Version::supports_avx10_2() holds.
// No temporaries are declared; the work is delegated to vminmax_fp_avx10_2,
// which is passed k0 along with the operands.
20386 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20387   predicate(VM_Version::supports_avx10_2() &&
20388             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20389   match(Set dst (MinV a b));
20390   match(Set dst (MaxV a b));
20391   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20392   ins_encode %{
20393     int vlen_enc = vector_length_encoding(this);
20394     int opcode = this->ideal_Opcode(); // distinguishes the MinV match from the MaxV match
20395     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20396     __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20397   %}
20398   ins_pipe( pipe_slow );
20399 %}
20400 
20401 // Float/Double vector Min/Max
// Float/Double vector Min/Max for vectors of at most 32 bytes when
// UseAVX > 0 and VM_Version::supports_avx10_2() is false. Operands use the
// legVec class; three TEMP vectors (tmp, atmp, btmp) are handed to
// vminmax_fp.
20402 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20403   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20404             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20405             UseAVX > 0);
20406   match(Set dst (MinV a b));
20407   match(Set dst (MaxV a b));
20408   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20409   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20410   ins_encode %{
20411     assert(UseAVX > 0, "required");
20412
20413     int opcode = this->ideal_Opcode(); // distinguishes the MinV match from the MaxV match
20414     int vlen_enc = vector_length_encoding(this);
20415     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20416
20417     __ vminmax_fp(opcode, elem_bt,
20418                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20419                   $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20420   %}
20421   ins_pipe( pipe_slow );
20422 %}
20423 
// Float/Double vector Min/Max for 64-byte vectors when
// VM_Version::supports_avx10_2() is false. dst, two vector temporaries
// (atmp, btmp) and one mask-register temporary (ktmp) are declared TEMP and
// handed to evminmax_fp. The format string lists every declared temporary,
// including $ktmp.
20424 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20425   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20426             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20427   match(Set dst (MinV a b));
20428   match(Set dst (MaxV a b));
20429   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20430   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp, $ktmp as TEMP" %}
20431   ins_encode %{
20432     assert(UseAVX > 2, "required");
20433
20434     int opcode = this->ideal_Opcode(); // distinguishes the MinV match from the MaxV match
20435     int vlen_enc = vector_length_encoding(this);
20436     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20437
20438     __ evminmax_fp(opcode, elem_bt,
20439                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20440                    $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20441   %}
20442   ins_pipe( pipe_slow );
20443 %}
20444 
20445 // ------------------------------ Unsigned vector Min/Max ----------------------
20446 
// Unsigned vector Min/Max (UMinV/UMaxV), register-register form.
// Selected when VM_Version::supports_avx512vl() holds or the element type is
// not T_LONG. The ideal opcode and element type are handed to vpuminmax.
20447 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20448   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20449   match(Set dst (UMinV a b));
20450   match(Set dst (UMaxV a b));
20451   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20452   ins_encode %{
20453     int opcode = this->ideal_Opcode(); // distinguishes the UMinV match from the UMaxV match
20454     int vlen_enc = vector_length_encoding(this);
20455     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20456     assert(is_integral_type(elem_bt), "");
20457     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20458   %}
20459   ins_pipe( pipe_slow );
20460 %}
20461 
// Unsigned vector Min/Max (UMinV/UMaxV) with the second input folded from
// memory (LoadVector b). Same predicate as the register form: selected when
// VM_Version::supports_avx512vl() holds or the element type is not T_LONG.
20462 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20463   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20464   match(Set dst (UMinV a (LoadVector b)));
20465   match(Set dst (UMaxV a (LoadVector b)));
20466   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20467   ins_encode %{
20468     int opcode = this->ideal_Opcode(); // distinguishes the UMinV match from the UMaxV match
20469     int vlen_enc = vector_length_encoding(this);
20470     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20471     assert(is_integral_type(elem_bt), "");
20472     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20473   %}
20474   ins_pipe( pipe_slow );
20475 %}
20476 
// Unsigned long vector Min/Max (UMinV/UMaxV) for the case the plain
// vector_uminmax rules exclude: element type T_LONG without
// VM_Version::supports_avx512vl(). Two TEMP vectors are handed to
// vpuminmaxq; the format string names them via $xtmp1/$xtmp2 so the
// allocated registers show up in the printed instruction.
20477 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20478   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20479   match(Set dst (UMinV a b));
20480   match(Set dst (UMaxV a b));
20481   effect(TEMP xtmp1, TEMP xtmp2);
20482   format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
20483   ins_encode %{
20484     int opcode = this->ideal_Opcode(); // distinguishes the UMinV match from the UMaxV match
20485     int vlen_enc = vector_length_encoding(this);
20486     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20487   %}
20488   ins_pipe( pipe_slow );
20489 %}
20490 
// Masked unsigned vector Min/Max: matches UMinV/UMaxV whose inputs are
// (Binary dst src2) plus a kReg mask. dst is both the first input and the
// result. The operation is delegated to evmasked_op with the ideal opcode.
// This rule carries no predicate of its own.
20491 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20492   match(Set dst (UMinV (Binary dst src2) mask));
20493   match(Set dst (UMaxV (Binary dst src2) mask));
20494   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20495   ins_encode %{
20496     int vlen_enc = vector_length_encoding(this);
20497     BasicType bt = Matcher::vector_element_basic_type(this);
20498     int opc = this->ideal_Opcode();
20499     // NOTE(review): the literal `true` is presumably evmasked_op's merge flag - confirm against its declaration
20500     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20500                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20501   %}
20502   ins_pipe( pipe_slow );
20503 %}
20504 
// Masked unsigned vector Min/Max with the second input folded from memory:
// matches UMinV/UMaxV over (Binary dst (LoadVector src2)) plus a kReg mask.
// dst is both the first input and the result. This rule carries no predicate
// of its own.
20505 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20506   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20507   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20508   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20509   ins_encode %{
20510     int vlen_enc = vector_length_encoding(this);
20511     BasicType bt = Matcher::vector_element_basic_type(this);
20512     int opc = this->ideal_Opcode();
20513     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20514                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20515   %}
20516   ins_pipe( pipe_slow );
20517 %}
20518 
20519 // --------------------------------- Signum/CopySign ---------------------------
20520 
// Scalar float signum: matches SignumF with dst as input and result, plus
// the constants zero and one supplied in registers via (Binary zero one).
// The condition flags are declared killed. Delegates to signum_fp.
20521 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20522   match(Set dst (SignumF dst (Binary zero one)));
20523   effect(KILL cr);
20524   format %{ "signumF $dst, $dst" %}
20525   ins_encode %{
20526     int opcode = this->ideal_Opcode();
20527     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20528   %}
20529   ins_pipe( pipe_slow );
20530 %}
20531 
// Scalar double signum: matches SignumD with dst as input and result, plus
// the constants zero and one supplied in registers via (Binary zero one).
// The condition flags are declared killed. Delegates to signum_fp.
20532 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20533   match(Set dst (SignumD dst (Binary zero one)));
20534   effect(KILL cr);
20535   format %{ "signumD $dst, $dst" %}
20536   ins_encode %{
20537     int opcode = this->ideal_Opcode();
20538     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20539   %}
20540   ins_pipe( pipe_slow );
20541 %}
20542 
// Vector signum (SignumVF/SignumVD) for vectors of at most 32 bytes when
// VM_Version::supports_avx512vl() is false. dst and one vector temporary are
// declared TEMP; the ideal opcode tells vector_signum_avx which of the two
// match rules applied.
20543 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20544   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20545   match(Set dst (SignumVF src (Binary zero one)));
20546   match(Set dst (SignumVD src (Binary zero one)));
20547   effect(TEMP dst, TEMP xtmp1);
20548   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20549   ins_encode %{
20550     int opcode = this->ideal_Opcode();
20551     int vec_enc = vector_length_encoding(this);
20552     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20553                          $xtmp1$$XMMRegister, vec_enc);
20554   %}
20555   ins_pipe( pipe_slow );
20556 %}
20557 
// Vector signum (SignumVF/SignumVD) when VM_Version::supports_avx512vl()
// holds or the vector is 64 bytes long. dst and one mask-register temporary
// are declared TEMP; the ideal opcode tells vector_signum_evex which of the
// two match rules applied.
20558 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20559   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20560   match(Set dst (SignumVF src (Binary zero one)));
20561   match(Set dst (SignumVD src (Binary zero one)));
20562   effect(TEMP dst, TEMP ktmp1);
20563   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20564   ins_encode %{
20565     int opcode = this->ideal_Opcode();
20566     int vec_enc = vector_length_encoding(this);
20567     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20568                           $ktmp1$$KRegister, vec_enc);
20569   %}
20570   ins_pipe( pipe_slow );
20571 %}
20572 
20573 // ---------------------------------------
20574 // For copySign use 0xE4 as the imm8 truth table for vpternlog
20575 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20576 // C (xmm2) is set to 0x7FFFFFFF
20577 // Wherever xmm2 is 0, we want to pick from B (sign)
20578 // Wherever xmm2 is 1, we want to pick from A (src)
20579 //
20580 // A B C Result
20581 // 0 0 0 0
20582 // 0 0 1 0
20583 // 0 1 0 1
20584 // 0 1 1 0
20585 // 1 0 0 0
20586 // 1 0 1 1
20587 // 1 1 0 1
20588 // 1 1 1 1
20589 //
20590 // Result going from high bit to low bit is 0b11100100 = 0xE4
20591 // ---------------------------------------
20592 
// Scalar float CopySign: dst is the first input and the result, src is the
// second input. Materializes the 32-bit constant 0x7FFFFFFF in tmp1 (via the
// integer temporary tmp2) and combines dst, src and tmp1 with a single
// 128-bit vpternlogd using immediate 0xE4.
20593 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20594   match(Set dst (CopySignF dst src));
20595   effect(TEMP tmp1, TEMP tmp2);
20596   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20597   ins_encode %{
20598     __ movl($tmp2$$Register, 0x7FFFFFFF); // every bit set except bit 31
20599     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20600     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20601   %}
20602   ins_pipe( pipe_slow );
20603 %}
20604 
// Scalar double CopySign: dst is the first input and the result; the second
// input is (Binary src zero), where zero is an immD operand that the encoding
// does not reference. Materializes the 64-bit constant 0x7FFFFFFFFFFFFFFF in
// tmp1 (via the integer temporary tmp2) and combines dst, src and tmp1 with a
// single 128-bit vpternlogq using immediate 0xE4.
20605 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20606   match(Set dst (CopySignD dst (Binary src zero)));
20607   ins_cost(100);
20608   effect(TEMP tmp1, TEMP tmp2);
20609   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20610   ins_encode %{
20611     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); // every bit set except bit 63
20612     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20613     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20614   %}
20615   ins_pipe( pipe_slow );
20616 %}
20617 
20618 //----------------------------- CompressBits/ExpandBits ------------------------
20619 
// CompressBits for nodes whose bottom type is int, with the mask in a
// register. Emits pextl.
20620 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20621   predicate(n->bottom_type()->isa_int());
20622   match(Set dst (CompressBits src mask));
20623   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20624   ins_encode %{
20625     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
20626   %}
20627   ins_pipe( pipe_slow );
20628 %}
20629 
// ExpandBits for nodes whose bottom type is int, with the mask in a
// register. Emits pdepl.
20630 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20631   predicate(n->bottom_type()->isa_int());
20632   match(Set dst (ExpandBits src mask));
20633   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
20634   ins_encode %{
20635     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
20636   %}
20637   ins_pipe( pipe_slow );
20638 %}
20639 
// CompressBits for nodes whose bottom type is int, with the mask folded from
// memory (LoadI mask). Emits pextl with an address operand.
20640 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20641   predicate(n->bottom_type()->isa_int());
20642   match(Set dst (CompressBits src (LoadI mask)));
20643   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20644   ins_encode %{
20645     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
20646   %}
20647   ins_pipe( pipe_slow );
20648 %}
20649 
// ExpandBits for nodes whose bottom type is int, with the mask folded from
// memory (LoadI mask). Emits pdepl with an address operand.
20650 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20651   predicate(n->bottom_type()->isa_int());
20652   match(Set dst (ExpandBits src (LoadI mask)));
20653   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
20654   ins_encode %{
20655     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
20656   %}
20657   ins_pipe( pipe_slow );
20658 %}
20659 
20660 // --------------------------------- Sqrt --------------------------------------
20661 
// Float vector square root (SqrtVF), register form. The rule has no
// predicate; the encoding asserts UseAVX > 0 and emits vsqrtps.
20662 instruct vsqrtF_reg(vec dst, vec src) %{
20663   match(Set dst (SqrtVF src));
20664   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
20665   ins_encode %{
20666     assert(UseAVX > 0, "required");
20667     int vlen_enc = vector_length_encoding(this);
20668     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20669   %}
20670   ins_pipe( pipe_slow );
20671 %}
20672 
// Float vector square root (SqrtVF) with the input folded from memory
// (LoadVector mem). Selected only when the loaded vector is wider than
// 8 bytes; the encoding asserts UseAVX > 0 and emits vsqrtps.
20673 instruct vsqrtF_mem(vec dst, memory mem) %{
20674   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20675   match(Set dst (SqrtVF (LoadVector mem)));
20676   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
20677   ins_encode %{
20678     assert(UseAVX > 0, "required");
20679     int vlen_enc = vector_length_encoding(this);
20680     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
20681   %}
20682   ins_pipe( pipe_slow );
20683 %}
20684 
20685 // Floating point vector sqrt
// Double vector square root (SqrtVD), register form. The rule has no
// predicate; the encoding asserts UseAVX > 0 and emits vsqrtpd.
20686 instruct vsqrtD_reg(vec dst, vec src) %{
20687   match(Set dst (SqrtVD src));
20688   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
20689   ins_encode %{
20690     assert(UseAVX > 0, "required");
20691     int vlen_enc = vector_length_encoding(this);
20692     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20693   %}
20694   ins_pipe( pipe_slow );
20695 %}
20696 
// Double vector square root (SqrtVD) with the input folded from memory
// (LoadVector mem). Selected only when the loaded vector is wider than
// 8 bytes; the encoding asserts UseAVX > 0 and emits vsqrtpd.
20697 instruct vsqrtD_mem(vec dst, memory mem) %{
20698   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20699   match(Set dst (SqrtVD (LoadVector mem)));
20700   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
20701   ins_encode %{
20702     assert(UseAVX > 0, "required");
20703     int vlen_enc = vector_length_encoding(this);
20704     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
20705   %}
20706   ins_pipe( pipe_slow );
20707 %}
20708 
20709 // ------------------------------ Shift ---------------------------------------
20710 
20711 // Left and right shift count vectors are the same on x86
20712 // (only lowest bits of xmm reg are used for count).
// Materializes a vector shift count from a general-purpose register.
// One rule covers both LShiftCntV and RShiftCntV: the count is moved into
// dst with movdl.
20713 instruct vshiftcnt(vec dst, rRegI cnt) %{
20714   match(Set dst (LShiftCntV cnt));
20715   match(Set dst (RShiftCntV cnt));
20716   format %{ "movdl    $dst,$cnt\t! load shift count" %}
20717   ins_encode %{
20718     __ movdl($dst$$XMMRegister, $cnt$$Register);
20719   %}
20720   ins_pipe( pipe_slow );
20721 %}
20722 
20723 // Byte vector shift
// Byte vector shift (LShiftVB/RShiftVB/URShiftVB) for vectors of at most
// 8 elements with a non-variable shift. Sequence: extend src into tmp with
// vextendbw (sign-extending unless the opcode is URShiftVB), shift with
// vshiftw, AND with the vector_short_to_byte_mask() constant, then pack
// with packuswb.
20724 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
20725   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
20726   match(Set dst ( LShiftVB src shift));
20727   match(Set dst ( RShiftVB src shift));
20728   match(Set dst (URShiftVB src shift));
20729   effect(TEMP dst, USE src, USE shift, TEMP tmp);
20730   format %{"vector_byte_shift $dst,$src,$shift" %}
20731   ins_encode %{
20732     assert(UseSSE > 3, "required");
20733     int opcode = this->ideal_Opcode();
20734     bool sign = (opcode != Op_URShiftVB); // only the unsigned right shift extends without sign
20735     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
20736     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
20737     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
20738     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
20739     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20740   %}
20741   ins_pipe( pipe_slow );
20742 %}
20743 
// Byte vector shift (LShiftVB/RShiftVB/URShiftVB) for 16-element vectors
// with a non-variable shift when UseAVX <= 1. The source is processed as two
// parts: tmp1 is extended directly from src, tmp2 from a pshufd(0xE)
// rearrangement of src. Both parts are shifted with vshiftw, ANDed with the
// vector_short_to_byte_mask() constant, and packed into dst with packuswb.
20744 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
20745   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
20746             UseAVX <= 1);
20747   match(Set dst ( LShiftVB src shift));
20748   match(Set dst ( RShiftVB src shift));
20749   match(Set dst (URShiftVB src shift));
20750   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
20751   format %{"vector_byte_shift $dst,$src,$shift" %}
20752   ins_encode %{
20753     assert(UseSSE > 3, "required");
20754     int opcode = this->ideal_Opcode();
20755     bool sign = (opcode != Op_URShiftVB); // only the unsigned right shift extends without sign
20756     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
20757     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
20758     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); // NOTE(review): presumably brings the upper 8 bytes of src down - confirm
20759     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
20760     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
20761     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
20762     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
20763     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
20764     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
20765   %}
20766   ins_pipe( pipe_slow );
20767 %}
20768 
// Byte vector shift (LShiftVB/RShiftVB/URShiftVB) for 16-element vectors
// with a non-variable shift when UseAVX > 1. The extend, shift and mask steps
// run on tmp with the 256-bit encoding; the high 128 bits of tmp are then
// extracted into dst and the two halves are packed with vpackuswb.
20769 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
20770   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
20771             UseAVX > 1);
20772   match(Set dst ( LShiftVB src shift));
20773   match(Set dst ( RShiftVB src shift));
20774   match(Set dst (URShiftVB src shift));
20775   effect(TEMP dst, TEMP tmp);
20776   format %{"vector_byte_shift $dst,$src,$shift" %}
20777   ins_encode %{
20778     int opcode = this->ideal_Opcode();
20779     bool sign = (opcode != Op_URShiftVB); // only the unsigned right shift extends without sign
20780     int vlen_enc = Assembler::AVX_256bit;
20781     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
20782     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20783     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
20784     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
20785     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); // NOTE(review): literal 0 is presumably Assembler::AVX_128bit - confirm
20786   %}
20787   ins_pipe( pipe_slow );
20788 %}
20789 
// Byte vector shift (LShiftVB/RShiftVB/URShiftVB) for 32-element vectors
// with a non-variable shift. The rule has no UseAVX predicate; the encoding
// asserts UseAVX > 1. The high 128 bits of src go through tmp and src itself
// is extended into dst; both are shifted, masked with
// vector_short_to_byte_mask(), packed with vpackuswb and finally reordered
// with vpermq(0xD8).
20790 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
20791   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
20792   match(Set dst ( LShiftVB src shift));
20793   match(Set dst ( RShiftVB src shift));
20794   match(Set dst (URShiftVB src shift));
20795   effect(TEMP dst, TEMP tmp);
20796   format %{"vector_byte_shift $dst,$src,$shift" %}
20797   ins_encode %{
20798     assert(UseAVX > 1, "required");
20799     int opcode = this->ideal_Opcode();
20800     bool sign = (opcode != Op_URShiftVB); // only the unsigned right shift extends without sign
20801     int vlen_enc = Assembler::AVX_256bit;
20802     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
20803     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
20804     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20805     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20806     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20807     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
20808     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
20809     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
20810     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); // NOTE(review): presumably undoes the per-lane interleave left by vpackuswb - confirm
20811   %}
20812   ins_pipe( pipe_slow );
20813 %}
20814 
// Byte vector shift (LShiftVB/RShiftVB/URShiftVB) for 64-element vectors
// with a non-variable shift. The rule has no UseAVX predicate; the encoding
// asserts UseAVX > 2. The upper 256 bits of src go through tmp1 and src
// itself is extended into tmp2; both are shifted and masked with a broadcast
// of vector_short_to_byte_mask(), packed with vpackuswb, and finally permuted
// using the vector_byte_perm_mask() constant.
20815 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
20816   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
20817   match(Set dst ( LShiftVB src shift));
20818   match(Set dst  (RShiftVB src shift));
20819   match(Set dst (URShiftVB src shift));
20820   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
20821   format %{"vector_byte_shift $dst,$src,$shift" %}
20822   ins_encode %{
20823     assert(UseAVX > 2, "required");
20824     int opcode = this->ideal_Opcode();
20825     bool sign = (opcode != Op_URShiftVB); // only the unsigned right shift extends without sign
20826     int vlen_enc = Assembler::AVX_512bit;
20827     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
20828     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
20829     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
20830     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20831     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20832     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
20833     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // dst serves as the mask register until the pack below
20834     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
20835     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
20836     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
20837     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg); // tmp2 is reused to hold the permutation indices
20838     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
20839   %}
20840   ins_pipe( pipe_slow );
20841 %}
20842 
// A logical right shift on a vector of shorts produces a result that differs
// from Java semantics for negative values, because Java code sign-extends a
// short to int before shifting. Char vectors are fine since chars are
// unsigned values.
// Shorts/Chars vector shift (left, arithmetic right, logical right)
20848 instruct vshiftS(vec dst, vec src, vec shift) %{
20849   predicate(!n->as_ShiftV()->is_var_shift());
20850   match(Set dst ( LShiftVS src shift));
20851   match(Set dst ( RShiftVS src shift));
20852   match(Set dst (URShiftVS src shift));
20853   effect(TEMP dst, USE src, USE shift);
20854   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
20855   ins_encode %{
20856     int opcode = this->ideal_Opcode();
20857     if (UseAVX > 0) {
20858       int vlen_enc = vector_length_encoding(this);
20859       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20860     } else {
20861       int vlen = Matcher::vector_length(this);
20862       if (vlen == 2) {
20863         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
20864         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20865       } else if (vlen == 4) {
20866         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
20867         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20868       } else {
20869         assert (vlen == 8, "sanity");
20870         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20871         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20872       }
20873     }
20874   %}
20875   ins_pipe( pipe_slow );
20876 %}
20877 
// Integers vector shift (left, arithmetic right, logical right)
20879 instruct vshiftI(vec dst, vec src, vec shift) %{
20880   predicate(!n->as_ShiftV()->is_var_shift());
20881   match(Set dst ( LShiftVI src shift));
20882   match(Set dst ( RShiftVI src shift));
20883   match(Set dst (URShiftVI src shift));
20884   effect(TEMP dst, USE src, USE shift);
20885   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
20886   ins_encode %{
20887     int opcode = this->ideal_Opcode();
20888     if (UseAVX > 0) {
20889       int vlen_enc = vector_length_encoding(this);
20890       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20891     } else {
20892       int vlen = Matcher::vector_length(this);
20893       if (vlen == 2) {
20894         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
20895         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20896       } else {
20897         assert(vlen == 4, "sanity");
20898         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20899         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20900       }
20901     }
20902   %}
20903   ins_pipe( pipe_slow );
20904 %}
20905 
// Integers vector constant shift (left, arithmetic right, logical right)
20907 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
20908   match(Set dst (LShiftVI src (LShiftCntV shift)));
20909   match(Set dst (RShiftVI src (RShiftCntV shift)));
20910   match(Set dst (URShiftVI src (RShiftCntV shift)));
20911   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
20912   ins_encode %{
20913     int opcode = this->ideal_Opcode();
20914     if (UseAVX > 0) {
20915       int vector_len = vector_length_encoding(this);
20916       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
20917     } else {
20918       int vlen = Matcher::vector_length(this);
20919       if (vlen == 2) {
20920         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
20921         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
20922       } else {
20923         assert(vlen == 4, "sanity");
20924         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20925         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
20926       }
20927     }
20928   %}
20929   ins_pipe( pipe_slow );
20930 %}
20931 
20932 // Longs vector shift
20933 instruct vshiftL(vec dst, vec src, vec shift) %{
20934   predicate(!n->as_ShiftV()->is_var_shift());
20935   match(Set dst ( LShiftVL src shift));
20936   match(Set dst (URShiftVL src shift));
20937   effect(TEMP dst, USE src, USE shift);
20938   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
20939   ins_encode %{
20940     int opcode = this->ideal_Opcode();
20941     if (UseAVX > 0) {
20942       int vlen_enc = vector_length_encoding(this);
20943       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20944     } else {
20945       assert(Matcher::vector_length(this) == 2, "");
20946       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20947       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
20948     }
20949   %}
20950   ins_pipe( pipe_slow );
20951 %}
20952 
20953 // Longs vector constant shift
20954 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
20955   match(Set dst (LShiftVL src (LShiftCntV shift)));
20956   match(Set dst (URShiftVL src (RShiftCntV shift)));
20957   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
20958   ins_encode %{
20959     int opcode = this->ideal_Opcode();
20960     if (UseAVX > 0) {
20961       int vector_len = vector_length_encoding(this);
20962       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
20963     } else {
20964       assert(Matcher::vector_length(this) == 2, "");
20965       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20966       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
20967     }
20968   %}
20969   ins_pipe( pipe_slow );
20970 %}
20971 
20972 // -------------------ArithmeticRightShift -----------------------------------
20973 // Long vector arithmetic right shift
20974 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
20975   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
20976   match(Set dst (RShiftVL src shift));
20977   effect(TEMP dst, TEMP tmp);
20978   format %{ "vshiftq $dst,$src,$shift" %}
20979   ins_encode %{
20980     uint vlen = Matcher::vector_length(this);
20981     if (vlen == 2) {
20982       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
20983       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
20984       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
20985       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
20986       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
20987       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
20988     } else {
20989       assert(vlen == 4, "sanity");
20990       assert(UseAVX > 1, "required");
20991       int vlen_enc = Assembler::AVX_256bit;
20992       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20993       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
20994       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20995       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
20996       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
20997     }
20998   %}
20999   ins_pipe( pipe_slow );
21000 %}
21001 
21002 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21003   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21004   match(Set dst (RShiftVL src shift));
21005   format %{ "vshiftq $dst,$src,$shift" %}
21006   ins_encode %{
21007     int vlen_enc = vector_length_encoding(this);
21008     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21009   %}
21010   ins_pipe( pipe_slow );
21011 %}
21012 
21013 // ------------------- Variable Shift -----------------------------
21014 // Byte variable shift
21015 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21016   predicate(Matcher::vector_length(n) <= 8 &&
21017             n->as_ShiftV()->is_var_shift() &&
21018             !VM_Version::supports_avx512bw());
21019   match(Set dst ( LShiftVB src shift));
21020   match(Set dst ( RShiftVB src shift));
21021   match(Set dst (URShiftVB src shift));
21022   effect(TEMP dst, TEMP vtmp);
21023   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21024   ins_encode %{
21025     assert(UseAVX >= 2, "required");
21026 
21027     int opcode = this->ideal_Opcode();
21028     int vlen_enc = Assembler::AVX_128bit;
21029     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21030     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21031   %}
21032   ins_pipe( pipe_slow );
21033 %}
21034 
21035 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21036   predicate(Matcher::vector_length(n) == 16 &&
21037             n->as_ShiftV()->is_var_shift() &&
21038             !VM_Version::supports_avx512bw());
21039   match(Set dst ( LShiftVB src shift));
21040   match(Set dst ( RShiftVB src shift));
21041   match(Set dst (URShiftVB src shift));
21042   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21043   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21044   ins_encode %{
21045     assert(UseAVX >= 2, "required");
21046 
21047     int opcode = this->ideal_Opcode();
21048     int vlen_enc = Assembler::AVX_128bit;
21049     // Shift lower half and get word result in dst
21050     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21051 
21052     // Shift upper half and get word result in vtmp1
21053     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21054     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21055     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21056 
21057     // Merge and down convert the two word results to byte in dst
21058     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21059   %}
21060   ins_pipe( pipe_slow );
21061 %}
21062 
21063 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21064   predicate(Matcher::vector_length(n) == 32 &&
21065             n->as_ShiftV()->is_var_shift() &&
21066             !VM_Version::supports_avx512bw());
21067   match(Set dst ( LShiftVB src shift));
21068   match(Set dst ( RShiftVB src shift));
21069   match(Set dst (URShiftVB src shift));
21070   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21071   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21072   ins_encode %{
21073     assert(UseAVX >= 2, "required");
21074 
21075     int opcode = this->ideal_Opcode();
21076     int vlen_enc = Assembler::AVX_128bit;
21077     // Process lower 128 bits and get result in dst
21078     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21079     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21080     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21081     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21082     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21083 
21084     // Process higher 128 bits and get result in vtmp3
21085     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21086     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21087     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21088     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21089     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21090     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21091     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21092 
21093     // Merge the two results in dst
21094     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21095   %}
21096   ins_pipe( pipe_slow );
21097 %}
21098 
21099 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21100   predicate(Matcher::vector_length(n) <= 32 &&
21101             n->as_ShiftV()->is_var_shift() &&
21102             VM_Version::supports_avx512bw());
21103   match(Set dst ( LShiftVB src shift));
21104   match(Set dst ( RShiftVB src shift));
21105   match(Set dst (URShiftVB src shift));
21106   effect(TEMP dst, TEMP vtmp);
21107   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21108   ins_encode %{
21109     assert(UseAVX > 2, "required");
21110 
21111     int opcode = this->ideal_Opcode();
21112     int vlen_enc = vector_length_encoding(this);
21113     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21114   %}
21115   ins_pipe( pipe_slow );
21116 %}
21117 
21118 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21119   predicate(Matcher::vector_length(n) == 64 &&
21120             n->as_ShiftV()->is_var_shift() &&
21121             VM_Version::supports_avx512bw());
21122   match(Set dst ( LShiftVB src shift));
21123   match(Set dst ( RShiftVB src shift));
21124   match(Set dst (URShiftVB src shift));
21125   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21126   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21127   ins_encode %{
21128     assert(UseAVX > 2, "required");
21129 
21130     int opcode = this->ideal_Opcode();
21131     int vlen_enc = Assembler::AVX_256bit;
21132     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21133     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21134     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21135     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21136     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21137   %}
21138   ins_pipe( pipe_slow );
21139 %}
21140 
21141 // Short variable shift
21142 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21143   predicate(Matcher::vector_length(n) <= 8 &&
21144             n->as_ShiftV()->is_var_shift() &&
21145             !VM_Version::supports_avx512bw());
21146   match(Set dst ( LShiftVS src shift));
21147   match(Set dst ( RShiftVS src shift));
21148   match(Set dst (URShiftVS src shift));
21149   effect(TEMP dst, TEMP vtmp);
21150   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21151   ins_encode %{
21152     assert(UseAVX >= 2, "required");
21153 
21154     int opcode = this->ideal_Opcode();
21155     bool sign = (opcode != Op_URShiftVS);
21156     int vlen_enc = Assembler::AVX_256bit;
21157     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21158     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21159     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21160     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21161     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21162     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21163   %}
21164   ins_pipe( pipe_slow );
21165 %}
21166 
21167 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21168   predicate(Matcher::vector_length(n) == 16 &&
21169             n->as_ShiftV()->is_var_shift() &&
21170             !VM_Version::supports_avx512bw());
21171   match(Set dst ( LShiftVS src shift));
21172   match(Set dst ( RShiftVS src shift));
21173   match(Set dst (URShiftVS src shift));
21174   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21175   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21176   ins_encode %{
21177     assert(UseAVX >= 2, "required");
21178 
21179     int opcode = this->ideal_Opcode();
21180     bool sign = (opcode != Op_URShiftVS);
21181     int vlen_enc = Assembler::AVX_256bit;
21182     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21183     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21184     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21185     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21186     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21187 
21188     // Shift upper half, with result in dst using vtmp1 as TEMP
21189     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21190     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21191     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21192     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21193     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21194     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21195 
21196     // Merge lower and upper half result into dst
21197     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21198     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21199   %}
21200   ins_pipe( pipe_slow );
21201 %}
21202 
21203 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21204   predicate(n->as_ShiftV()->is_var_shift() &&
21205             VM_Version::supports_avx512bw());
21206   match(Set dst ( LShiftVS src shift));
21207   match(Set dst ( RShiftVS src shift));
21208   match(Set dst (URShiftVS src shift));
21209   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21210   ins_encode %{
21211     assert(UseAVX > 2, "required");
21212 
21213     int opcode = this->ideal_Opcode();
21214     int vlen_enc = vector_length_encoding(this);
21215     if (!VM_Version::supports_avx512vl()) {
21216       vlen_enc = Assembler::AVX_512bit;
21217     }
21218     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21219   %}
21220   ins_pipe( pipe_slow );
21221 %}
21222 
21223 //Integer variable shift
21224 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21225   predicate(n->as_ShiftV()->is_var_shift());
21226   match(Set dst ( LShiftVI src shift));
21227   match(Set dst ( RShiftVI src shift));
21228   match(Set dst (URShiftVI src shift));
21229   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21230   ins_encode %{
21231     assert(UseAVX >= 2, "required");
21232 
21233     int opcode = this->ideal_Opcode();
21234     int vlen_enc = vector_length_encoding(this);
21235     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21236   %}
21237   ins_pipe( pipe_slow );
21238 %}
21239 
21240 //Long variable shift
21241 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21242   predicate(n->as_ShiftV()->is_var_shift());
21243   match(Set dst ( LShiftVL src shift));
21244   match(Set dst (URShiftVL src shift));
21245   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21246   ins_encode %{
21247     assert(UseAVX >= 2, "required");
21248 
21249     int opcode = this->ideal_Opcode();
21250     int vlen_enc = vector_length_encoding(this);
21251     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21252   %}
21253   ins_pipe( pipe_slow );
21254 %}
21255 
21256 //Long variable right shift arithmetic
21257 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21258   predicate(Matcher::vector_length(n) <= 4 &&
21259             n->as_ShiftV()->is_var_shift() &&
21260             UseAVX == 2);
21261   match(Set dst (RShiftVL src shift));
21262   effect(TEMP dst, TEMP vtmp);
21263   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21264   ins_encode %{
21265     int opcode = this->ideal_Opcode();
21266     int vlen_enc = vector_length_encoding(this);
21267     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21268                  $vtmp$$XMMRegister);
21269   %}
21270   ins_pipe( pipe_slow );
21271 %}
21272 
21273 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21274   predicate(n->as_ShiftV()->is_var_shift() &&
21275             UseAVX > 2);
21276   match(Set dst (RShiftVL src shift));
21277   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21278   ins_encode %{
21279     int opcode = this->ideal_Opcode();
21280     int vlen_enc = vector_length_encoding(this);
21281     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21282   %}
21283   ins_pipe( pipe_slow );
21284 %}
21285 
21286 // --------------------------------- AND --------------------------------------
21287 
21288 instruct vand(vec dst, vec src) %{
21289   predicate(UseAVX == 0);
21290   match(Set dst (AndV dst src));
21291   format %{ "pand    $dst,$src\t! and vectors" %}
21292   ins_encode %{
21293     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21294   %}
21295   ins_pipe( pipe_slow );
21296 %}
21297 
21298 instruct vand_reg(vec dst, vec src1, vec src2) %{
21299   predicate(UseAVX > 0);
21300   match(Set dst (AndV src1 src2));
21301   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21302   ins_encode %{
21303     int vlen_enc = vector_length_encoding(this);
21304     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21305   %}
21306   ins_pipe( pipe_slow );
21307 %}
21308 
21309 instruct vand_mem(vec dst, vec src, memory mem) %{
21310   predicate((UseAVX > 0) &&
21311             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21312   match(Set dst (AndV src (LoadVector mem)));
21313   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21314   ins_encode %{
21315     int vlen_enc = vector_length_encoding(this);
21316     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21317   %}
21318   ins_pipe( pipe_slow );
21319 %}
21320 
21321 // --------------------------------- OR ---------------------------------------
21322 
21323 instruct vor(vec dst, vec src) %{
21324   predicate(UseAVX == 0);
21325   match(Set dst (OrV dst src));
21326   format %{ "por     $dst,$src\t! or vectors" %}
21327   ins_encode %{
21328     __ por($dst$$XMMRegister, $src$$XMMRegister);
21329   %}
21330   ins_pipe( pipe_slow );
21331 %}
21332 
21333 instruct vor_reg(vec dst, vec src1, vec src2) %{
21334   predicate(UseAVX > 0);
21335   match(Set dst (OrV src1 src2));
21336   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21337   ins_encode %{
21338     int vlen_enc = vector_length_encoding(this);
21339     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21340   %}
21341   ins_pipe( pipe_slow );
21342 %}
21343 
21344 instruct vor_mem(vec dst, vec src, memory mem) %{
21345   predicate((UseAVX > 0) &&
21346             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21347   match(Set dst (OrV src (LoadVector mem)));
21348   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21349   ins_encode %{
21350     int vlen_enc = vector_length_encoding(this);
21351     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21352   %}
21353   ins_pipe( pipe_slow );
21354 %}
21355 
21356 // --------------------------------- XOR --------------------------------------
21357 
21358 instruct vxor(vec dst, vec src) %{
21359   predicate(UseAVX == 0);
21360   match(Set dst (XorV dst src));
21361   format %{ "pxor    $dst,$src\t! xor vectors" %}
21362   ins_encode %{
21363     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21364   %}
21365   ins_pipe( pipe_slow );
21366 %}
21367 
21368 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21369   predicate(UseAVX > 0);
21370   match(Set dst (XorV src1 src2));
21371   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21372   ins_encode %{
21373     int vlen_enc = vector_length_encoding(this);
21374     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21375   %}
21376   ins_pipe( pipe_slow );
21377 %}
21378 
21379 instruct vxor_mem(vec dst, vec src, memory mem) %{
21380   predicate((UseAVX > 0) &&
21381             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21382   match(Set dst (XorV src (LoadVector mem)));
21383   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21384   ins_encode %{
21385     int vlen_enc = vector_length_encoding(this);
21386     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21387   %}
21388   ins_pipe( pipe_slow );
21389 %}
21390 
21391 // --------------------------------- VectorCast --------------------------------------
21392 
21393 instruct vcastBtoX(vec dst, vec src) %{
21394   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21395   match(Set dst (VectorCastB2X src));
21396   format %{ "vector_cast_b2x $dst,$src\t!" %}
21397   ins_encode %{
21398     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21399     int vlen_enc = vector_length_encoding(this);
21400     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21401   %}
21402   ins_pipe( pipe_slow );
21403 %}
21404 
21405 instruct vcastBtoD(legVec dst, legVec src) %{
21406   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21407   match(Set dst (VectorCastB2X src));
21408   format %{ "vector_cast_b2x $dst,$src\t!" %}
21409   ins_encode %{
21410     int vlen_enc = vector_length_encoding(this);
21411     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21412   %}
21413   ins_pipe( pipe_slow );
21414 %}
21415 
21416 instruct castStoX(vec dst, vec src) %{
21417   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21418             Matcher::vector_length(n->in(1)) <= 8 && // src
21419             Matcher::vector_element_basic_type(n) == T_BYTE);
21420   match(Set dst (VectorCastS2X src));
21421   format %{ "vector_cast_s2x $dst,$src" %}
21422   ins_encode %{
21423     assert(UseAVX > 0, "required");
21424 
21425     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21426     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21427   %}
21428   ins_pipe( pipe_slow );
21429 %}
21430 
21431 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21432   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21433             Matcher::vector_length(n->in(1)) == 16 && // src
21434             Matcher::vector_element_basic_type(n) == T_BYTE);
21435   effect(TEMP dst, TEMP vtmp);
21436   match(Set dst (VectorCastS2X src));
21437   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21438   ins_encode %{
21439     assert(UseAVX > 0, "required");
21440 
21441     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21442     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21443     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21444     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21445   %}
21446   ins_pipe( pipe_slow );
21447 %}
21448 
21449 instruct vcastStoX_evex(vec dst, vec src) %{
21450   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21451             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21452   match(Set dst (VectorCastS2X src));
21453   format %{ "vector_cast_s2x $dst,$src\t!" %}
21454   ins_encode %{
21455     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21456     int src_vlen_enc = vector_length_encoding(this, $src);
21457     int vlen_enc = vector_length_encoding(this);
21458     switch (to_elem_bt) {
21459       case T_BYTE:
21460         if (!VM_Version::supports_avx512vl()) {
21461           vlen_enc = Assembler::AVX_512bit;
21462         }
21463         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21464         break;
21465       case T_INT:
21466         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21467         break;
21468       case T_FLOAT:
21469         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21470         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21471         break;
21472       case T_LONG:
21473         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21474         break;
21475       case T_DOUBLE: {
21476         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21477         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21478         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21479         break;
21480       }
21481       default:
21482         ShouldNotReachHere();
21483     }
21484   %}
21485   ins_pipe( pipe_slow );
21486 %}
21487 
21488 instruct castItoX(vec dst, vec src) %{
21489   predicate(UseAVX <= 2 &&
21490             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21491             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21492   match(Set dst (VectorCastI2X src));
21493   format %{ "vector_cast_i2x $dst,$src" %}
21494   ins_encode %{
21495     assert(UseAVX > 0, "required");
21496 
21497     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21498     int vlen_enc = vector_length_encoding(this, $src);
21499 
21500     if (to_elem_bt == T_BYTE) {
21501       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21502       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21503       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21504     } else {
21505       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21506       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21507       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21508     }
21509   %}
21510   ins_pipe( pipe_slow );
21511 %}
21512 
21513 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21514   predicate(UseAVX <= 2 &&
21515             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21516             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21517   match(Set dst (VectorCastI2X src));
21518   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21519   effect(TEMP dst, TEMP vtmp);
21520   ins_encode %{
21521     assert(UseAVX > 0, "required");
21522 
21523     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21524     int vlen_enc = vector_length_encoding(this, $src);
21525 
21526     if (to_elem_bt == T_BYTE) {
21527       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21528       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21529       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21530       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21531     } else {
21532       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21533       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21534       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21535       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21536     }
21537   %}
21538   ins_pipe( pipe_slow );
21539 %}
21540 
21541 instruct vcastItoX_evex(vec dst, vec src) %{
21542   predicate(UseAVX > 2 ||
21543             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21544   match(Set dst (VectorCastI2X src));
21545   format %{ "vector_cast_i2x $dst,$src\t!" %}
21546   ins_encode %{
21547     assert(UseAVX > 0, "required");
21548 
21549     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21550     int src_vlen_enc = vector_length_encoding(this, $src);
21551     int dst_vlen_enc = vector_length_encoding(this);
21552     switch (dst_elem_bt) {
21553       case T_BYTE:
21554         if (!VM_Version::supports_avx512vl()) {
21555           src_vlen_enc = Assembler::AVX_512bit;
21556         }
21557         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21558         break;
21559       case T_SHORT:
21560         if (!VM_Version::supports_avx512vl()) {
21561           src_vlen_enc = Assembler::AVX_512bit;
21562         }
21563         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21564         break;
21565       case T_FLOAT:
21566         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21567         break;
21568       case T_LONG:
21569         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21570         break;
21571       case T_DOUBLE:
21572         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21573         break;
21574       default:
21575         ShouldNotReachHere();
21576     }
21577   %}
21578   ins_pipe( pipe_slow );
21579 %}
21580 
21581 instruct vcastLtoBS(vec dst, vec src) %{
21582   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21583             UseAVX <= 2);
21584   match(Set dst (VectorCastL2X src));
21585   format %{ "vector_cast_l2x  $dst,$src" %}
21586   ins_encode %{
21587     assert(UseAVX > 0, "required");
21588 
21589     int vlen = Matcher::vector_length_in_bytes(this, $src);
21590     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
21591     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21592                                                       : ExternalAddress(vector_int_to_short_mask());
21593     if (vlen <= 16) {
21594       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21595       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21596       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21597     } else {
21598       assert(vlen <= 32, "required");
21599       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21600       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21601       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21602       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21603     }
21604     if (to_elem_bt == T_BYTE) {
21605       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21606     }
21607   %}
21608   ins_pipe( pipe_slow );
21609 %}
21610 
21611 instruct vcastLtoX_evex(vec dst, vec src) %{
21612   predicate(UseAVX > 2 ||
21613             (Matcher::vector_element_basic_type(n) == T_INT ||
21614              Matcher::vector_element_basic_type(n) == T_FLOAT ||
21615              Matcher::vector_element_basic_type(n) == T_DOUBLE));
21616   match(Set dst (VectorCastL2X src));
21617   format %{ "vector_cast_l2x  $dst,$src\t!" %}
21618   ins_encode %{
21619     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21620     int vlen = Matcher::vector_length_in_bytes(this, $src);
21621     int vlen_enc = vector_length_encoding(this, $src);
21622     switch (to_elem_bt) {
21623       case T_BYTE:
21624         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21625           vlen_enc = Assembler::AVX_512bit;
21626         }
21627         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21628         break;
21629       case T_SHORT:
21630         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21631           vlen_enc = Assembler::AVX_512bit;
21632         }
21633         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21634         break;
21635       case T_INT:
21636         if (vlen == 8) {
21637           if ($dst$$XMMRegister != $src$$XMMRegister) {
21638             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21639           }
21640         } else if (vlen == 16) {
21641           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21642         } else if (vlen == 32) {
21643           if (UseAVX > 2) {
21644             if (!VM_Version::supports_avx512vl()) {
21645               vlen_enc = Assembler::AVX_512bit;
21646             }
21647             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21648           } else {
21649             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
21650             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
21651           }
21652         } else { // vlen == 64
21653           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21654         }
21655         break;
21656       case T_FLOAT:
21657         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21658         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21659         break;
21660       case T_DOUBLE:
21661         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21662         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21663         break;
21664 
21665       default: assert(false, "%s", type2name(to_elem_bt));
21666     }
21667   %}
21668   ins_pipe( pipe_slow );
21669 %}
21670 
// Float -> double vector cast: selected for VectorCastF2X nodes whose result
// element type is T_DOUBLE. Emits a single vcvtps2pd.
21671 instruct vcastFtoD_reg(vec dst, vec src) %{
21672   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
21673   match(Set dst (VectorCastF2X src));
21674   format %{ "vector_cast_f2d  $dst,$src\t!" %}
21675   ins_encode %{
21676     int vlen_enc = vector_length_encoding(this); // encoding queried for this node itself, not for the src operand
21677     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21678   %}
21679   ins_pipe( pipe_slow );
21680 %}
21681 
21682 
// Float -> integral vector cast for targets without AVX10.2 and without
// AVX512VL, source vectors shorter than 64 bytes and result elements of at
// most 4 bytes. Delegates to vector_castF2X_avx; needs four vector
// temporaries and kills the flags register.
21683 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
21684   predicate(!VM_Version::supports_avx10_2() &&
21685             !VM_Version::supports_avx512vl() &&
21686             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21687             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
21688             is_integral_type(Matcher::vector_element_basic_type(n)));
21689   match(Set dst (VectorCastF2X src));
21690   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
21691   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
21692   ins_encode %{
21693     int vlen_enc = vector_length_encoding(this, $src);
21694     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21695     // JDK-8292878 removed the need for an explicit scratch register to load addresses wider
21696     // than 32 bits for register-indirect addressing, since stub constants are part of the
21697     // code cache and ReservedCodeCacheSize is currently capped at 2G.
21698     // Targets are free to raise this limit, but a code cache larger than 2G looks
21699     // unreasonable in practice. On the flip side, with the given cap we save a temporary
21700     // register allocation, which in the limiting case can prevent spilling in blocks with
21701     // high register pressure.
21702     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21703                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
21704                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21705   %}
21706   ins_pipe( pipe_slow );
21707 %}
21708 
21709 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
21710   predicate(!VM_Version::supports_avx10_2() &&
21711             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
21712             is_integral_type(Matcher::vector_element_basic_type(n)));
21713   match(Set dst (VectorCastF2X src));
21714   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21715   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
21716   ins_encode %{
21717     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21718     if (to_elem_bt == T_LONG) {
21719       int vlen_enc = vector_length_encoding(this);
21720       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21721                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
21722                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
21723     } else {
21724       int vlen_enc = vector_length_encoding(this, $src);
21725       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21726                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
21727                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21728     }
21729   %}
21730   ins_pipe( pipe_slow );
21731 %}
21732 
21733 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
21734   predicate(VM_Version::supports_avx10_2() &&
21735             is_integral_type(Matcher::vector_element_basic_type(n)));
21736   match(Set dst (VectorCastF2X src));
21737   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
21738   ins_encode %{
21739     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21740     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
21741     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21742   %}
21743   ins_pipe( pipe_slow );
21744 %}
21745 
21746 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
21747   predicate(VM_Version::supports_avx10_2() &&
21748             is_integral_type(Matcher::vector_element_basic_type(n)));
21749   match(Set dst (VectorCastF2X (LoadVector src)));
21750   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
21751   ins_encode %{
21752     int vlen = Matcher::vector_length(this);
21753     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21754     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
21755     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
21756   %}
21757   ins_pipe( pipe_slow );
21758 %}
21759 
// Double -> float vector cast: selected for VectorCastD2X nodes whose result
// element type is T_FLOAT. Emits a single vcvtpd2ps.
21760 instruct vcastDtoF_reg(vec dst, vec src) %{
21761   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
21762   match(Set dst (VectorCastD2X src));
21763   format %{ "vector_cast_d2x  $dst,$src\t!" %}
21764   ins_encode %{
21765     int vlen_enc = vector_length_encoding(this, $src); // encoding queried for the src operand
21766     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21767   %}
21768   ins_pipe( pipe_slow );
21769 %}
21770 
// Double -> integral vector cast for targets without AVX10.2 and without
// AVX512VL, for source vectors shorter than 64 bytes. Delegates to
// vector_castD2X_avx; needs five vector temporaries and kills the flags
// register.
21771 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
21772   predicate(!VM_Version::supports_avx10_2() &&
21773             !VM_Version::supports_avx512vl() &&
21774             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21775             is_integral_type(Matcher::vector_element_basic_type(n)));
21776   match(Set dst (VectorCastD2X src));
21777   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
21778   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
21779   ins_encode %{
21780     int vlen_enc = vector_length_encoding(this, $src); // encoding queried for the src operand
21781     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
           // Note: the float sign-flip constant is what this rule passes, even though the source is double.
21782     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21783                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
21784                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21785   %}
21786   ins_pipe( pipe_slow );
21787 %}
21788 
// Double -> integral vector cast for targets without AVX10.2 that either
// support AVX512VL or cast a 64-byte source. Delegates to
// vector_castD2X_evex; needs two vector and two mask-register temporaries and
// kills the flags register.
21789 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
21790   predicate(!VM_Version::supports_avx10_2() &&
21791             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
21792             is_integral_type(Matcher::vector_element_basic_type(n)));
21793   match(Set dst (VectorCastD2X src));
21794   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21795   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
21796   ins_encode %{
21797     int vlen_enc = vector_length_encoding(this, $src); // encoding queried for the src operand
21798     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
           // The sign-flip constant depends on AVX512DQ support: double variant with DQ, float variant without.
21799     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
21800                               ExternalAddress(vector_float_signflip());
21801     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21802                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
21803   %}
21804   ins_pipe( pipe_slow );
21805 %}
21806 
// Double -> integral vector cast (register source) for targets that support
// AVX10.2. Delegates to vector_castD2X_avx10_2; no temporaries are declared.
21807 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
21808   predicate(VM_Version::supports_avx10_2() &&
21809             is_integral_type(Matcher::vector_element_basic_type(n)));
21810   match(Set dst (VectorCastD2X src));
21811   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
21812   ins_encode %{
21813     int vlen_enc = vector_length_encoding(this, $src); // encoding queried for the src operand
21814     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21815     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21816   %}
21817   ins_pipe( pipe_slow );
21818 %}
21819 
// Double -> integral vector cast for targets that support AVX10.2 where the
// source is a LoadVector matched as a memory operand. Delegates to the
// Address form of vector_castD2X_avx10_2.
21820 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
21821   predicate(VM_Version::supports_avx10_2() &&
21822             is_integral_type(Matcher::vector_element_basic_type(n)));
21823   match(Set dst (VectorCastD2X (LoadVector src)));
21824   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
21825   ins_encode %{
21826     int vlen = Matcher::vector_length(this);
21827     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble)); // byte size = lane count times the size of a double
21828     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21829     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
21830   %}
21831   ins_pipe( pipe_slow );
21832 %}
21833 
// Unsigned vector cast from byte, short or int sources. One rule covers the
// three VectorUCast*2X nodes and delegates to vector_unsigned_cast, passing
// the source and result element types. There is no predicate on this rule.
21834 instruct vucast(vec dst, vec src) %{
21835   match(Set dst (VectorUCastB2X src));
21836   match(Set dst (VectorUCastS2X src));
21837   match(Set dst (VectorUCastI2X src));
21838   format %{ "vector_ucast $dst,$src\t!" %}
21839   ins_encode %{
21840     assert(UseAVX > 0, "required");
21841
21842     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
21843     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21844     int vlen_enc = vector_length_encoding(this); // encoding queried for this node itself, not for the src operand
21845     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
21846   %}
21847   ins_pipe( pipe_slow );
21848 %}
21849 
// RoundVF producing int elements on targets without AVX512VL, for result
// vectors shorter than 64 bytes. Delegates to vector_round_float_avx; needs a
// pointer register, four vector temporaries and kills the flags register.
21850 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
21851   predicate(!VM_Version::supports_avx512vl() &&
21852             Matcher::vector_length_in_bytes(n) < 64 &&
21853             Matcher::vector_element_basic_type(n) == T_INT);
21854   match(Set dst (RoundVF src));
21855   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
21856   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
21857   ins_encode %{
21858     int vlen_enc = vector_length_encoding(this);
           // Address of a jint constant passed to the helper as new_mxcsr; its value is chosen by EnableX86ECoreOpts.
           // NOTE(review): the constantaddress argument is presumably also expanded by ADLC outside
           // this block, so it should not reference locals declared here - confirm.
21859     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
21860     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
21861                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
21862                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
21863   %}
21864   ins_pipe( pipe_slow );
21865 %}
21866 
// RoundVF producing int elements when AVX512VL is supported or the result
// vector is 64 bytes. Delegates to vector_round_float_evex; needs a pointer
// register, two vector and two mask-register temporaries and kills the flags
// register.
21867 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
21868   predicate((VM_Version::supports_avx512vl() ||
21869              Matcher::vector_length_in_bytes(n) == 64) &&
21870              Matcher::vector_element_basic_type(n) == T_INT);
21871   match(Set dst (RoundVF src));
21872   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21873   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
21874   ins_encode %{
21875     int vlen_enc = vector_length_encoding(this);
           // Address of a jint constant passed to the helper as new_mxcsr; its value is chosen by EnableX86ECoreOpts.
           // NOTE(review): the constantaddress argument is presumably also expanded by ADLC outside
           // this block, so it should not reference locals declared here - confirm.
21876     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
21877     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
21878                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
21879                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
21880   %}
21881   ins_pipe( pipe_slow );
21882 %}
21883 
// RoundVD producing long elements. Delegates to vector_round_double_evex with
// the double sign-flip stub constant; needs a pointer register, two vector
// and two mask-register temporaries and kills the flags register. The
// predicate only checks the result element type.
21884 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
21885   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
21886   match(Set dst (RoundVD src));
21887   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
21888   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
21889   ins_encode %{
21890     int vlen_enc = vector_length_encoding(this);
           // Address of a jint constant passed to the helper as new_mxcsr; its value is chosen by EnableX86ECoreOpts.
           // NOTE(review): the constantaddress argument is presumably also expanded by ADLC outside
           // this block, so it should not reference locals declared here - confirm.
21891     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
21892     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
21893                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
21894                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
21895   %}
21896   ins_pipe( pipe_slow );
21897 %}
21898 
21899 // --------------------------------- VectorMaskCmp --------------------------------------
21900 
21901 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
21902   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
21903             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
21904             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
21905             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
21906   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21907   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
21908   ins_encode %{
21909     int vlen_enc = vector_length_encoding(this, $src1);
21910     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
21911     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
21912       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21913     } else {
21914       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21915     }
21916   %}
21917   ins_pipe( pipe_slow );
21918 %}
21919 
21920 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
21921   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
21922             n->bottom_type()->isa_pvectmask() == nullptr &&
21923             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
21924   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21925   effect(TEMP ktmp);
21926   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
21927   ins_encode %{
21928     int vlen_enc = Assembler::AVX_512bit;
21929     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
21930     KRegister mask = k0; // The comparison itself is not being masked.
21931     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
21932       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21933       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
21934     } else {
21935       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21936       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
21937     }
21938   %}
21939   ins_pipe( pipe_slow );
21940 %}
21941 
21942 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
21943   predicate(n->bottom_type()->isa_pvectmask() &&
21944             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
21945   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21946   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
21947   ins_encode %{
21948     assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
21949     int vlen_enc = vector_length_encoding(this, $src1);
21950     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
21951     KRegister mask = k0; // The comparison itself is not being masked.
21952     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
21953       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21954     } else {
21955       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21956     }
21957   %}
21958   ins_pipe( pipe_slow );
21959 %}
21960 
// Signed integral vector compare for the conditions eq, lt and gt, with a
// result that is not a predicate-mask type and first-source vectors of 4 to
// 32 bytes. Delegates to vpcmpCCW and passes xnoreg in the temp position, so
// no temporary register is declared.
21961 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
21962   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
21963             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
21964             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
21965             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
21966             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
21967             (n->in(2)->get_int() == BoolTest::eq ||
21968              n->in(2)->get_int() == BoolTest::lt ||
21969              n->in(2)->get_int() == BoolTest::gt)); // cond
21970   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21971   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
21972   ins_encode %{
21973     int vlen_enc = vector_length_encoding(this, $src1);
21974     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
21975     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); // element width of the first source
21976     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
21977   %}
21978   ins_pipe( pipe_slow );
21979 %}
21980 
// Signed integral vector compare for the conditions ne, le and ge, with a
// result that is not a predicate-mask type and first-source vectors of 4 to
// 32 bytes. Same vpcmpCCW call as vcmp_direct, except that a real temporary
// (xtmp) is passed and dst is declared TEMP as well.
21981 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
21982   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
21983             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
21984             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
21985             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
21986             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
21987             (n->in(2)->get_int() == BoolTest::ne ||
21988              n->in(2)->get_int() == BoolTest::le ||
21989              n->in(2)->get_int() == BoolTest::ge)); // cond
21990   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21991   effect(TEMP dst, TEMP xtmp);
21992   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
21993   ins_encode %{
21994     int vlen_enc = vector_length_encoding(this, $src1);
21995     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
21996     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); // element width of the first source
21997     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
21998   %}
21999   ins_pipe( pipe_slow );
22000 %}
22001 
// Unsigned integral vector compare with a result that is not a predicate-mask
// type, for first-source vectors of 4 to 32 bytes. Both inputs are XORed with
// a broadcast constant built from high_bit_set(element type) and the results
// are then compared with vpcmpCCW.
22002 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22003   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22004             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22005             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22006             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22007             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22008   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22009   effect(TEMP dst, TEMP xtmp);
22010   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22011   ins_encode %{
           // NOTE(review): the constantaddress argument is presumably also expanded by ADLC outside
           // this block, so it should not reference locals declared here - confirm.
22012     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22013     int vlen_enc = vector_length_encoding(this, $src1);
22014     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22015     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22016
           // Broadcast the flip constant into xtmp; the instruction used depends on the vector length.
22017     if (vlen_enc == Assembler::AVX_128bit) {
22018       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22019     } else {
22020       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22021     }
           // dst = flip ^ src1 and xtmp = flip ^ src2. The second XOR overwrites the flip
           // constant, so the order of these two instructions must not be swapped.
22022     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22023     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
           // xtmp is passed both as the second compare input and in the temp position.
22024     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22025   %}
22026   ins_pipe( pipe_slow );
22027 %}
22028 
22029 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22030   predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
22031              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22032              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22033   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22034   effect(TEMP ktmp);
22035   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22036   ins_encode %{
22037     assert(UseAVX > 2, "required");
22038 
22039     int vlen_enc = vector_length_encoding(this, $src1);
22040     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22041     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22042     KRegister mask = k0; // The comparison itself is not being masked.
22043     bool merge = false;
22044     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22045 
22046     switch (src1_elem_bt) {
22047       case T_INT: {
22048         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22049         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22050         break;
22051       }
22052       case T_LONG: {
22053         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22054         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22055         break;
22056       }
22057       default: assert(false, "%s", type2name(src1_elem_bt));
22058     }
22059   %}
22060   ins_pipe( pipe_slow );
22061 %}
22062 
22063 
// Integral vector compare whose result lands in an opmask register (kReg)
// instead of a vector. The element type is taken from src1; the four cases
// below differ only in which evpcmp{b,w,d,q} helper is invoked.
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");

    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
    const int enc = vector_length_encoding(this, $src1);
    const Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
    // The compare helpers are handed the negation of the unsigned property of cond.
    const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);

    KRegister   result = $dst$$KRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;

    // Comparison is dispatched on the element type; k0 is passed as the mask argument.
    switch (elem_bt) {
      case T_BYTE:
        __ evpcmpb(result, k0, lhs, rhs, pred, is_signed, enc);
        break;
      case T_SHORT:
        __ evpcmpw(result, k0, lhs, rhs, pred, is_signed, enc);
        break;
      case T_INT:
        __ evpcmpd(result, k0, lhs, rhs, pred, is_signed, enc);
        break;
      case T_LONG:
        __ evpcmpq(result, k0, lhs, rhs, pred, is_signed, enc);
        break;
      default:
        assert(false, "%s", type2name(elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22101 
22102 // Extract
22103 
// Scalar extraction of an int, short or byte element (three match rules) from
// a vector of at most 16 bytes. The element type is derived from src.
instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    Register    result = $dst$$Register;
    XMMRegister vector = $src$$XMMRegister;
    const BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(bt, result, vector, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22118 
// Int/short/byte element extraction for 32-byte and 64-byte source vectors.
// Done in two steps: get_lane returns the register to read from (vtmp is
// offered to it as scratch), then get_elem moves the element into dst.
instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp);
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    const BasicType bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister scratch = $vtmp$$XMMRegister;
    XMMRegister lane = __ get_lane(bt, scratch, $src$$XMMRegister, $idx$$constant);
    __ get_elem(bt, $dst$$Register, lane, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22136 
// Extract one long element from a vector that holds at most two longs.
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    Register    result = $dst$$Register;
    XMMRegister vector = $src$$XMMRegister;
    __ get_elem(T_LONG, result, vector, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22149 
// Long element extraction for vectors of four or eight longs. get_lane
// returns the register to read from (vtmp is its scratch argument) and
// get_elem then moves the element into dst.
instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp);
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister scratch = $vtmp$$XMMRegister;
    XMMRegister lane = __ get_lane(T_LONG, scratch, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22164 
// Extract one float element from a vector of at most four floats. Both dst
// and vtmp are declared TEMP; vtmp is passed to get_elem as its extra
// register argument.
instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister vector  = $src$$XMMRegister;
    XMMRegister scratch = $vtmp$$XMMRegister;
    __ get_elem(T_FLOAT, result, vector, $idx$$constant, scratch);
  %}
  ins_pipe( pipe_slow );
%}
22177 
// Float element extraction for vectors of 8 or 16 floats: get_lane returns
// the register to read from (vtmp is its scratch argument), get_elem then
// moves the element into dst.
instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp);
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister scratch = $vtmp$$XMMRegister;
    XMMRegister lane = __ get_lane(T_FLOAT, scratch, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22192 
// Extract one double element from a vector of exactly two doubles.
instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister result = $dst$$XMMRegister;
    XMMRegister vector = $src$$XMMRegister;
    __ get_elem(T_DOUBLE, result, vector, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22204 
// Double element extraction for vectors of four or eight doubles: get_lane
// returns the register to read from (vtmp is its scratch argument), get_elem
// then moves the element into dst.
instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp);
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister scratch = $vtmp$$XMMRegister;
    XMMRegister lane = __ get_lane(T_DOUBLE, scratch, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22219 
22220 // --------------------------------- Vector Blend --------------------------------------
22221 
// Vector blend for the UseAVX == 0 configuration. dst is both an input and
// the result. tmp has operand class rxmm0 and receives a copy of the mask
// whenever the mask is not already in that register.
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP tmp);
  format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    XMMRegister selector      = $mask$$XMMRegister;
    XMMRegister implicit_mask = $tmp$$XMMRegister;
    if (implicit_mask != selector) {
      __ movdqu(implicit_mask, selector);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}
22237 
// AVX blend of integral vectors of at most 32 bytes under a vector (non
// opmask) mask. Selected only when EnableX86ECoreOpts is off.
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister first    = $src1$$XMMRegister;
    XMMRegister second   = $src2$$XMMRegister;
    XMMRegister selector = $mask$$XMMRegister;
    __ vpblendvb(result, first, second, selector, enc);
  %}
  ins_pipe( pipe_slow );
%}
22251 
// AVX blend of non-integral vectors of at most 32 bytes under a vector (non
// opmask) mask. Selected only when EnableX86ECoreOpts is off.
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister first    = $src1$$XMMRegister;
    XMMRegister second   = $src2$$XMMRegister;
    XMMRegister selector = $mask$$XMMRegister;
    __ vblendvps(result, first, second, selector, enc);
  %}
  ins_pipe( pipe_slow );
%}
22265 
// Blend for vectors of at most 32 bytes when EnableX86ECoreOpts is on. The
// blend is composed from three bitwise operations (vpandn, vpand, vpor)
// instead of a single blend instruction.
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP vtmp, TEMP dst);
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister scratch  = $vtmp$$XMMRegister;
    XMMRegister selector = $mask$$XMMRegister;

    // scratch is produced from (mask, src1) by vpandn
    __ vpandn(scratch, selector, $src1$$XMMRegister, enc);
    // result is produced from (mask, src2) by vpand
    __ vpand(result, selector, $src2$$XMMRegister, enc);
    // the two partial results are merged with vpor
    __ vpor(result, result, scratch, enc);
  %}
  ins_pipe( pipe_slow );
%}
22281 
// Blend of 64-byte vectors whose mask is an ordinary vector (not an opmask).
// The vector mask is first turned into an opmask in ktmp by comparing it for
// equality against the all-bits-set constant; that opmask then drives the
// blend. The format string names the allocated ktmp operand as the TEMP.
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // ktmp = (mask == all bits set), evaluated per element of type elem_bt
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    // blend src1 and src2 under ktmp; the merge argument is passed as true
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22296 
22297 
// Blend driven directly by an opmask register. Subword element types are
// accepted only when AVX512BW is available. No temporary register is
// declared, so the format string does not mention one.
instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // blend src1 and src2 under the opmask; the merge argument is passed as true
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22311 
22312 // --------------------------------- ABS --------------------------------------
22313 // a = |a|
// Absolute value of packed bytes. Vectors of up to 16 elements use the
// two-operand pabsb form; longer vectors use vpabsb with a length encoding.
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB  src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    const uint lanes = Matcher::vector_length(this);
    if (lanes > 16) {
      const int enc = vector_length_encoding(this);
      __ vpabsb(result, input, enc);
    } else {
      __ pabsb(result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
22328 
// Absolute value of packed shorts. Vectors of up to 8 elements use the
// two-operand pabsw form; longer vectors use vpabsw with a length encoding.
instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS  src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    const uint lanes = Matcher::vector_length(this);
    if (lanes > 8) {
      const int enc = vector_length_encoding(this);
      __ vpabsw(result, input, enc);
    } else {
      __ pabsw(result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
22343 
// Absolute value of packed ints. Vectors of up to 4 elements use the
// two-operand pabsd form; longer vectors use vpabsd with a length encoding.
instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI  src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    const uint lanes = Matcher::vector_length(this);
    if (lanes > 4) {
      const int enc = vector_length_encoding(this);
      __ vpabsd(result, input, enc);
    } else {
      __ pabsd(result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
22358 
// Absolute value of packed longs via evpabsq. When AVX512VL is not
// available the 512-bit encoding is used regardless of the vector length of
// this node.
instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL  src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int natural_enc = vector_length_encoding(this);
    const int enc = VM_Version::supports_avx512vl() ? natural_enc : Assembler::AVX_512bit;
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
22372 
22373 // --------------------------------- ABSNEG --------------------------------------
22374 
// Abs or negate of packed floats for every vector length except 4 (length 4
// is covered by vabsneg4F). The ideal opcode of the matched node is passed
// to vabsnegf so it can tell the two operations apart.
instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  ins_cost(150);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    int vlen = Matcher::vector_length(this);
    if (vlen != 2) {
      assert(vlen == 8 || vlen == 16, "required");
      const int enc = vector_length_encoding(this);
      __ vabsnegf(ideal_op, result, input, enc);
    } else {
      __ vabsnegf(ideal_op, result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
22394 
// Abs or negate of a vector of exactly four floats, performed in place:
// dst is both the input and the result.
instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  ins_cost(150);
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_encode %{
    XMMRegister in_out = $dst$$XMMRegister;
    const int ideal_op = this->ideal_Opcode();
    __ vabsnegf(ideal_op, in_out, in_out);
  %}
  ins_pipe( pipe_slow );
%}
22407 
// Abs or negate of packed doubles. Two-element vectors use the vabsnegd
// form without a length encoding; all other lengths pass one explicitly.
instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD  src));
  match(Set dst (NegVD  src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    const uint lanes = Matcher::vector_length(this);
    if (lanes != 2) {
      const int enc = vector_length_encoding(this);
      __ vabsnegd(ideal_op, result, input, enc);
    } else {
      __ vabsnegd(ideal_op, result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
22424 
22425 //------------------------------------- VectorTest --------------------------------------------
22426 
// VectorTest on vector operands shorter than 16 bytes. The result is the
// flags register; vtmp is handed to vectortest as a scratch register.
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
    XMMRegister lhs     = $src1$$XMMRegister;
    XMMRegister rhs     = $src2$$XMMRegister;
    XMMRegister scratch = $vtmp$$XMMRegister;
    __ vectortest(elem_bt, lhs, rhs, scratch, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
22439 
// VectorTest on vector operands of 16 bytes or more. No scratch register is
// declared; xnoreg is passed in its place.
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16  $src1, $src2\n\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vectortest(elem_bt, lhs, rhs, xnoreg, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
22451 
// VectorTest with predicate BoolTest::overflow on opmask operands with fewer
// than 8 lanes, or exactly 8 lanes when AVX512DQ is missing. The opmask is
// copied to a GPR, truncated to the low masklen bits and compared against
// the value with all of those bits set. Only src1 is read by the encoding.
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    const int lane_bits = (1 << masklen) - 1;
    Register scratch = $tmp$$Register;
    __ kmovwl(scratch, $src1$$KRegister);
    __ andl(scratch, lane_bits);
    __ cmpl(scratch, lane_bits);
  %}
  ins_pipe( pipe_slow );
%}
22467 
// VectorTest with predicate BoolTest::ne on opmask operands with fewer than
// 8 lanes, or exactly 8 lanes when AVX512DQ is missing. The opmask is copied
// to a GPR and truncated to the low masklen bits; the andl is the last
// instruction emitted. Only src1 is read by the encoding.
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    const int lane_bits = (1 << masklen) - 1;
    Register scratch = $tmp$$Register;
    __ kmovwl(scratch, $src1$$KRegister);
    __ andl(scratch, lane_bits);
  %}
  ins_pipe( pipe_slow );
%}
22482 
// VectorTest on opmask operands with 16 or more lanes, or exactly 8 lanes
// when AVX512DQ is available. A single kortest of src1 against itself is
// emitted; src2 is not read by the encoding.
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8  $src1, $src2\n\t" %}
  ins_encode %{
    KRegister lanes = $src1$$KRegister;
    const uint lane_count = Matcher::vector_length(this, $src1);
    __ kortest(lane_count, lanes, lanes);
  %}
  ins_pipe( pipe_slow );
%}
22494 
22495 //------------------------------------- LoadMask --------------------------------------------
22496 
// VectorLoadMask producing a vector (non opmask) result on hardware without
// AVX512VL+BW. dst is declared TEMP.
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  effect(TEMP dst);
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister bools  = $src$$XMMRegister;
    __ load_vector_mask(result, bools, size_in_bytes, bt, true);
  %}
  ins_pipe( pipe_slow );
%}
22509 
// VectorLoadMask producing an opmask on hardware without AVX512VL+BW. The
// 512-bit encoding is always used and the boolean argument of
// load_vector_mask is true.
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  effect(TEMP xtmp);
  ins_encode %{
    KRegister   result  = $dst$$KRegister;
    XMMRegister bools   = $src$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    __ load_vector_mask(result, bools, scratch, true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
22521 
// VectorLoadMask producing an opmask on hardware with AVX512VL+BW. The
// length encoding comes from the input node and the boolean argument of
// load_vector_mask is false.
instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
  predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  effect(TEMP xtmp);
  ins_encode %{
    const int input_enc = vector_length_encoding(in(1));
    KRegister   result  = $dst$$KRegister;
    XMMRegister bools   = $src$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    __ load_vector_mask(result, bools, scratch, false, input_enc);
  %}
  ins_pipe( pipe_slow );
%}
22534 
22535 //------------------------------------- StoreMask --------------------------------------------
22536 
// VectorStoreMask for 1-byte elements from a vector (non opmask) source with
// fewer than 64 elements. A single byte-wise abs is emitted: pabsb when the
// vector has at most 16 elements and UseAVX <= 2, otherwise vpabsb with the
// length encoding of the source.
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    const int lanes = Matcher::vector_length(this);
    const bool sse_form = (lanes <= 16) && (UseAVX <= 2);
    if (!sse_form) {
      assert(UseAVX > 0, "required");
      const int input_enc = vector_length_encoding(this, $src);
      __ vpabsb(result, input, input_enc);
    } else {
      assert(UseSSE >= 3, "required");
      __ pabsb(result, input);
    }
  %}
  ins_pipe( pipe_slow );
%}
22554 
// VectorStoreMask for 2-byte elements from a vector (non opmask) source with
// at most 16 elements. Up to 8 elements: abs on words, then pack to bytes
// against a zeroed xtmp. More than 8 elements: the upper 128 bits are
// extracted, both halves are packed to bytes, then a byte-wise abs follows.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister zero   = $xtmp$$XMMRegister;
    const int lanes = Matcher::vector_length(this);
    if (lanes > 8) {
      assert(UseAVX > 0, "required");
      const int enc = Assembler::AVX_128bit;
      __ vextracti128(result, input, 0x1);
      __ vpacksswb(result, input, result, enc);
      __ vpabsb(result, result, enc);
    } else {
      assert(UseSSE >= 3, "required");
      __ pxor(zero, zero);
      __ pabsw(result, input);
      __ packuswb(result, zero);
    }
  %}
  ins_pipe( pipe_slow );
%}
22577 
// VectorStoreMask for 4-byte elements from a vector (non opmask) source with
// at most 8 elements when UseAVX <= 2. Up to 4 elements: abs on dwords, then
// two pack steps against a zeroed xtmp. More than 4 elements: the upper 128
// bits are extracted, dwords are packed to words and then to bytes, and a
// byte-wise abs follows.
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister zero   = $xtmp$$XMMRegister;
    const int lanes = Matcher::vector_length(this);
    if (lanes > 4) {
      assert(UseAVX > 0, "required");
      const int enc = Assembler::AVX_128bit;
      __ vpxor(zero, zero, zero, enc);
      __ vextracti128(result, input, 0x1);
      __ vpackssdw(result, input, result, enc);
      __ vpacksswb(result, result, zero, enc);
      __ vpabsb(result, result, enc);
    } else {
      assert(UseSSE >= 3, "required");
      __ pxor(zero, zero);
      __ pabsd(result, input);
      __ packusdw(result, zero);
      __ packuswb(result, zero);
    }
  %}
  ins_pipe( pipe_slow );
%}
22603 
// VectorStoreMask for 8-byte elements, two-element vectors, UseAVX <= 2.
// A pshufd with selector 0x8 is applied first, followed by a dword abs and
// two pack steps against a zeroed xtmp.
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    assert(UseSSE >= 3, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister zero   = $xtmp$$XMMRegister;
    __ pxor(zero, zero);
    __ pshufd(result, $src$$XMMRegister, 0x8);
    __ pabsd(result, result);
    __ packusdw(result, zero);
    __ packuswb(result, zero);
  %}
  ins_pipe( pipe_slow );
%}
22619 
// VectorStoreMask for 8-byte elements, four-element vectors, UseAVX <= 2.
// The 256-bit shuffle is followed by an extract of the upper 128 bits and a
// blend; vtmp is then zeroed and reused as the second operand of the two
// pack steps. A byte-wise abs finishes the sequence.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP vtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  ins_encode %{
    const int enc128 = Assembler::AVX_128bit;
    XMMRegister result  = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    XMMRegister scratch = $vtmp$$XMMRegister;
    __ vshufps(result, input, input, 0x88, Assembler::AVX_256bit);
    __ vextracti128(scratch, result, 0x1);
    __ vblendps(result, result, scratch, 0xC, enc128);
    // from here on scratch holds zero
    __ vpxor(scratch, scratch, scratch, enc128);
    __ vpackssdw(result, result, scratch, enc128);
    __ vpacksswb(result, result, scratch, enc128);
    __ vpabsb(result, result, enc128);
  %}
  ins_pipe( pipe_slow );
%}
22637 
// VectorStoreMask for 4-byte elements from a vector (non opmask) source when
// UseAVX > 2: evpmovdb followed by a byte-wise abs. Without AVX512VL the
// evpmovdb step is encoded as 512-bit regardless of the source length.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int natural_src_enc = vector_length_encoding(this, $src);
    const int result_enc = vector_length_encoding(this);
    const int input_enc = VM_Version::supports_avx512vl() ? natural_src_enc : Assembler::AVX_512bit;
    XMMRegister result = $dst$$XMMRegister;
    __ evpmovdb(result, $src$$XMMRegister, input_enc);
    __ vpabsb(result, result, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
22653 
// VectorStoreMask for 8-byte elements from a vector (non opmask) source when
// UseAVX > 2: evpmovqb followed by a byte-wise abs. Without AVX512VL the
// evpmovqb step is encoded as 512-bit regardless of the source length.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    const int natural_src_enc = vector_length_encoding(this, $src);
    const int result_enc = vector_length_encoding(this);
    const int input_enc = VM_Version::supports_avx512vl() ? natural_src_enc : Assembler::AVX_512bit;
    XMMRegister result = $dst$$XMMRegister;
    __ evpmovqb(result, $src$$XMMRegister, input_enc);
    __ vpabsb(result, result, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
22669 
// VectorStoreMask from an opmask source on hardware without AVX512VL+BW.
// The source mask is asserted to describe a 64-byte vector. A masked 512-bit
// load from the vector_int_mask_cmp_bits constant (merge argument false)
// fills dst, which evpmovdb then narrows in place.
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst);
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    XMMRegister result = $dst$$XMMRegister;
    KRegister   lanes  = $mask$$KRegister;
    __ evmovdqul(result, lanes, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb(result, result, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
22683 
// VectorStoreMask from an opmask source on hardware with AVX512VL+BW:
// evpmovm2b expands the opmask into dst and a byte-wise abs follows.
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst);
  ins_encode %{
    const int result_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ evpmovm2b(result, $mask$$KRegister, result_enc);
    __ vpabsb(result, result, result_enc);
  %}
  ins_pipe( pipe_slow );
%}
22696 
// VectorMaskCast on an opmask register. dst is both input and result, the
// cost is zero and the encoding emits nothing.
instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  format %{ "vector_mask_cast $dst" %}
  ins_cost(0);
  ins_encode %{
    // nothing to emit
  %}
  ins_pipe(empty);
%}
22706 
// VectorMaskCast on a vector register when the input and the result have the
// same size in bytes. dst is both input and result, the cost is zero and the
// encoding emits nothing.
instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  format %{ "vector_mask_cast $dst" %}
  ins_cost(0);
  ins_encode %{
    // nothing to emit
  %}
  ins_pipe(empty);
%}
22717 
// VectorMaskCast on vector registers when the input and the result differ in
// size in bytes. The conversion is delegated to vector_mask_cast, which gets
// the destination and source element types and the element count.
instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    const BasicType to_bt   = Matcher::vector_element_basic_type(this);
    const BasicType from_bt = Matcher::vector_element_basic_type(this, $src);
    const int lanes = Matcher::vector_length(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, to_bt, from_bt, lanes);
  %}
  ins_pipe(pipe_slow);
%}
22730 
22731 //-------------------------------- Load Iota Indices ----------------------------------
22732 
// VectorLoadConst with a zero immediate: loads the iota index constant for
// the element type and byte size of this node into dst.
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_iota_indices($dst$$XMMRegister, size_in_bytes, elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
22743 
// PopulateIndex with an int start value and a step that must be 1. The start
// value is broadcast into vtmp, the iota indices are loaded into dst, and the
// two are added element-wise.
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    const int enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister start  = $vtmp$$XMMRegister;
    __ vpbroadcast(bt, start, $src1$$Register, enc);
    __ load_iota_indices(result, size_in_bytes, bt);
    __ vpadd(bt, result, result, start, enc);
  %}
  ins_pipe( pipe_slow );
%}
22759 
// PopulateIndex with a long start value and a step that must be 1. The start
// value is broadcast into vtmp, the iota indices are loaded into dst, and the
// two are added element-wise.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert($src2$$constant == 1, "required");
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    const int enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister start  = $vtmp$$XMMRegister;
    __ vpbroadcast(bt, start, $src1$$Register, enc);
    __ load_iota_indices(result, size_in_bytes, bt);
    __ vpadd(bt, result, result, start, enc);
  %}
  ins_pipe( pipe_slow );
%}
22775 
22776 //-------------------------------- Rearrange ----------------------------------
22777 
22778 // LoadShuffle/Rearrange for Byte
// Byte rearrange for vectors of fewer than 32 elements. dst is both the
// data input and the result; a single pshufb with the shuffle vector is
// emitted.
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister in_out  = $dst$$XMMRegister;
    XMMRegister indices = $shuffle$$XMMRegister;
    __ pshufb(in_out, indices);
  %}
  ins_pipe( pipe_slow );
%}
22790 
// Byte rearrange for 32-element vectors when AVX512_VBMI is not available.
// The source is shuffled twice, once as is and once with its 128-bit halves
// swapped, and the two results are blended under a mask derived from the
// shuffle indices.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const int enc256 = Assembler::AVX_256bit;
    XMMRegister result     = $dst$$XMMRegister;
    XMMRegister input      = $src$$XMMRegister;
    XMMRegister indices    = $shuffle$$XMMRegister;
    XMMRegister swapped    = $vtmp1$$XMMRegister;
    XMMRegister blend_mask = $vtmp2$$XMMRegister;

    // swapped = input with its two 128 bit halves exchanged
    __ vperm2i128(swapped, input, input, 1);
    // shuffle the swapped copy: supplies entries that come from the other 128 bit lane
    __ vpshufb(swapped, swapped, indices, enc256);
    // shuffle the original: supplies entries that come from the same 128 bit lane
    __ vpshufb(result, input, indices, enc256);
    // build the blend mask: high bit set for entries that come from the other lane
    __ vpaddb(blend_mask, indices, ExternalAddress(vector_byte_shufflemask()), enc256, noreg);
    // pick per byte between the two shuffled results
    __ vpblendvb(result, result, swapped, blend_mask, enc256);
  %}
  ins_pipe( pipe_slow );
%}
22812 
22813 
// Byte rearrange for vectors with more than 32 lanes when AVX512_VBMI is not
// available. The whole sequence is delegated to the rearrange_bytes macro
// assembler routine, which needs three vector temporaries, one opmask
// temporary and one general purpose temporary.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  // The comment marker is "\t!", as in the other rearrange rules of this file.
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22828 
// Byte rearrange for vectors with 32 or more lanes when AVX512_VBMI is
// available: the permutation is emitted as a single vpermb.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(VM_Version::supports_avx512_vbmi() &&
            Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister indices = $shuffle$$XMMRegister;
    const XMMRegister input   = $src$$XMMRegister;
    const int         enc     = vector_length_encoding(this);
    __ vpermb(result, indices, input, enc);
  %}
  ins_pipe( pipe_slow );
%}
22840 
22841 // LoadShuffle/Rearrange for Short
22842 
// Shuffle load for short vectors on targets without AVX512BW. Only a byte
// shuffle instruction is available there, so each short index i is turned
// into the pair of byte indices that address the two bytes of element i.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(!VM_Version::supports_avx512bw() &&
            Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    const XMMRegister byte_idx   = $dst$$XMMRegister;
    const XMMRegister short_idx  = $src$$XMMRegister;
    const XMMRegister scratch    = $vtmp$$XMMRegister;
    const int         size_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX > 0) {
      assert(UseAVX > 1 || size_bytes <= 16, "required");
      const int enc = vector_length_encoding(this);
      // scratch = 2 * short index, i.e. the byte index of the low byte
      __ vpsllw(scratch, short_idx, 1, enc);

      // Replicate that byte index into both bytes of every 16-bit lane
      __ vpsllw(byte_idx, scratch, 8, enc);
      __ vpor(byte_idx, byte_idx, scratch, enc);

      // Add the short shuffle mask constant so the second byte addresses the alternate byte
      __ vpaddb(byte_idx, byte_idx, ExternalAddress(vector_short_shufflemask()), enc, noreg);
    } else {
      assert(size_bytes <= 16, "required");
      // scratch = 2 * short index, i.e. the byte index of the low byte
      __ movdqu(scratch, short_idx);
      __ psllw(scratch, 1);

      // Replicate that byte index into both bytes of every 16-bit lane
      __ movdqu(byte_idx, scratch);
      __ psllw(byte_idx, 8);
      __ por(byte_idx, scratch);

      // Add the short shuffle mask constant so the second byte addresses the alternate byte
      __ movdqu(scratch, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb(byte_idx, scratch);
    }
  %}
  ins_pipe( pipe_slow );
%}
22883 
// Short rearrange for vectors of at most 8 lanes on targets without AVX512BW.
// The permutation is performed in place on dst with a byte shuffle.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(!VM_Version::supports_avx512bw() &&
            Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    const XMMRegister data    = $dst$$XMMRegister;
    const XMMRegister indices = $shuffle$$XMMRegister;
    __ pshufb(data, indices);
  %}
  ins_pipe( pipe_slow );
%}
22895 
// Short rearrange of a 16-lane vector on targets without AVX512BW.
// The source is shuffled twice, once as-is (entries from the same 128-bit
// lane) and once with its 128-bit halves swapped (entries from the other
// lane); a per-byte blend then picks the right candidate for each position.
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw() &&
            Matcher::vector_length(n) == 16);
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const XMMRegister result     = $dst$$XMMRegister;
    const XMMRegister input      = $src$$XMMRegister;
    const XMMRegister indices    = $shuffle$$XMMRegister;
    const XMMRegister cross_lane = $vtmp1$$XMMRegister;
    const XMMRegister blend_mask = $vtmp2$$XMMRegister;
    const int         enc        = Assembler::AVX_256bit;

    // cross_lane = input with its two 128-bit halves exchanged
    __ vperm2i128(cross_lane, input, input, 1);
    // Candidates taken from the opposite 128-bit lane
    __ vpshufb(cross_lane, cross_lane, indices, enc);
    // Candidates taken from the same 128-bit lane
    __ vpshufb(result, input, indices, enc);
    // Build the blend mask: high bit set where the shuffle entry refers to the other lane
    __ vpaddb(blend_mask, indices, ExternalAddress(vector_byte_shufflemask()), enc, noreg);
    // Select between same-lane and cross-lane candidates
    __ vpblendvb(result, result, cross_lane, blend_mask, enc);
  %}
  ins_pipe( pipe_slow );
%}
22917 
// Short rearrange on targets with AVX512BW, emitted as a single vpermw.
// Without AVX512VL the instruction is always issued with the 512-bit
// encoding, whatever the vector size of the node is.
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(VM_Version::supports_avx512bw() &&
            Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int natural_enc = vector_length_encoding(this);
    const int enc = VM_Version::supports_avx512vl() ? natural_enc : Assembler::AVX_512bit;
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister indices = $shuffle$$XMMRegister;
    const XMMRegister input   = $src$$XMMRegister;
    __ vpermw(result, indices, input, enc);
  %}
  ins_pipe( pipe_slow );
%}
22932 
22933 // LoadShuffle/Rearrange for Integer and Float
22934 
// Shuffle load for 4-lane int/float vectors on SSE-only targets (UseAVX == 0).
// Only a byte shuffle instruction is available there, so every int index is
// expanded into the four byte indices that address the bytes of that element.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length(n) == 4 &&
            (Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT));
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    const XMMRegister byte_idx = $dst$$XMMRegister;
    const XMMRegister int_idx  = $src$$XMMRegister;
    const XMMRegister scratch  = $vtmp$$XMMRegister;

    // Step 1: duplicate each index into both 16-bit halves of its lane and scale it by 4
    __ movdqu(scratch, int_idx);
    __ pshuflw(scratch, scratch, 0xA0);
    __ pshufhw(scratch, scratch, 0xA0);
    __ psllw(scratch, 2);

    // Step 2: duplicate once more so that all four bytes of a lane hold the byte index
    __ movdqu(byte_idx, scratch);
    __ psllw(byte_idx, 8);
    __ por(scratch, byte_idx);

    // Step 3: add 3,2,1,0 so that each byte addresses its own byte of the element
    __ movdqu(byte_idx, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb(byte_idx, scratch);
  %}
  ins_pipe( pipe_slow );
%}
22964 
// Int/float rearrange on SSE-only targets (UseAVX == 0). The permutation is
// performed in place on dst with a byte shuffle.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT));
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    const XMMRegister data    = $dst$$XMMRegister;
    const XMMRegister indices = $shuffle$$XMMRegister;
    __ pshufb(data, indices);
  %}
  ins_pipe( pipe_slow );
%}
22976 
// Int/float rearrange on AVX targets (UseAVX > 0). Instruction selection is
// left to the vector_rearrange_int_float macro assembler routine.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate(UseAVX > 0 &&
            (Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    const int         enc     = vector_length_encoding(this);
    const BasicType   lane_bt = Matcher::vector_element_basic_type(this);
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister indices = $shuffle$$XMMRegister;
    const XMMRegister input   = $src$$XMMRegister;
    __ vector_rearrange_int_float(lane_bt, result, indices, input, enc);
  %}
  ins_pipe( pipe_slow );
%}
22989 
22990 // LoadShuffle/Rearrange for Long and Double
22991 
// Shuffle load for long/double vectors with fewer than 8 lanes on targets
// without AVX512VL. Only a double word shuffle is available there, so each
// long index is expanded into the two double word indices of that element.
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            !VM_Version::supports_avx512vl() && Matcher::vector_length(n) < 8);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const XMMRegister dword_idx = $dst$$XMMRegister;
    const XMMRegister long_idx  = $src$$XMMRegister;
    const XMMRegister scratch   = $vtmp$$XMMRegister;
    const int         enc       = vector_length_encoding(this);

    // scratch = 2 * long index, i.e. the double word index of the low half
    __ vpsllq(scratch, long_idx, 1, enc);

    // Replicate that double word index into both halves of every 64-bit lane
    __ vpsllq(dword_idx, scratch, 32, enc);
    __ vpor(dword_idx, dword_idx, scratch, enc);

    // Add the long shuffle mask constant so the second half addresses the alternate double word
    __ vpaddd(dword_idx, dword_idx, ExternalAddress(vector_long_shufflemask()), enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23017 
// Long/double rearrange for vectors with fewer than 8 lanes on targets
// without AVX512VL, emitted as a double word permute (vpermd).
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            !VM_Version::supports_avx512vl() && Matcher::vector_length(n) < 8);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister indices = $shuffle$$XMMRegister;
    const XMMRegister input   = $src$$XMMRegister;
    const int         enc     = vector_length_encoding(this);
    __ vpermd(result, indices, input, enc);
  %}
  ins_pipe( pipe_slow );
%}
23031 
// Long/double rearrange for 8-lane vectors, or for any size when AVX512VL is
// available, emitted as a quad word permute (vpermq). A 128-bit vector
// length encoding is widened to the 256-bit encoding before the call.
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (VM_Version::supports_avx512vl() || Matcher::vector_length(n) == 8));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int natural_enc = vector_length_encoding(this);
    const int enc = (natural_enc == Assembler::AVX_128bit) ? Assembler::AVX_256bit : natural_enc;
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister indices = $shuffle$$XMMRegister;
    const XMMRegister input   = $src$$XMMRegister;
    __ vpermq(result, indices, input, enc);
  %}
  ins_pipe( pipe_slow );
%}
23048 
23049 // --------------------------------- FMA --------------------------------------
23050 // a * b + c
23051 
// Packed float fused multiply-add, all operands in registers: c = a * b + c.
// The accumulator c is both an input and the result.
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF  c (Binary a b)));
  ins_cost(150);
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc    = $c$$XMMRegister;
    const XMMRegister factor = $a$$XMMRegister;
    const XMMRegister scale  = $b$$XMMRegister;
    const int         enc    = vector_length_encoding(this);
    __ vfmaf(acc, factor, scale, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23063 
// Packed float fused multiply-add with the second factor loaded from memory:
// c = a * [b] + c. Only matched when the first input is wider than 8 bytes.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF  c (Binary a (LoadVector b))));
  ins_cost(150);
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc    = $c$$XMMRegister;
    const XMMRegister factor = $a$$XMMRegister;
    const int         enc    = vector_length_encoding(this);
    __ vfmaf(acc, factor, $b$$Address, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23076 
// Packed double fused multiply-add, all operands in registers: c = a * b + c.
// The accumulator c is both an input and the result.
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD  c (Binary a b)));
  ins_cost(150);
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc    = $c$$XMMRegister;
    const XMMRegister factor = $a$$XMMRegister;
    const XMMRegister scale  = $b$$XMMRegister;
    const int         enc    = vector_length_encoding(this);
    __ vfmad(acc, factor, scale, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23088 
// Packed double fused multiply-add with the second factor loaded from memory:
// c = a * [b] + c. Only matched when the first input is wider than 8 bytes.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD  c (Binary a (LoadVector b))));
  ins_cost(150);
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_encode %{
    assert(UseFMA, "not enabled");
    const XMMRegister acc    = $c$$XMMRegister;
    const XMMRegister factor = $a$$XMMRegister;
    const int         enc    = vector_length_encoding(this);
    __ vfmad(acc, factor, $b$$Address, acc, enc);
  %}
  ins_pipe( pipe_slow );
%}
23101 
23102 // --------------------------------- Vector Multiply Add --------------------------------------
23103 
// Packed short multiply-add to int on SSE-only targets (UseAVX == 0).
// Two-operand form: dst is the first input as well as the result.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    const XMMRegister acc   = $dst$$XMMRegister;
    const XMMRegister other = $src1$$XMMRegister;
    __ pmaddwd(acc, other);
  %}
  ins_pipe( pipe_slow );
%}
23113 
// Packed short multiply-add to int on AVX targets (UseAVX > 0).
// Three-operand form: dst is distinct from both inputs.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src1$$XMMRegister;
    const XMMRegister rhs    = $src2$$XMMRegister;
    const int         enc    = vector_length_encoding(this);
    __ vpmaddwd(result, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
23124 
23125 // --------------------------------- Vector Multiply Add Add ----------------------------------
23126 
// Fuses AddVI(MulAddVS2VI(src1, src2), dst) into one evpdpwssd when
// AVX512_VNNI is available. dst is the accumulator and the result.
// The low cost makes this rule preferred over matching the two nodes separately.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  ins_cost(10);
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const XMMRegister acc = $dst$$XMMRegister;
    const XMMRegister lhs = $src1$$XMMRegister;
    const XMMRegister rhs = $src2$$XMMRegister;
    const int         enc = vector_length_encoding(this);
    __ evpdpwssd(acc, lhs, rhs, enc);
  %}
  ins_pipe( pipe_slow );
%}
23139 
23140 // --------------------------------- PopCount --------------------------------------
23141 
// Unmasked vector population count (PopCountVI / PopCountVL) for element
// types accepted by is_vector_popcount_predicate. The element type and the
// vector length encoding are taken from the src operand; k0 is passed as
// the opmask together with merge == true.
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    // The ideal opcode is not needed here: the callee dispatches on bt only.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23155 
// Masked vector population count (PopCountVI / PopCountVL with a mask input).
// src is first copied into dst, then the popcount is emitted under the
// opmask with merge == true.
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    const BasicType   lane_bt = Matcher::vector_element_basic_type(this, $src);
    const int         enc     = vector_length_encoding(this, $src);
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister input   = $src$$XMMRegister;
    // Seed the destination with the source before the merging popcount.
    __ evmovdquq(result, input, enc);
    __ vector_popcount_integral_evex(lane_bt, result, input, $mask$$KRegister, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23169 
// Vector population count (PopCountVI / PopCountVL) for element types that
// is_vector_popcount_predicate rejects. The work is delegated to the
// vector_popcount_integral macro assembler routine, which needs two vector
// temporaries and one general purpose temporary.
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    // The ideal opcode is not needed here: the callee dispatches on bt only.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23185 
23186 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23187 
// CountTrailingZerosV for the element type / vector size combinations
// accepted by is_clz_non_subword_predicate_evex. Only one vector temporary
// is supplied to the macro assembler routine; the other vector temporary
// slots are passed as xnoreg and k0 is passed as the opmask.
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  // The comment marker is "\t!", as in the AVX variant of this rule.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23203 
// CountTrailingZerosV for short elements when AVX512CD is available and
// either AVX512VL is supported or the vector is 64 bytes wide. Three vector
// temporaries are supplied to the macro assembler routine; the remaining
// vector temporary slot is passed as xnoreg and k0 is passed as the opmask.
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  // The comment marker is "\t!", as in the AVX variant of this rule.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23220 
// CountTrailingZerosV for byte elements when both AVX512VL and AVX512BW are
// available. All four vector temporaries, an opmask temporary and a general
// purpose temporary are handed to the macro assembler routine.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  // The comment marker is "\t!", as in the AVX variant of this rule.
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23236 
// CountTrailingZerosV for vectors narrower than 64 bytes on targets without
// AVX512VL. The work is delegated to the vector_count_trailing_zeros_avx
// macro assembler routine with three vector temporaries and one general
// purpose temporary.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 64 && !VM_Version::supports_avx512vl());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    const BasicType   lane_bt = Matcher::vector_element_basic_type(this, $src);
    const int         enc     = vector_length_encoding(this, $src);
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister input   = $src$$XMMRegister;
    const XMMRegister tmp_a   = $xtmp1$$XMMRegister;
    const XMMRegister tmp_b   = $xtmp2$$XMMRegister;
    const XMMRegister tmp_c   = $xtmp3$$XMMRegister;
    __ vector_count_trailing_zeros_avx(lane_bt, result, input, tmp_a, tmp_b, tmp_c,
                                       $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
23250 
23251 
23252 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23253 
// Three-input bitwise logic (MacroLogicV) with all inputs in registers.
// dst is the first input as well as the result; func is the 8-bit immediate
// passed to vpternlogd.
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    const XMMRegister first  = $dst$$XMMRegister;
    const XMMRegister second = $src2$$XMMRegister;
    const XMMRegister third  = $src3$$XMMRegister;
    const int         enc    = vector_length_encoding(this);
    __ vpternlogd(first, $func$$constant, second, third, enc);
  %}
  ins_pipe( pipe_slow );
%}
23264 
// Three-input bitwise logic (MacroLogicV) with the third input loaded from
// memory. dst is the first input as well as the result. Only matched when
// the first input vector is wider than 8 bytes.
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    const XMMRegister first  = $dst$$XMMRegister;
    const XMMRegister second = $src2$$XMMRegister;
    const int         enc    = vector_length_encoding(this);
    __ vpternlogd(first, $func$$constant, second, $src3$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
23276 
23277 // --------------------------------- Rotation Operations ----------------------------------
// Vector rotate (left or right) by an 8-bit immediate count. The ideal
// opcode tells the macro assembler routine which direction to rotate.
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    const int         ideal_op = this->ideal_Opcode();
    const BasicType   lane_bt  = this->bottom_type()->is_vect()->element_basic_type();
    const int         enc      = vector_length_encoding(this);
    const XMMRegister result   = $dst$$XMMRegister;
    const XMMRegister input    = $src$$XMMRegister;
    __ vprotate_imm(ideal_op, lane_bt, result, input, $shift$$constant, enc);
  %}
  ins_pipe( pipe_slow );
%}
23290 
// Vector rotate (left or right) by a per-lane variable count held in a
// vector register. The ideal opcode tells the macro assembler routine which
// direction to rotate.
instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    const int         ideal_op = this->ideal_Opcode();
    const BasicType   lane_bt  = this->bottom_type()->is_vect()->element_basic_type();
    const int         enc      = vector_length_encoding(this);
    const XMMRegister result   = $dst$$XMMRegister;
    const XMMRegister input    = $src$$XMMRegister;
    const XMMRegister counts   = $shift$$XMMRegister;
    __ vprotate_var(ideal_op, lane_bt, result, input, counts, enc);
  %}
  ins_pipe( pipe_slow );
%}
23303 
23304 // ---------------------------------- Masked Operations ------------------------------------
// Masked vector load where the mask is a vector register, i.e. the mask
// input is not of predicate (pvectmask) type. Emitted through vmovmask.
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    const int         enc      = vector_length_encoding(this);
    const BasicType   lane_bt  = this->bottom_type()->is_vect()->element_basic_type();
    const XMMRegister result   = $dst$$XMMRegister;
    const XMMRegister mask_vec = $mask$$XMMRegister;
    __ vmovmask(lane_bt, result, $mem$$Address, mask_vec, enc);
  %}
  ins_pipe( pipe_slow );
%}
23316 
23317 
// Masked vector load where the mask is an opmask register, i.e. the mask
// input is of predicate (pvectmask) type. Emitted through evmovdqu with
// the merge flag set to false.
instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_pvectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    const int         enc     = vector_length_encoding(this);
    const BasicType   lane_bt = this->bottom_type()->is_vect()->element_basic_type();
    const XMMRegister result  = $dst$$XMMRegister;
    const KRegister   opmask  = $mask$$KRegister;
    __ evmovdqu(lane_bt, opmask, result, $mem$$Address, false, enc);
  %}
  ins_pipe( pipe_slow );
%}
23329 
// Masked vector store where the mask is a vector register, i.e. the mask
// input is not of predicate (pvectmask) type. The element type and vector
// length are derived from the node feeding the src operand, since the
// store node itself does not produce a vector.
instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* value_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    const BasicType lane_bt    = value_node->bottom_type()->is_vect()->element_basic_type();
    const int       enc        = vector_length_encoding(value_node);
    const XMMRegister value    = $src$$XMMRegister;
    const XMMRegister mask_vec = $mask$$XMMRegister;
    __ vmovmask(lane_bt, $mem$$Address, value, mask_vec, enc);
  %}
  ins_pipe( pipe_slow );
%}
23342 
// Masked vector store where the mask is an opmask register, i.e. the mask
// input is of predicate (pvectmask) type. The element type and vector
// length are derived from the node feeding the src operand. Emitted
// through evmovdqu with the merge flag set to true.
instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* value_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    const int       enc        = vector_length_encoding(value_node);
    const BasicType lane_bt    = value_node->bottom_type()->is_vect()->element_basic_type();
    const XMMRegister value    = $src$$XMMRegister;
    const KRegister   opmask   = $mask$$KRegister;
    __ evmovdqu(lane_bt, opmask, $mem$$Address, value, true, enc);
  %}
  ins_pipe( pipe_slow );
%}
23355 
// Alignment check for vector memory accesses: stops the VM with a message
// when any bit of addr selected by mask is set. addr itself is passed
// through unchanged; the flags register is clobbered by the test.
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label L_aligned;
    const Register address = $addr$$Register;
    // Zero flag is set when none of the masked address bits is set.
    __ testq(address, $mask$$constant);
    __ jccb(Assembler::equal, L_aligned);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(L_aligned);
  %}
  ins_pipe(pipe_slow);
%}
23370 
// Masked vector equality comparison producing an int. dst is preset to -1
// and keeps that value when the carry flag is set by the kortest of the
// inverted mask with the per-lane equality result. Otherwise dst becomes
// the number of trailing zero bits of the inverted equality result, i.e.
// the position of the lowest lane not reported equal.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label L_done;
    const BasicType   lane_bt   = Matcher::vector_element_basic_type(this, $src1);
    const int         enc       = vector_length_encoding(this, $src1);
    const Register    result    = $dst$$Register;
    const XMMRegister lhs       = $src1$$XMMRegister;
    const XMMRegister rhs       = $src2$$XMMRegister;
    const KRegister   active    = $mask$$KRegister;
    const KRegister   equal_k   = $ktmp1$$KRegister;
    const KRegister   inactive  = $ktmp2$$KRegister;

    // inactive = ~active
    __ knotql(inactive, active);
    // Default result when the carry-set branch below is taken
    __ mov64(result, -1L);
    // equal_k = lanes of lhs and rhs that compare equal under the active mask
    __ evpcmp(lane_bt, equal_k, active, lhs, rhs, Assembler::eq, enc);
    // Carry is set when (inactive | equal_k) is all ones
    __ kortestql(inactive, equal_k);
    __ jccb(Assembler::carrySet, L_done);
    // result = index of the lowest clear bit in equal_k
    __ kmovql(result, equal_k);
    __ notq(result);
    __ tzcntq(result, result);
    __ bind(L_done);
  %}
  ins_pipe( pipe_slow );
%}
23395 
23396 
// Generates an opmask from a length held in a register. The work is done by
// the genmask macro assembler routine, which needs one general purpose
// temporary; the flags register is clobbered.
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    const KRegister opmask  = $dst$$KRegister;
    const Register  length  = $len$$Register;
    const Register  scratch = $temp$$Register;
    __ genmask(opmask, length, scratch);
  %}
  ins_pipe( pipe_slow );
%}
23406 
// Generates an opmask from a constant length. A positive length loads
// right_n_bits(len) into the opmask through the general purpose temporary;
// any other length clears the opmask.
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp);
  format %{ "vector_mask_gen $len \t! vector mask generator" %}
  ins_encode %{
    const KRegister opmask = $dst$$KRegister;
    if ($len$$constant <= 0) {
      // No bits requested: xor the opmask with itself to zero it.
      __ kxorql(opmask, opmask, opmask);
    } else {
      const Register scratch = $temp$$Register;
      __ mov64(scratch, right_n_bits($len$$constant));
      __ kmovql(opmask, scratch);
    }
  %}
  ins_pipe( pipe_slow );
%}
23421 
// VectorMaskToLong for a mask held in an opmask register (predicate type
// input). dst is handed to the macro assembler routine both as the result
// and as its general purpose temporary.
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    const int       ideal_op   = this->ideal_Opcode();
    const BasicType mask_bt    = Matcher::vector_element_basic_type(this, $mask);
    const int       lane_count = Matcher::vector_length(this, $mask);
    // Mask footprint in bytes: lane count times element size
    const int       mask_bytes = lane_count * type2aelembytes(mask_bt);
    const int       enc        = vector_length_encoding(this, $mask);
    const Register  result     = $dst$$Register;
    __ vector_mask_operation(ideal_op, result, $mask$$KRegister,
                             result, lane_count, mask_bytes, enc);
  %}
  ins_pipe( pipe_slow );
%}
23438 
// VectorMaskToLong for a mask held in a vector register (input is not of
// predicate type). dst is handed to the macro assembler routine both as the
// result and as its general purpose temporary; xtmp is a vector temporary.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  ins_encode %{
    const int         ideal_op   = this->ideal_Opcode();
    const BasicType   mask_bt    = Matcher::vector_element_basic_type(this, $mask);
    const int         lane_count = Matcher::vector_length(this, $mask);
    const int         enc        = vector_length_encoding(this, $mask);
    const Register    result     = $dst$$Register;
    const XMMRegister mask_vec   = $mask$$XMMRegister;
    const XMMRegister scratch    = $xtmp$$XMMRegister;
    __ vector_mask_operation(ideal_op, result, mask_vec, scratch,
                             result, lane_count, mask_bt, enc);
  %}
  ins_pipe( pipe_slow );
%}
23454 
// VectorMaskToLong applied directly to a VectorStoreMask: the rule matches
// both nodes and consumes the VectorStoreMask input, provided that input is
// not a predicate vector mask. The size operand is matched but not used by
// the encoding.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  ins_encode %{
    const BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    const int vec_enc = vector_length_encoding(this, $mask);
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register,
                             $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, lane_cnt, mask_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23470 
// VectorMaskTrueCount for a predicate vector mask held in a kReg. Uses one
// general purpose TEMP and kills the flags; the actual code sequence is
// produced by vector_mask_operation from the ideal opcode.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskTrueCount mask));
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the element width in bytes.
    const int mask_bytes = lane_cnt * type2aelembytes(mask_bt);
    const int vec_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_cnt, mask_bytes, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23487 
// VectorMaskTrueCount for a mask that is not a predicate vector mask (vec
// operand). Uses one general purpose TEMP and one vector TEMP; flags are
// killed.
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  ins_encode %{
    const int vec_enc = vector_length_encoding(this, $mask);
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(),
                             $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, mask_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23503 
// VectorMaskTrueCount applied directly to a VectorStoreMask: both nodes are
// matched and the VectorStoreMask input is consumed, provided that input is
// not a predicate vector mask. The size operand is matched but not used by
// the encoding.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  ins_encode %{
    const BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    const int vec_enc = vector_length_encoding(this, $mask);
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register,
                             $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, mask_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23519 
// VectorMaskFirstTrue and VectorMaskLastTrue for a predicate vector mask held
// in a kReg. One rule serves both nodes; the ideal opcode passed to
// vector_mask_operation tells them apart.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the element width in bytes.
    const int mask_bytes = lane_cnt * type2aelembytes(mask_bt);
    const int vec_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_cnt, mask_bytes, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23537 
// VectorMaskFirstTrue and VectorMaskLastTrue for a mask that is not a
// predicate vector mask (vec operand). One rule serves both nodes; the ideal
// opcode passed to vector_mask_operation tells them apart.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  ins_encode %{
    const int vec_enc = vector_length_encoding(this, $mask);
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(),
                             $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, mask_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23554 
// VectorMaskFirstTrue and VectorMaskLastTrue applied directly to a
// VectorStoreMask whose input is not a predicate vector mask. The size
// operand is matched but not used by the encoding.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  ins_encode %{
    const BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
    const int vec_enc = vector_length_encoding(this, $mask);
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register,
                             $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, mask_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23571 
23572 // --------------------------------- Compress/Expand Operations ---------------------------
// CompressV and ExpandV with the mask in a vector register. Selected when
// AVX512VL is not supported and the vector is at most 32 bytes wide. Needs
// two general purpose and two vector TEMPs and kills the flags.
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The ideal opcode distinguishes compress from expand inside the helper.
    const int ideal_opc = this->ideal_Opcode();
    __ vector_compress_expand_avx2(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister,
                                   $mask$$XMMRegister, $rtmp$$Register, $rscratch$$Register,
                                   $perm$$XMMRegister, $xtmp$$XMMRegister, elem_bt, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23588 
// CompressV and ExpandV with the mask in a kReg. Selected when AVX512VL is
// supported or the vector is exactly 64 bytes wide. No temporaries needed.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vlen_enc = vector_length_encoding(this);
    // NOTE(review): the boolean argument is passed as false; its meaning is
    // defined by vector_compress_expand and is not visible here.
    __ vector_compress_expand(this->ideal_Opcode(), $dst$$XMMRegister, $src$$XMMRegister,
                              $mask$$KRegister, false, elem_bt, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
23602 
// CompressM: mask compression from one kReg into another. Uses two general
// purpose TEMPs and kills the flags. The input is asserted to be a predicate
// vector mask.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_pvectmask(), "");
    // Lane count is taken from this node, not from the input.
    const int lane_cnt = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister,
                            $rtmp1$$Register, $rtmp2$$Register, lane_cnt);
  %}
  ins_pipe(pipe_slow);
%}
23614 
23615 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23616 
// ReverseV when GFNI is not supported. Delegates to vector_reverse_bit with
// two vector TEMPs and one general purpose TEMP; dst is also declared TEMP.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_bit(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                          $rtmp$$Register, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
23630 
// ReverseV when GFNI is supported. A 64-bit constant is placed in the
// constant table and its address is handed to vector_reverse_bit_gfni along
// with one vector TEMP; dst is also declared TEMP.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp);
  ins_encode %{
    InternalAddress gfni_const = $constantaddress(jlong(0x8040201008040201));
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_bit_gfni(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                               gfni_const, vlen_enc, $xtmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
23645 
// ReverseBytesV when AVX512BW is supported or the vector is narrower than
// 64 bytes. Needs no extra temporaries; dst itself is declared TEMP.
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  format %{ "vector_reverse_byte $dst, $src" %}
  effect(TEMP dst);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_byte(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
23658 
// ReverseBytesV for 64-byte vectors when AVX512BW is not supported.
// Delegates to vector_reverse_byte64 with two vector TEMPs and one general
// purpose TEMP; dst is also declared TEMP.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_byte64(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                             $rtmp$$Register, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
23672 
23673 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
23674 
// Unmasked CountLeadingZerosV for inputs accepted by
// is_clz_non_subword_predicate_evex (evaluated on the element type and byte
// length of the input). No temporaries: every scratch slot of the helper is
// filled with xnoreg / noreg and the mask slot with k0.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg, k0, noreg, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23688 
// Masked CountLeadingZerosV for inputs accepted by
// is_clz_non_subword_predicate_evex. src is copied into dst first, then the
// count is computed under the kReg mask.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    // Seed dst with the source before the masked operation.
    // NOTE(review): presumably so that unselected lanes keep the source
    // value; confirm against vector_count_leading_zeros_evex.
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg,
                                       $mask$$KRegister, noreg, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23703 
// CountLeadingZerosV for T_SHORT elements. Requires AVX512CD and either
// AVX512VL or a 64-byte vector. Uses two vector TEMPs; the remaining scratch
// slots of the helper are filled with xnoreg / k0 / noreg.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       xnoreg, k0, noreg, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23719 
// CountLeadingZerosV for T_BYTE elements when supports_avx512vlbw() holds.
// Uses three vector TEMPs, one mask TEMP and one general purpose TEMP, all
// of which are forwarded to the helper.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23734 
// CountLeadingZerosV for T_INT elements when AVX512VL is not supported and
// the input is narrower than 64 bytes. Uses three vector TEMPs; the general
// purpose scratch slot of the helper is passed as noreg.
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                      $xtmp3$$XMMRegister, noreg, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23749 
// CountLeadingZerosV for every element type other than T_INT when AVX512VL
// is not supported and the input is narrower than 64 bytes. Uses three
// vector TEMPs plus one general purpose TEMP.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                      $xtmp3$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23764 
23765 // ---------------------------------- Vector Masked Operations ------------------------------------
23766 
// Masked vector add with a register second operand. One rule covers AddVB,
// AddVS, AddVI, AddVL, AddVF and AddVD; dst is both the first input and the
// result. The ideal opcode and element type are forwarded to evmasked_op.
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23784 
// Masked vector add whose second operand is a LoadVector folded into the
// instruction as a memory operand. Covers AddVB, AddVS, AddVI, AddVL, AddVF
// and AddVD; dst is both the first input and the result.
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23802 
// Masked XorV with a register second operand; dst is both the first input
// and the result.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23815 
// Masked XorV whose second operand is a LoadVector folded into the
// instruction as a memory operand; dst is both the first input and the result.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23828 
// Masked OrV with a register second operand; dst is both the first input and
// the result.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23841 
// Masked OrV whose second operand is a LoadVector folded into the instruction
// as a memory operand; dst is both the first input and the result.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23854 
// Masked AndV with a register second operand; dst is both the first input
// and the result.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int ideal_opc = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23867 
// Masked AndV whose second operand is a LoadVector folded into the
// instruction as a memory operand; dst is both the first input and the result.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int ideal_opc = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23880 
// Masked vector subtract with a register second operand. One rule covers
// SubVB, SubVS, SubVI, SubVL, SubVF and SubVD; dst is both the first input
// and the result.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23898 
// Masked vector subtract whose second operand is a LoadVector folded into
// the instruction as a memory operand. Covers SubVB, SubVS, SubVI, SubVL,
// SubVF and SubVD; dst is both the first input and the result.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23916 
// Masked vector multiply with a register second operand. Covers MulVS,
// MulVI, MulVL, MulVF and MulVD (no byte variant is matched here); dst is
// both the first input and the result.
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23933 
// Masked vector multiply whose second operand is a LoadVector folded into
// the instruction as a memory operand. Covers MulVS, MulVI, MulVL, MulVF and
// MulVD; dst is both the first input and the result.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23950 
// Masked vector square root for SqrtVF and SqrtVD. The operation is unary,
// so dst is passed to evmasked_op in all three vector register slots.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23964 
// Masked vector divide with a register second operand. Only the floating
// point nodes DivVF and DivVD are matched; dst is both the first input and
// the result.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23978 
// Masked vector divide whose second operand is a LoadVector folded into the
// instruction as a memory operand. Only DivVF and DivVD are matched; dst is
// both the first input and the result.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
23992 
23993 
// Masked vector rotate by an 8-bit immediate. Despite the name, the rule
// matches both RotateLeftV and RotateRightV; the ideal opcode forwarded to
// evmasked_op carries the direction.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24007 
// Masked vector rotate with the rotate amount in a vector register. Despite
// the name, the rule matches both RotateLeftV and RotateRightV; the ideal
// opcode forwarded to evmasked_op carries the direction.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24021 
// Masked vector left shift by an 8-bit immediate. Matches LShiftVS, LShiftVI
// and LShiftVL whose shift input is an LShiftCntV of the immediate, so the
// count is encoded directly instead of being materialised in a register.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24036 
// Masked vector left shift with the count in a vector register, for nodes
// that are not variable shifts (is_var_shift() is false). Matches LShiftVS,
// LShiftVI and LShiftVL. The trailing false passed to evmasked_op mirrors
// the predicate.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, false);
  %}
  ins_pipe(pipe_slow);
%}
24052 
// Masked vector left shift with per-lane counts in a vector register, for
// nodes flagged as variable shifts (is_var_shift() is true). Matches
// LShiftVS, LShiftVI and LShiftVL. The trailing true passed to evmasked_op
// mirrors the predicate.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc, true);
  %}
  ins_pipe(pipe_slow);
%}
24068 
24069 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
        // Masked right shift by an immediate count. Matches RShiftVS, RShiftVI and
        // RShiftVL whose count input is an RShiftCntV of the immI8 operand shift.
        // dst is both the shifted input and the result.
24070   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24071   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24072   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24073   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24074   ins_encode %{
          // The shift count is passed to evmasked_op as a constant rather than a
          // register, so no variable-shift argument is supplied here.
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24075     int vlen_enc = vector_length_encoding(this);
24076     BasicType bt = Matcher::vector_element_basic_type(this);
24077     int opc = this->ideal_Opcode();
24078     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24079                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24080   %}
24081   ins_pipe( pipe_slow );
24082 %}
24083 
24084 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
        // Masked right shift by a register count. Matches RShiftVS, RShiftVI and
        // RShiftVL nodes with a mask input that are not flagged as variable shifts.
        // dst is both the first input and the result; src2 holds the count.
24085   predicate(!n->as_ShiftV()->is_var_shift());
24086   match(Set dst (RShiftVS (Binary dst src2) mask));
24087   match(Set dst (RShiftVI (Binary dst src2) mask));
24088   match(Set dst (RShiftVL (Binary dst src2) mask));
24089   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24090   ins_encode %{
          // The trailing false mirrors the predicate (not a variable shift).
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24091     int vlen_enc = vector_length_encoding(this);
24092     BasicType bt = Matcher::vector_element_basic_type(this);
24093     int opc = this->ideal_Opcode();
24094     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24095                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24096   %}
24097   ins_pipe( pipe_slow );
24098 %}
24099 
24100 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
        // Masked variable right shift. Same match trees as the non-variable
        // register form (RShiftVS/RShiftVI/RShiftVL with a mask input); selected
        // only when the ShiftV node reports is_var_shift().
24101   predicate(n->as_ShiftV()->is_var_shift());
24102   match(Set dst (RShiftVS (Binary dst src2) mask));
24103   match(Set dst (RShiftVI (Binary dst src2) mask));
24104   match(Set dst (RShiftVL (Binary dst src2) mask));
24105   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24106   ins_encode %{
          // The trailing true mirrors the predicate (variable shift).
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24107     int vlen_enc = vector_length_encoding(this);
24108     BasicType bt = Matcher::vector_element_basic_type(this);
24109     int opc = this->ideal_Opcode();
24110     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24111                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24112   %}
24113   ins_pipe( pipe_slow );
24114 %}
24115 
24116 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
        // Masked unsigned right shift by an immediate count. Matches URShiftVS,
        // URShiftVI and URShiftVL whose count input is an RShiftCntV of the immI8
        // operand shift. dst is both the shifted input and the result.
24117   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24118   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24119   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24120   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24121   ins_encode %{
          // The shift count is passed to evmasked_op as a constant rather than a
          // register, so no variable-shift argument is supplied here.
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24122     int vlen_enc = vector_length_encoding(this);
24123     BasicType bt = Matcher::vector_element_basic_type(this);
24124     int opc = this->ideal_Opcode();
24125     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24126                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24127   %}
24128   ins_pipe( pipe_slow );
24129 %}
24130 
24131 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
        // Masked unsigned right shift by a register count. Matches URShiftVS,
        // URShiftVI and URShiftVL nodes with a mask input that are not flagged as
        // variable shifts. dst is both the first input and the result.
24132   predicate(!n->as_ShiftV()->is_var_shift());
24133   match(Set dst (URShiftVS (Binary dst src2) mask));
24134   match(Set dst (URShiftVI (Binary dst src2) mask));
24135   match(Set dst (URShiftVL (Binary dst src2) mask));
24136   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24137   ins_encode %{
          // The trailing false mirrors the predicate (not a variable shift).
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24138     int vlen_enc = vector_length_encoding(this);
24139     BasicType bt = Matcher::vector_element_basic_type(this);
24140     int opc = this->ideal_Opcode();
24141     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24142                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24143   %}
24144   ins_pipe( pipe_slow );
24145 %}
24146 
24147 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
        // Masked variable unsigned right shift. Same match trees as the
        // non-variable register form (URShiftVS/URShiftVI/URShiftVL with a mask
        // input); selected only when the ShiftV node reports is_var_shift().
24148   predicate(n->as_ShiftV()->is_var_shift());
24149   match(Set dst (URShiftVS (Binary dst src2) mask));
24150   match(Set dst (URShiftVI (Binary dst src2) mask));
24151   match(Set dst (URShiftVL (Binary dst src2) mask));
24152   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24153   ins_encode %{
          // The trailing true mirrors the predicate (variable shift).
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24154     int vlen_enc = vector_length_encoding(this);
24155     BasicType bt = Matcher::vector_element_basic_type(this);
24156     int opc = this->ideal_Opcode();
24157     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24158                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24159   %}
24160   ins_pipe( pipe_slow );
24161 %}
24162 
24163 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
        // Masked vector maximum, register form. Matches a MaxV node with a mask
        // input; dst is both the first input and the result.
24164   match(Set dst (MaxV (Binary dst src2) mask));
24165   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24166   ins_encode %{
          // Element type and ideal opcode come from the matched node and are
          // forwarded to evmasked_op together with the mask register.
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24167     int vlen_enc = vector_length_encoding(this);
24168     BasicType bt = Matcher::vector_element_basic_type(this);
24169     int opc = this->ideal_Opcode();
24170     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24171                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24172   %}
24173   ins_pipe( pipe_slow );
24174 %}
24175 
24176 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
        // Masked vector maximum, memory form. Matches a MaxV node whose second
        // input is a LoadVector, so the load is folded into this instruction and
        // src2 is used as an address. dst is both the first input and the result.
24177   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24178   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24179   ins_encode %{
          // Same call shape as the register form, with an Address as second source.
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24180     int vlen_enc = vector_length_encoding(this);
24181     BasicType bt = Matcher::vector_element_basic_type(this);
24182     int opc = this->ideal_Opcode();
24183     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24184                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24185   %}
24186   ins_pipe( pipe_slow );
24187 %}
24188 
24189 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
        // Masked vector minimum, register form. Matches a MinV node with a mask
        // input; dst is both the first input and the result.
24190   match(Set dst (MinV (Binary dst src2) mask));
24191   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24192   ins_encode %{
          // Element type and ideal opcode come from the matched node and are
          // forwarded to evmasked_op together with the mask register.
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24193     int vlen_enc = vector_length_encoding(this);
24194     BasicType bt = Matcher::vector_element_basic_type(this);
24195     int opc = this->ideal_Opcode();
24196     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24197                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24198   %}
24199   ins_pipe( pipe_slow );
24200 %}
24201 
24202 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
        // Masked vector minimum, memory form. Matches a MinV node whose second
        // input is a LoadVector, so the load is folded into this instruction and
        // src2 is used as an address. dst is both the first input and the result.
24203   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24204   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24205   ins_encode %{
          // Same call shape as the register form, with an Address as second source.
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24206     int vlen_enc = vector_length_encoding(this);
24207     BasicType bt = Matcher::vector_element_basic_type(this);
24208     int opc = this->ideal_Opcode();
24209     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24210                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24211   %}
24212   ins_pipe( pipe_slow );
24213 %}
24214 
24215 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
        // Masked vector rearrange. Matches a VectorRearrange node with a mask
        // input; dst is both the first input and the result, src2 the second input.
24216   match(Set dst (VectorRearrange (Binary dst src2) mask));
24217   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24218   ins_encode %{
          // Unlike the neighbouring masked rules, the boolean passed before
          // vlen_enc is false here.
          // NOTE(review): that argument is presumably the merge flag, which would
          // make this a zeroing-masked operation; confirm against evmasked_op.
24219     int vlen_enc = vector_length_encoding(this);
24220     BasicType bt = Matcher::vector_element_basic_type(this);
24221     int opc = this->ideal_Opcode();
24222     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24223                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24224   %}
24225   ins_pipe( pipe_slow );
24226 %}
24227 
24228 instruct vabs_masked(vec dst, kReg mask) %{
        // Masked vector absolute value. Matches AbsVB, AbsVS, AbsVI and AbsVL
        // nodes with a mask input; dst is both the only vector input and the
        // result.
24229   match(Set dst (AbsVB dst mask));
24230   match(Set dst (AbsVS dst mask));
24231   match(Set dst (AbsVI dst mask));
24232   match(Set dst (AbsVL dst mask));
24233   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24234   ins_encode %{
          // The operation is unary, so dst is supplied for all three vector
          // register arguments of evmasked_op.
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24235     int vlen_enc = vector_length_encoding(this);
24236     BasicType bt = Matcher::vector_element_basic_type(this);
24237     int opc = this->ideal_Opcode();
24238     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24239                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24240   %}
24241   ins_pipe( pipe_slow );
24242 %}
24243 
24244 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
        // Masked fused multiply-add, register form. Matches FmaVF and FmaVD nodes
        // whose inputs are (dst, src2) and (src3, mask); dst is both the first
        // input and the result.
24245   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24246   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24247   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24248   ins_encode %{
          // Emitting this rule requires the UseFMA flag; the assert documents that
          // precondition. Unlike the two-input masked rules, the sources passed to
          // evmasked_op are src2 and src3 (dst appears only as the destination).
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24249     assert(UseFMA, "Needs FMA instructions support.");
24250     int vlen_enc = vector_length_encoding(this);
24251     BasicType bt = Matcher::vector_element_basic_type(this);
24252     int opc = this->ideal_Opcode();
24253     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24254                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24255   %}
24256   ins_pipe( pipe_slow );
24257 %}
24258 
24259 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
        // Masked fused multiply-add, memory form. Matches FmaVF and FmaVD nodes
        // whose third input is a LoadVector, so the load is folded in and src3 is
        // used as an address. dst is both the first input and the result.
24260   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24261   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24262   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24263   ins_encode %{
          // Emitting this rule requires the UseFMA flag; the assert documents that
          // precondition. The sources passed to evmasked_op are the src2 register
          // and the src3 address.
          // NOTE(review): the true before vlen_enc is presumably the merge flag;
          // confirm against the evmasked_op declaration.
24264     assert(UseFMA, "Needs FMA instructions support.");
24265     int vlen_enc = vector_length_encoding(this);
24266     BasicType bt = Matcher::vector_element_basic_type(this);
24267     int opc = this->ideal_Opcode();
24268     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24269                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24270   %}
24271   ins_pipe( pipe_slow );
24272 %}
24273 
24274 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
        // Masked vector compare producing a mask-register result. Matches a
        // VectorMaskCmp node whose inputs are (src1, src2) and (cond, mask), where
        // cond is an immediate comparison predicate.
24275   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24276   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24277   ins_encode %{
          // The result must be a predicate-register mask type. Vector length and
          // element type are taken from src1 (the compared data), not from the
          // result node.
24278     assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
24279     int vlen_enc = vector_length_encoding(this, $src1);
24280     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24281 
24282     // Select the compare instruction by the element type of src1. Integral
          // cases translate cond to an integer comparison predicate and pass the
          // negated unsigned-ness of cond; floating-point cases translate cond to
          // a floating-point comparison predicate.
24283     switch (src1_elem_bt) {
24284       case T_BYTE: {
24285         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24286         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24287         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24288         break;
24289       }
24290       case T_SHORT: {
24291         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24292         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24293         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24294         break;
24295       }
24296       case T_INT: {
24297         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24298         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24299         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24300         break;
24301       }
24302       case T_LONG: {
24303         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24304         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24305         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24306         break;
24307       }
24308       case T_FLOAT: {
24309         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24310         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24311         break;
24312       }
24313       case T_DOUBLE: {
24314         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24315         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24316         break;
24317       }
            // Any other element type is a matcher error; the assert message prints
            // the offending type name.
24318       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24319     }
24320   %}
24321   ins_pipe( pipe_slow );
24322 %}
24323 
24324 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
        // Builds a mask register from an int register for a MaskAll node. The
        // predicate restricts this rule to vectors of at most 32 elements.
24325   predicate(Matcher::vector_length(n) <= 32);
24326   match(Set dst (MaskAll src));
24327   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24328   ins_encode %{
          // The element count of the node is passed along as the mask length.
24329     int mask_len = Matcher::vector_length(this);
24330     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24331   %}
24332   ins_pipe( pipe_slow );
24333 %}
24334 
24335 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
        // Mask negation for masks shorter than 8 elements. Matches XorVMask of src
        // with a MaskAll of the immI_M1 operand cnt (presumably the constant -1,
        // i.e. an all-set mask; verify against the operand definition). Requires
        // AVX512DQ. Needs an int register and a mask register as temporaries, and
        // dst is declared TEMP_DEF.
24336   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24337   match(Set dst (XorVMask src (MaskAll cnt)));
24338   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24339   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24340   ins_encode %{
          // Uses the knot overload that takes the two temporaries.
24341     uint masklen = Matcher::vector_length(this);
24342     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24343   %}
24344   ins_pipe( pipe_slow );
24345 %}
24346 
24347 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
        // Mask negation without temporaries. Matches XorVMask of src with a MaskAll
        // of the immI_M1 operand cnt (presumably the constant -1; verify against
        // the operand definition). Applies when the mask length is 8 with AVX512DQ,
        // exactly 16, or greater than 16 with AVX512BW.
24348   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24349             (Matcher::vector_length(n) == 16) ||
24350             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24351   match(Set dst (XorVMask src (MaskAll cnt)));
24352   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24353   ins_encode %{
          // Uses the three-argument knot overload (length, destination, source).
24354     uint masklen = Matcher::vector_length(this);
24355     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24356   %}
24357   ins_pipe( pipe_slow );
24358 %}
24359 
24360 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
        // Converts a long to a mask held in a vector register. Matches
        // VectorLongToMask when the result type is not a predicate-register mask
        // and the mask has at most 8 elements. Needs two long temporaries; dst is
        // also declared TEMP.
24361   predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
24362   match(Set dst (VectorLongToMask src));
24363   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
24364   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
24365   ins_encode %{
          // The vector length encoding is derived from the element count. No vector
          // temporary is supplied for this size: xnoreg is passed in its place.
24366     int mask_len = Matcher::vector_length(this);
24367     int vec_enc  = vector_length_encoding(mask_len);
24368     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24369                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24370   %}
24371   ins_pipe( pipe_slow );
24372 %}
24373 
24374 
24375 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
        // Converts a long to a mask held in a vector register. Matches
        // VectorLongToMask when the result type is not a predicate-register mask
        // and the mask has more than 8 elements. Needs two long temporaries and one
        // vector temporary, and kills the flags register.
24376   predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
24377   match(Set dst (VectorLongToMask src));
24378   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24379   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24380   ins_encode %{
          // Mask lengths above 32 are not expected on this path (asserted below).
          // The vector length encoding is derived from the element count.
24381     int mask_len = Matcher::vector_length(this);
24382     assert(mask_len <= 32, "invalid mask length");
24383     int vec_enc  = vector_length_encoding(mask_len);
24384     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24385                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24386   %}
24387   ins_pipe( pipe_slow );
24388 %}
24389 
24390 instruct long_to_mask_evex(kReg dst, rRegL src) %{
        // Converts a long to a mask held in a mask register. Matches
        // VectorLongToMask when the result type is a predicate-register mask.
24391   predicate(n->bottom_type()->isa_pvectmask());
24392   match(Set dst (VectorLongToMask src));
24393   format %{ "long_to_mask_evex $dst, $src\t!" %}
24394   ins_encode %{
          // A single move from the general-purpose register to the mask register.
24395     __ kmov($dst$$KRegister, $src$$Register);
24396   %}
24397   ins_pipe( pipe_slow );
24398 %}
24399 
24400 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
        // Logical operations on mask registers. Matches AndVMask, OrVMask and
        // XorVMask of two mask-register operands.
        // NOTE(review): kscratch is reserved as TEMP but is not referenced by the
        // encoding below; confirm whether it is still needed.
24401   match(Set dst (AndVMask src1 src2));
24402   match(Set dst (OrVMask src1 src2));
24403   match(Set dst (XorVMask src1 src2));
24404   effect(TEMP kscratch);
24405   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24406   ins_encode %{
          // Both mask inputs must have the same type; the input nodes are looked up
          // only to assert that.
24407     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24408     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24409     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24410     uint masklen = Matcher::vector_length(this);
          // Without AVX512DQ, masks shorter than 16 elements are handled as
          // 16-element masks.
24411     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24412     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24413   %}
24414   ins_pipe( pipe_slow );
24415 %}
24416 
24417 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
        // Masked three-input logic operation, register form. Matches a MacroLogicV
        // node with inputs dst, src2, src3, the immediate func and a mask; dst is
        // both the first input and the result.
24418   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24419   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24420   ins_encode %{
          // func is passed as a constant and the element type is forwarded so the
          // helper can pick the element width.
          // NOTE(review): the true before bt is presumably the merge flag; confirm
          // against the evpternlog declaration.
24421     int vlen_enc = vector_length_encoding(this);
24422     BasicType bt = Matcher::vector_element_basic_type(this);
24423     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24424                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24425   %}
24426   ins_pipe( pipe_slow );
24427 %}
24428 
24429 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
        // Masked three-input logic operation with the third input taken from
        // memory: src3 is a memory operand and is passed to the assembler as an
        // address. dst is both the first input and the result.
        // NOTE(review): the match tree names src3 directly instead of wrapping it
        // in LoadVector as the other memory-form masked rules in this file do
        // (for example the masked MaxV, MinV and FmaV memory forms). Confirm
        // whether this rule can actually be selected by the matcher.
24430   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24431   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24432   ins_encode %{
          // Same call shape as the register form, with an Address as third input.
          // NOTE(review): the true before bt is presumably the merge flag; confirm
          // against the evpternlog declaration.
24433     int vlen_enc = vector_length_encoding(this);
24434     BasicType bt = Matcher::vector_element_basic_type(this);
24435     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24436                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24437   %}
24438   ins_pipe( pipe_slow );
24439 %}
24440 
24441 instruct castMM(kReg dst)
24442 %{
        // CastVV on a mask-register operand. The rule emits no code: size is 0,
        // the encoding is empty and the cost is 0, so the node only exists for the
        // matcher and register allocator. The format text says castVV although the
        // rule is named castMM.
24443   match(Set dst (CastVV dst));
24444 
24445   size(0);
24446   format %{ "# castVV of $dst" %}
24447   ins_encode(/* empty encoding */);
24448   ins_cost(0);
24449   ins_pipe(empty);
24450 %}
24451 
24452 instruct castVV(vec dst)
24453 %{
        // CastVV on a vec operand. The rule emits no code: size is 0, the encoding
        // is empty and the cost is 0, so the node only exists for the matcher and
        // register allocator.
24454   match(Set dst (CastVV dst));
24455 
24456   size(0);
24457   format %{ "# castVV of $dst" %}
24458   ins_encode(/* empty encoding */);
24459   ins_cost(0);
24460   ins_pipe(empty);
24461 %}
24462 
24463 instruct castVVLeg(legVec dst)
24464 %{
        // CastVV on a legVec operand; otherwise identical in shape to the vec
        // variant. The rule emits no code: size is 0, the encoding is empty and
        // the cost is 0.
24465   match(Set dst (CastVV dst));
24466 
24467   size(0);
24468   format %{ "# castVV of $dst" %}
24469   ins_encode(/* empty encoding */);
24470   ins_cost(0);
24471   ins_pipe(empty);
24472 %}
24473 
24474 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24475 %{
        // Implements IsInfiniteF: classifies the float in src and leaves the
        // result in the int register dst. Uses a mask register as temporary and
        // declares the flags register killed.
24476   match(Set dst (IsInfiniteF src));
24477   effect(TEMP ktmp, KILL cr);
24478   format %{ "float_class_check $dst, $src" %}
24479   ins_encode %{
          // The immediate 0x18 sets bits 3 and 4 of the class selector; per the
          // Intel SDM description of VFPCLASSSS these select +Infinity and
          // -Infinity. The class result lands in ktmp and is then moved to dst.
24480     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24481     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24482   %}
24483   ins_pipe(pipe_slow);
24484 %}
24485 
24486 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24487 %{
        // Implements IsInfiniteD: classifies the double in src and leaves the
        // result in the int register dst. Uses a mask register as temporary and
        // declares the flags register killed.
24488   match(Set dst (IsInfiniteD src));
24489   effect(TEMP ktmp, KILL cr);
24490   format %{ "double_class_check $dst, $src" %}
24491   ins_encode %{
          // The immediate 0x18 sets bits 3 and 4 of the class selector; per the
          // Intel SDM description of VFPCLASSSD these select +Infinity and
          // -Infinity. The class result lands in ktmp and is then moved to dst.
24492     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24493     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24494   %}
24495   ins_pipe(pipe_slow);
24496 %}
24497 
24498 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24499 %{
        // Signed saturating add/subtract on subword element types, register form.
        // Matches SaturatingAddV and SaturatingSubV nodes that are not marked
        // unsigned.
24500   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24501             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24502   match(Set dst (SaturatingAddV src1 src2));
24503   match(Set dst (SaturatingSubV src1 src2));
24504   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24505   ins_encode %{
          // The ideal opcode tells the helper whether this is the add or the
          // subtract; the false argument corresponds to the signed predicate above.
24506     int vlen_enc = vector_length_encoding(this);
24507     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24508     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24509                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24510   %}
24511   ins_pipe(pipe_slow);
24512 %}
24513 
24514 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24515 %{
        // Unsigned saturating add/subtract on subword element types, register form.
        // Matches SaturatingAddV and SaturatingSubV nodes that are marked unsigned.
24516   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24517             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24518   match(Set dst (SaturatingAddV src1 src2));
24519   match(Set dst (SaturatingSubV src1 src2));
24520   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24521   ins_encode %{
          // The ideal opcode tells the helper whether this is the add or the
          // subtract; the true argument corresponds to the unsigned predicate above.
24522     int vlen_enc = vector_length_encoding(this);
24523     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24524     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24525                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24526   %}
24527   ins_pipe(pipe_slow);
24528 %}
24529 
24530 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24531 %{
        // Signed saturating add/subtract on non-subword element types, EVEX path.
        // Applies to 64-byte vectors, or to any length when AVX512VL is available.
        // Needs two vector and two mask-register temporaries; dst is also TEMP.
24532   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24533             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24534             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24535   match(Set dst (SaturatingAddV src1 src2));
24536   match(Set dst (SaturatingSubV src1 src2));
24537   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24538   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24539   ins_encode %{
          // The ideal opcode distinguishes add from subtract inside the helper.
24540     int vlen_enc = vector_length_encoding(this);
24541     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24542     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24543                                         $src1$$XMMRegister, $src2$$XMMRegister,
24544                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24545                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24546   %}
24547   ins_pipe(pipe_slow);
24548 %}
24549 
24550 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24551 %{
        // Signed saturating add/subtract on non-subword element types, AVX path.
        // Applies to vectors of at most 32 bytes when AVX512VL is not available,
        // complementing the EVEX rule. Needs four vector temporaries; dst is also
        // TEMP.
24552   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24553             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24554             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24555   match(Set dst (SaturatingAddV src1 src2));
24556   match(Set dst (SaturatingSubV src1 src2));
24557   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24558   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24559   ins_encode %{
          // The ideal opcode distinguishes add from subtract inside the helper.
24560     int vlen_enc = vector_length_encoding(this);
24561     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24562     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24563                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24564                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24565   %}
24566   ins_pipe(pipe_slow);
24567 %}
24568 
24569 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24570 %{
        // Unsigned saturating add on non-subword element types, EVEX path. Applies
        // to 64-byte vectors, or to any length when AVX512VL is available. Needs
        // two vector temporaries and one mask-register temporary; dst is also TEMP.
24571   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24572             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24573             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24574   match(Set dst (SaturatingAddV src1 src2));
24575   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24576   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24577   ins_encode %{
          // Only SaturatingAddV is matched here, so no opcode is passed to the
          // helper.
24578     int vlen_enc = vector_length_encoding(this);
24579     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24580     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24581                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24582   %}
24583   ins_pipe(pipe_slow);
24584 %}
24585 
24586 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24587 %{
        // Unsigned saturating add on non-subword element types, AVX path. Applies
        // to vectors of at most 32 bytes when AVX512VL is not available. Needs
        // three vector temporaries; dst is also TEMP.
24588   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24589             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24590             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24591   match(Set dst (SaturatingAddV src1 src2));
24592   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24593   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24594   ins_encode %{
          // Only SaturatingAddV is matched here, so no opcode is passed to the
          // helper.
24595     int vlen_enc = vector_length_encoding(this);
24596     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24597     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24598                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24599   %}
24600   ins_pipe(pipe_slow);
24601 %}
24602 
24603 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24604 %{
        // Unsigned saturating subtract on non-subword element types, EVEX path.
        // Applies to 64-byte vectors, or to any length when AVX512VL is available.
        // Needs one mask-register temporary; unlike the sibling saturating rules,
        // dst is not declared TEMP here.
24605   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24606             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24607             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24608   match(Set dst (SaturatingSubV src1 src2));
24609   effect(TEMP ktmp);
24610   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24611   ins_encode %{
          // Only SaturatingSubV is matched here, so no opcode is passed to the
          // helper.
24612     int vlen_enc = vector_length_encoding(this);
24613     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24614     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24615                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24616   %}
24617   ins_pipe(pipe_slow);
24618 %}
24619 
24620 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
24621 %{
        // Unsigned saturating subtract on non-subword element types, AVX path.
        // Applies to vectors of at most 32 bytes when AVX512VL is not available.
        // Needs two vector temporaries; dst is also TEMP.
24622   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24623             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24624             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24625   match(Set dst (SaturatingSubV src1 src2));
24626   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24627   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
24628   ins_encode %{
          // Only SaturatingSubV is matched here, so no opcode is passed to the
          // helper.
24629     int vlen_enc = vector_length_encoding(this);
24630     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24631     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24632                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
24633   %}
24634   ins_pipe(pipe_slow);
24635 %}
24636 
24637 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
24638 %{
        // Signed saturating add/subtract on subword element types, memory form.
        // Matches SaturatingAddV and SaturatingSubV whose second input is a
        // LoadVector, so the load is folded in and src2 is used as an address.
24639   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24640             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24641   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24642   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24643   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24644   ins_encode %{
          // The ideal opcode tells the helper whether this is the add or the
          // subtract; the false argument corresponds to the signed predicate above.
24645     int vlen_enc = vector_length_encoding(this);
24646     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24647     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24648                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
24649   %}
24650   ins_pipe(pipe_slow);
24651 %}
24652 
24653 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
24654 %{
        // Unsigned saturating add/subtract on subword element types, memory form.
        // Matches SaturatingAddV and SaturatingSubV whose second input is a
        // LoadVector, so the load is folded in and src2 is used as an address.
24655   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24656             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24657   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24658   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24659   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24660   ins_encode %{
          // The ideal opcode tells the helper whether this is the add or the
          // subtract; the true argument corresponds to the unsigned predicate above.
24661     int vlen_enc = vector_length_encoding(this);
24662     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24663     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24664                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
24665   %}
24666   ins_pipe(pipe_slow);
24667 %}
24668 
24669 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
        // Masked signed saturating add/subtract on subword element types, register
        // form. Matches SaturatingAddV and SaturatingSubV with a mask input; dst
        // is both the first input and the result.
24670   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24671             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24672   match(Set dst (SaturatingAddV (Binary dst src) mask));
24673   match(Set dst (SaturatingSubV (Binary dst src) mask));
24674   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
24675   ins_encode %{
          // The first boolean (false) corresponds to the signed predicate above.
          // NOTE(review): the second boolean (true) is presumably the merge flag;
          // confirm against the evmasked_saturating_op declaration.
24676     int vlen_enc = vector_length_encoding(this);
24677     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24678     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24679                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
24680   %}
24681   ins_pipe( pipe_slow );
24682 %}
24683 
24684 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
        // Masked unsigned saturating add/subtract on subword element types,
        // register form. Matches SaturatingAddV and SaturatingSubV with a mask
        // input; dst is both the first input and the result.
24685   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24686             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24687   match(Set dst (SaturatingAddV (Binary dst src) mask));
24688   match(Set dst (SaturatingSubV (Binary dst src) mask));
24689   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
24690   ins_encode %{
          // The first boolean (true) corresponds to the unsigned predicate above.
          // NOTE(review): the second boolean (true) is presumably the merge flag;
          // confirm against the evmasked_saturating_op declaration.
24691     int vlen_enc = vector_length_encoding(this);
24692     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24693     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24694                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
24695   %}
24696   ins_pipe( pipe_slow );
24697 %}
24698 
24699 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
24700   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24701             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24702   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
24703   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
24704   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
24705   ins_encode %{
24706     int vlen_enc = vector_length_encoding(this);
24707     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24708     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24709                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
24710   %}
24711   ins_pipe( pipe_slow );
24712 %}
24713 
24714 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
24715   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24716             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24717   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
24718   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
24719   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
24720   ins_encode %{
24721     int vlen_enc = vector_length_encoding(this);
24722     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24723     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24724                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
24725   %}
24726   ins_pipe( pipe_slow );
24727 %}
24728 
24729 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
24730 %{
24731   match(Set index (SelectFromTwoVector (Binary index src1) src2));
24732   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
24733   ins_encode %{
24734     int vlen_enc = vector_length_encoding(this);
24735     BasicType bt = Matcher::vector_element_basic_type(this);
24736     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
24737   %}
24738   ins_pipe(pipe_slow);
24739 %}
24740 
24741 instruct reinterpretS2HF(regF dst, rRegI src)
24742 %{
24743   match(Set dst (ReinterpretS2HF src));
24744   format %{ "evmovw $dst, $src" %}
24745   ins_encode %{
24746     __ evmovw($dst$$XMMRegister, $src$$Register);
24747   %}
24748   ins_pipe(pipe_slow);
24749 %}
24750 
24751 instruct reinterpretHF2S(rRegI dst, regF src)
24752 %{
24753   match(Set dst (ReinterpretHF2S src));
24754   format %{ "evmovw $dst, $src" %}
24755   ins_encode %{
24756     __ evmovw($dst$$Register, $src$$XMMRegister);
24757     __ narrow_subword_type($dst$$Register, T_SHORT);
24758   %}
24759   ins_pipe(pipe_slow);
24760 %}
24761 
24762 instruct convF2HFAndS2HF(regF dst, regF src)
24763 %{
24764   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
24765   format %{ "convF2HFAndS2HF $dst, $src" %}
24766   ins_encode %{
24767     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
24768   %}
24769   ins_pipe(pipe_slow);
24770 %}
24771 
24772 instruct convHF2SAndHF2F(regF dst, regF src)
24773 %{
24774   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
24775   format %{ "convHF2SAndHF2F $dst, $src" %}
24776   ins_encode %{
24777     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
24778   %}
24779   ins_pipe(pipe_slow);
24780 %}
24781 
24782 instruct scalar_sqrt_HF_reg(regF dst, regF src)
24783 %{
24784   match(Set dst (SqrtHF src));
24785   format %{ "scalar_sqrt_fp16 $dst, $src" %}
24786   ins_encode %{
24787     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
24788   %}
24789   ins_pipe(pipe_slow);
24790 %}
24791 
24792 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
24793 %{
24794   match(Set dst (AddHF src1 src2));
24795   match(Set dst (DivHF src1 src2));
24796   match(Set dst (MulHF src1 src2));
24797   match(Set dst (SubHF src1 src2));
24798   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
24799   ins_encode %{
24800     int opcode = this->ideal_Opcode();
24801     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
24802   %}
24803   ins_pipe(pipe_slow);
24804 %}
24805 
24806 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
24807 %{
24808   predicate(VM_Version::supports_avx10_2());
24809   match(Set dst (MaxHF src1 src2));
24810   match(Set dst (MinHF src1 src2));
24811 
24812   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
24813   ins_encode %{
24814     int opcode = this->ideal_Opcode();
24815     __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
24816   %}
24817   ins_pipe( pipe_slow );
24818 %}
24819 
24820 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
24821 %{
24822   predicate(!VM_Version::supports_avx10_2());
24823   match(Set dst (MaxHF src1 src2));
24824   match(Set dst (MinHF src1 src2));
24825   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
24826 
24827   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
24828   ins_encode %{
24829     int opcode = this->ideal_Opcode();
24830     __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
24831                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
24832   %}
24833   ins_pipe( pipe_slow );
24834 %}
24835 
24836 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
24837 %{
24838   match(Set dst (FmaHF  src2 (Binary dst src1)));
24839   effect(DEF dst);
24840   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
24841   ins_encode %{
24842     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
24843   %}
24844   ins_pipe( pipe_slow );
24845 %}
24846 
24847 
24848 instruct vector_sqrt_HF_reg(vec dst, vec src)
24849 %{
24850   match(Set dst (SqrtVHF src));
24851   format %{ "vector_sqrt_fp16 $dst, $src" %}
24852   ins_encode %{
24853     int vlen_enc = vector_length_encoding(this);
24854     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24855   %}
24856   ins_pipe(pipe_slow);
24857 %}
24858 
24859 instruct vector_sqrt_HF_mem(vec dst, memory src)
24860 %{
24861   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
24862   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
24863   ins_encode %{
24864     int vlen_enc = vector_length_encoding(this);
24865     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
24866   %}
24867   ins_pipe(pipe_slow);
24868 %}
24869 
24870 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
24871 %{
24872   match(Set dst (AddVHF src1 src2));
24873   match(Set dst (DivVHF src1 src2));
24874   match(Set dst (MulVHF src1 src2));
24875   match(Set dst (SubVHF src1 src2));
24876   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
24877   ins_encode %{
24878     int vlen_enc = vector_length_encoding(this);
24879     int opcode = this->ideal_Opcode();
24880     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
24881   %}
24882   ins_pipe(pipe_slow);
24883 %}
24884 
24885 
24886 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
24887 %{
24888   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
24889   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
24890   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
24891   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
24892   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
24893   ins_encode %{
24894     int vlen_enc = vector_length_encoding(this);
24895     int opcode = this->ideal_Opcode();
24896     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
24897   %}
24898   ins_pipe(pipe_slow);
24899 %}
24900 
24901 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
24902 %{
24903   match(Set dst (FmaVHF src2 (Binary dst src1)));
24904   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
24905   ins_encode %{
24906     int vlen_enc = vector_length_encoding(this);
24907     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
24908   %}
24909   ins_pipe( pipe_slow );
24910 %}
24911 
24912 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
24913 %{
24914   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
24915   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
24916   ins_encode %{
24917     int vlen_enc = vector_length_encoding(this);
24918     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
24919   %}
24920   ins_pipe( pipe_slow );
24921 %}
24922 
24923 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
24924 %{
24925   predicate(VM_Version::supports_avx10_2());
24926   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
24927   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
24928   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
24929   ins_encode %{
24930     int vlen_enc = vector_length_encoding(this);
24931     int opcode = this->ideal_Opcode();
24932     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
24933                             k0, vlen_enc);
24934   %}
24935   ins_pipe( pipe_slow );
24936 %}
24937 
24938 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
24939 %{
24940   predicate(VM_Version::supports_avx10_2());
24941   match(Set dst (MinVHF src1 src2));
24942   match(Set dst (MaxVHF src1 src2));
24943   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
24944   ins_encode %{
24945     int vlen_enc = vector_length_encoding(this);
24946     int opcode = this->ideal_Opcode();
24947     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24948                             k0, vlen_enc);
24949   %}
24950   ins_pipe( pipe_slow );
24951 %}
24952 
24953 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
24954 %{
24955   predicate(!VM_Version::supports_avx10_2());
24956   match(Set dst (MinVHF src1 src2));
24957   match(Set dst (MaxVHF src1 src2));
24958   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
24959   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
24960   ins_encode %{
24961     int vlen_enc = vector_length_encoding(this);
24962     int opcode = this->ideal_Opcode();
24963     __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
24964                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
24965   %}
24966   ins_pipe( pipe_slow );
24967 %}
24968 
24969 //----------PEEPHOLE RULES-----------------------------------------------------
24970 // These must follow all instruction definitions as they use the names
24971 // defined in the instructions definitions.
24972 //
24973 // peeppredicate ( rule_predicate );
24974 // // the predicate that must hold, otherwise the peephole rule is ignored
24975 //
24976 // peepmatch ( root_instr_name [preceding_instruction]* );
24977 //
24978 // peepprocedure ( procedure_name );
24979 // // provide a procedure name to perform the optimization, the procedure should
24980 // // reside in the architecture dependent peephole file, the method has the
24981 // // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
24982 // // with the arguments being the basic block, the current node index inside the
24983 // // block, the register allocator, a function that, when invoked, returns a new
24984 // // node defined in peepreplace, and the rules of the nodes appearing in the
24985 // // corresponding peepmatch; the function returns true if successful, else
24986 // // returns false
24987 //
24988 // peepconstraint %{
24989 // (instruction_number.operand_name relational_op instruction_number.operand_name
24990 //  [, ...] );
24991 // // instruction numbers are zero-based using left to right order in peepmatch
24992 //
24993 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
24994 // // provide an instruction_number.operand_name for each operand that appears
24995 // // in the replacement instruction's match rule
24996 //
24997 // ---------VM FLAGS---------------------------------------------------------
24998 //
24999 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25000 //
25001 // Each peephole rule is given an identifying number starting with zero and
25002 // increasing by one in the order seen by the parser.  An individual peephole
25003 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25004 // on the command-line.
25005 //
25006 // ---------CURRENT LIMITATIONS----------------------------------------------
25007 //
25008 // Only transformations inside a basic block (do we need more for peephole?)
25009 //
25010 // ---------EXAMPLE----------------------------------------------------------
25011 //
25012 // // pertinent parts of existing instructions in architecture description
25013 // instruct movI(rRegI dst, rRegI src)
25014 // %{
25015 //   match(Set dst (CopyI src));
25016 // %}
25017 //
25018 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25019 // %{
25020 //   match(Set dst (AddI dst src));
25021 //   effect(KILL cr);
25022 // %}
25023 //
25024 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25025 // %{
25026 //   match(Set dst (AddI dst src));
25027 // %}
25028 //
25029 // 1. Simple replacement
25030 // - Only match adjacent instructions in same basic block
25031 // - Only equality constraints
25032 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25033 // - Only one replacement instruction
25034 //
25035 // // Change (inc mov) to lea
25036 // peephole %{
25037 //   // lea should only be emitted when beneficial
25038 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25039 //   // increment preceded by register-register move
25040 //   peepmatch ( incI_rReg movI );
25041 //   // require that the destination register of the increment
25042 //   // match the destination register of the move
25043 //   peepconstraint ( 0.dst == 1.dst );
25044 //   // construct a replacement instruction that sets
25045 //   // the destination to ( move's source register + one )
25046 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25047 // %}
25048 //
25049 // 2. Procedural replacement
25050 // - More flexible finding relevant nodes
25051 // - More flexible constraints
25052 // - More flexible transformations
25053 // - May utilise architecture-dependent API more effectively
25054 // - Currently only one replacement instruction due to adlc parsing capabilities
25055 //
25056 // // Change (inc mov) to lea
25057 // peephole %{
25058 //   // lea should only be emitted when beneficial
25059 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25060 //   // the rule numbers of these nodes inside are passed into the function below
25061 //   peepmatch ( incI_rReg movI );
25062 //   // the method that takes the responsibility of transformation
25063 //   peepprocedure ( inc_mov_to_lea );
25064 //   // the replacement is a leaI_rReg_immI; a lambda that creates this node when
25065 //   // invoked is passed into the function above
25066 //   peepreplace ( leaI_rReg_immI() );
25067 // %}
25068 
25069 // These instructions are not matched by the matcher but used by the peephole
25070 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25071 %{
25072   predicate(false);
25073   match(Set dst (AddI src1 src2));
25074   format %{ "leal    $dst, [$src1 + $src2]" %}
25075   ins_encode %{
25076     Register dst = $dst$$Register;
25077     Register src1 = $src1$$Register;
25078     Register src2 = $src2$$Register;
25079     if (src1 != rbp && src1 != r13) {
25080       __ leal(dst, Address(src1, src2, Address::times_1));
25081     } else {
25082       assert(src2 != rbp && src2 != r13, "");
25083       __ leal(dst, Address(src2, src1, Address::times_1));
25084     }
25085   %}
25086   ins_pipe(ialu_reg_reg);
25087 %}
25088 
25089 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25090 %{
25091   predicate(false);
25092   match(Set dst (AddI src1 src2));
25093   format %{ "leal    $dst, [$src1 + $src2]" %}
25094   ins_encode %{
25095     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25096   %}
25097   ins_pipe(ialu_reg_reg);
25098 %}
25099 
25100 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25101 %{
25102   predicate(false);
25103   match(Set dst (LShiftI src shift));
25104   format %{ "leal    $dst, [$src << $shift]" %}
25105   ins_encode %{
25106     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25107     Register src = $src$$Register;
25108     if (scale == Address::times_2 && src != rbp && src != r13) {
25109       __ leal($dst$$Register, Address(src, src, Address::times_1));
25110     } else {
25111       __ leal($dst$$Register, Address(noreg, src, scale));
25112     }
25113   %}
25114   ins_pipe(ialu_reg_reg);
25115 %}
25116 
25117 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25118 %{
25119   predicate(false);
25120   match(Set dst (AddL src1 src2));
25121   format %{ "leaq    $dst, [$src1 + $src2]" %}
25122   ins_encode %{
25123     Register dst = $dst$$Register;
25124     Register src1 = $src1$$Register;
25125     Register src2 = $src2$$Register;
25126     if (src1 != rbp && src1 != r13) {
25127       __ leaq(dst, Address(src1, src2, Address::times_1));
25128     } else {
25129       assert(src2 != rbp && src2 != r13, "");
25130       __ leaq(dst, Address(src2, src1, Address::times_1));
25131     }
25132   %}
25133   ins_pipe(ialu_reg_reg);
25134 %}
25135 
25136 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25137 %{
25138   predicate(false);
25139   match(Set dst (AddL src1 src2));
25140   format %{ "leaq    $dst, [$src1 + $src2]" %}
25141   ins_encode %{
25142     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25143   %}
25144   ins_pipe(ialu_reg_reg);
25145 %}
25146 
25147 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25148 %{
25149   predicate(false);
25150   match(Set dst (LShiftL src shift));
25151   format %{ "leaq    $dst, [$src << $shift]" %}
25152   ins_encode %{
25153     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25154     Register src = $src$$Register;
25155     if (scale == Address::times_2 && src != rbp && src != r13) {
25156       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25157     } else {
25158       __ leaq($dst$$Register, Address(noreg, src, scale));
25159     }
25160   %}
25161   ins_pipe(ialu_reg_reg);
25162 %}
25163 
25164 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25165 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25166 // processors with at least partial ALU support for lea
25167 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25168 // beneficial for processors with full ALU support
25169 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25170 
25171 peephole
25172 %{
25173   peeppredicate(VM_Version::supports_fast_2op_lea());
25174   peepmatch (addI_rReg);
25175   peepprocedure (lea_coalesce_reg);
25176   peepreplace (leaI_rReg_rReg_peep());
25177 %}
25178 
25179 peephole
25180 %{
25181   peeppredicate(VM_Version::supports_fast_2op_lea());
25182   peepmatch (addI_rReg_imm);
25183   peepprocedure (lea_coalesce_imm);
25184   peepreplace (leaI_rReg_immI_peep());
25185 %}
25186 
25187 peephole
25188 %{
25189   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25190                 VM_Version::is_intel_cascade_lake());
25191   peepmatch (incI_rReg);
25192   peepprocedure (lea_coalesce_imm);
25193   peepreplace (leaI_rReg_immI_peep());
25194 %}
25195 
25196 peephole
25197 %{
25198   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25199                 VM_Version::is_intel_cascade_lake());
25200   peepmatch (decI_rReg);
25201   peepprocedure (lea_coalesce_imm);
25202   peepreplace (leaI_rReg_immI_peep());
25203 %}
25204 
25205 peephole
25206 %{
25207   peeppredicate(VM_Version::supports_fast_2op_lea());
25208   peepmatch (salI_rReg_immI2);
25209   peepprocedure (lea_coalesce_imm);
25210   peepreplace (leaI_rReg_immI2_peep());
25211 %}
25212 
25213 peephole
25214 %{
25215   peeppredicate(VM_Version::supports_fast_2op_lea());
25216   peepmatch (addL_rReg);
25217   peepprocedure (lea_coalesce_reg);
25218   peepreplace (leaL_rReg_rReg_peep());
25219 %}
25220 
25221 peephole
25222 %{
25223   peeppredicate(VM_Version::supports_fast_2op_lea());
25224   peepmatch (addL_rReg_imm);
25225   peepprocedure (lea_coalesce_imm);
25226   peepreplace (leaL_rReg_immL32_peep());
25227 %}
25228 
25229 peephole
25230 %{
25231   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25232                 VM_Version::is_intel_cascade_lake());
25233   peepmatch (incL_rReg);
25234   peepprocedure (lea_coalesce_imm);
25235   peepreplace (leaL_rReg_immL32_peep());
25236 %}
25237 
25238 peephole
25239 %{
25240   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25241                 VM_Version::is_intel_cascade_lake());
25242   peepmatch (decL_rReg);
25243   peepprocedure (lea_coalesce_imm);
25244   peepreplace (leaL_rReg_immL32_peep());
25245 %}
25246 
25247 peephole
25248 %{
25249   peeppredicate(VM_Version::supports_fast_2op_lea());
25250   peepmatch (salL_rReg_immI2);
25251   peepprocedure (lea_coalesce_imm);
25252   peepreplace (leaL_rReg_immI2_peep());
25253 %}
25254 
25255 peephole
25256 %{
25257   peepmatch (leaPCompressedOopOffset);
25258   peepprocedure (lea_remove_redundant);
25259 %}
25260 
25261 peephole
25262 %{
25263   peepmatch (leaP8Narrow);
25264   peepprocedure (lea_remove_redundant);
25265 %}
25266 
25267 peephole
25268 %{
25269   peepmatch (leaP32Narrow);
25270   peepprocedure (lea_remove_redundant);
25271 %}
25272 
25273 // These peephole rules match instructions which set flags and are followed by a testI/L_reg.
25274 // The test instruction is redundant in case the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
25275 
25276 //int variant
25277 peephole
25278 %{
25279   peepmatch (testI_reg);
25280   peepprocedure (test_may_remove);
25281 %}
25282 
25283 //long variant
25284 peephole
25285 %{
25286   peepmatch (testL_reg);
25287   peepprocedure (test_may_remove);
25288 %}
25289 
25290 
25291 //----------SMARTSPILL RULES---------------------------------------------------
25292 // These must follow all instruction definitions as they use the names
25293 // defined in the instructions definitions.