//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

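// As a worked reading of one definition below (illustrative only):
// reg_def RCX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()) declares RCX as
// save-on-call in both the compiled-Java and C calling conventions,
// spillable via LoadI/StoreI, with hardware encoding 1 (the bit pattern
// placed in the instruction's ModRM fields when RCX is an operand).
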
// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers.)

// Previously RBX, RSI, and RDI were set as save-on-entry for Java code,
// but SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are turned on as SOE
// registers.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

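// Following that heuristic, chunk0 below lists the save-on-call scratch
// registers R10 and R11 first and places RSP, which participates in every
// call sequence, last; the order is an allocation-priority hint, not a
// correctness requirement.
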
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 intrinsics, array copy stubs
// and superword operations (see the UseSSE42Intrinsics, UseXMMForArrayCopy
// and UseSuperWord flags).
// For pre-EVEX-enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

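// Layout note (illustrative): XMM0 below names the low 32-bit word of
// register 0; words (a)-(d) together span the legacy 128-bit XMM lane,
// (a)-(h) the 256-bit YMM lane, and all sixteen words (a)-(p) the full
// 512-bit ZMM register.
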
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
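
// (K0 is intentionally not defined here: in EVEX encodings a mask field of
// zero means "no masking", so k0 cannot be used as a write mask and is not
// made available to the allocator.)
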
//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
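// For example, inline_cache_reg is not listed explicitly below; its
// definition is supplied by the frame section later in this file.
//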

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}
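
// A note on the %{ ... %} classes here and below: these are dynamic
// register classes. Instead of a fixed register list, each body is a C++
// fragment returning a RegMask, so the class contents can be computed at
// VM startup (for example, including the APX registers R16-R31 above only
// when they are actually available). The _*_mask values themselves are
// expected to be defined and initialized in this file's C++ source blocks,
// outside this section.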

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre evex float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
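// A reg_class_dynamic resolves to its first (EVEX) class when the trailing
// predicate holds at VM startup, and to the legacy (XMM0-XMM15) class
// otherwise, so a single operand class adapts to the running CPU.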
 1115 
 1116 // Class for pre evex double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for evex 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for restricted 512bit vector registers (legacy, XMM0-XMM15 only)
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
// Singleton class for the first 128 bits of XMM0
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
// R12 holds the compressed-oops heap base, so it must be kept out of the
// allocatable sets whenever compressed oops are in use.
static bool need_r12_heapbase() {
  return UseCompressedOops;
}
 1528 
 1529 void reg_mask_init() {
  // APX extended general-purpose registers (EGPRs) r16-r31; they are
  // stripped from the masks below unless UseAPX is enabled.
  constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
 1628 }
 1629 
 1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper is a 3-byte instruction
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1702   Label done;
 1703   __ movl(dst, -1);
 1704   __ jcc(Assembler::parity, done);
 1705   __ jcc(Assembler::below, done);
 1706   __ setcc(Assembler::notEqual, dst);
 1707   __ bind(done);
 1708 }
 1709 
 1710 // Math.min()    # Math.max()
 1711 // --------------------------
 1712 // ucomis[s/d]   #
 1713 // ja   -> b     # a
 1714 // jp   -> NaN   # NaN
 1715 // jb   -> a     # b
 1716 // je            #
 1717 // |-jz -> a | b # a & b
 1718 // |    -> a     #
 1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1720                             XMMRegister a, XMMRegister b,
 1721                             XMMRegister xmmt, Register rt,
 1722                             bool min, bool single) {
 1723 
 1724   Label nan, zero, below, above, done;
 1725 
 1726   if (single)
 1727     __ ucomiss(a, b);
 1728   else
 1729     __ ucomisd(a, b);
 1730 
 1731   if (dst->encoding() != (min ? b : a)->encoding())
 1732     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1733   else
 1734     __ jccb(Assembler::above, done);
 1735 
 1736   __ jccb(Assembler::parity, nan);  // PF=1
 1737   __ jccb(Assembler::below, below); // CF=1
 1738 
 1739   // equal
 1740   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1741   if (single) {
 1742     __ ucomiss(a, xmmt);
 1743     __ jccb(Assembler::equal, zero);
 1744 
 1745     __ movflt(dst, a);
 1746     __ jmp(done);
 1747   }
 1748   else {
 1749     __ ucomisd(a, xmmt);
 1750     __ jccb(Assembler::equal, zero);
 1751 
 1752     __ movdbl(dst, a);
 1753     __ jmp(done);
 1754   }
 1755 
 1756   __ bind(zero);
 1757   if (min)
 1758     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1759   else
 1760     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1761 
 1762   __ jmp(done);
 1763 
 1764   __ bind(above);
 1765   if (single)
 1766     __ movflt(dst, min ? b : a);
 1767   else
 1768     __ movdbl(dst, min ? b : a);
 1769 
 1770   __ jmp(done);
 1771 
 1772   __ bind(nan);
 1773   if (single) {
 1774     __ movl(rt, 0x7fc00000); // Float.NaN
 1775     __ movdl(dst, rt);
 1776   }
 1777   else {
 1778     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1779     __ movdq(dst, rt);
 1780   }
 1781   __ jmp(done);
 1782 
 1783   __ bind(below);
 1784   if (single)
 1785     __ movflt(dst, min ? a : b);
 1786   else
 1787     __ movdbl(dst, min ? a : b);
 1788 
 1789   __ bind(done);
 1790 }
 1791 
 1792 //=============================================================================
 1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1794 
 1795 int ConstantTable::calculate_table_base_offset() const {
 1796   return 0;  // absolute addressing, no offset
 1797 }
 1798 
 1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1800 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1801   ShouldNotReachHere();
 1802 }
 1803 
 1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1805   // Empty encoding
 1806 }
 1807 
 1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1809   return 0;
 1810 }
 1811 
 1812 #ifndef PRODUCT
 1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1814   st->print("# MachConstantBaseNode (empty encoding)");
 1815 }
 1816 #endif
 1817 
 1818 
 1819 //=============================================================================
 1820 #ifndef PRODUCT
 1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1822   Compile* C = ra_->C;
 1823 
 1824   int framesize = C->output()->frame_size_in_bytes();
 1825   int bangsize = C->output()->bang_size_in_bytes();
 1826   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1827   // Remove wordSize for return addr which is already pushed.
 1828   framesize -= wordSize;
 1829 
 1830   if (C->output()->need_stack_bang(bangsize)) {
 1831     framesize -= wordSize;
 1832     st->print("# stack bang (%d bytes)", bangsize);
 1833     st->print("\n\t");
 1834     st->print("pushq   rbp\t# Save rbp");
 1835     if (PreserveFramePointer) {
 1836         st->print("\n\t");
 1837         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1838     }
 1839     if (framesize) {
 1840       st->print("\n\t");
 1841       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1842     }
 1843   } else {
 1844     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1845     st->print("\n\t");
 1846     framesize -= wordSize;
 1847     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1848     if (PreserveFramePointer) {
 1849       st->print("\n\t");
 1850       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1851       if (framesize > 0) {
 1852         st->print("\n\t");
 1853         st->print("addq    rbp, #%d", framesize);
 1854       }
 1855     }
 1856   }
 1857 
 1858   if (VerifyStackAtCalls) {
 1859     st->print("\n\t");
 1860     framesize -= wordSize;
 1861     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1862 #ifdef ASSERT
 1863     st->print("\n\t");
 1864     st->print("# stack alignment check");
 1865 #endif
 1866   }
 1867   if (C->stub_function() != nullptr) {
 1868     st->print("\n\t");
 1869     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1870     st->print("\n\t");
 1871     st->print("je      fast_entry\t");
 1872     st->print("\n\t");
 1873     st->print("call    #nmethod_entry_barrier_stub\t");
 1874     st->print("\n\tfast_entry:");
 1875   }
 1876   st->cr();
 1877 }
 1878 #endif
 1879 
 1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1881   Compile* C = ra_->C;
 1882 
 1883   int framesize = C->output()->frame_size_in_bytes();
 1884   int bangsize = C->output()->bang_size_in_bytes();
 1885 
 1886   if (C->clinit_barrier_on_entry()) {
 1887     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1888     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1889 
 1890     Label L_skip_barrier;
 1891     Register klass = rscratch1;
 1892 
 1893     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1894     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1895 
 1896     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1897 
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
 1902 
 1903   C->output()->set_frame_complete(__ offset());
 1904 
 1905   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because uses of the constant
    // table might be emitted before MachConstantBaseNode itself.
 1908     ConstantTable& constant_table = C->output()->constant_table();
 1909     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1910   }
 1911 }
 1912 
 1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1914 {
 1915   return MachNode::size(ra_); // too many variables; just compute it
 1916                               // the hard way
 1917 }
 1918 
 1919 int MachPrologNode::reloc() const
 1920 {
 1921   return 0; // a large enough number
 1922 }
 1923 
 1924 //=============================================================================
 1925 #ifndef PRODUCT
 1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1927 {
 1928   Compile* C = ra_->C;
 1929   if (generate_vzeroupper(C)) {
 1930     st->print("vzeroupper");
 1931     st->cr(); st->print("\t");
 1932   }
 1933 
 1934   int framesize = C->output()->frame_size_in_bytes();
 1935   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1936   // Remove word for return adr already pushed
 1937   // and RBP
 1938   framesize -= 2*wordSize;
 1939 
 1940   if (framesize) {
 1941     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1942     st->print("\t");
 1943   }
 1944 
 1945   st->print_cr("popq    rbp");
 1946   if (do_polling() && C->is_method_compilation()) {
 1947     st->print("\t");
 1948     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1949                  "ja      #safepoint_stub\t"
 1950                  "# Safepoint: poll for GC");
 1951   }
 1952 }
 1953 #endif
 1954 
 1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1956 {
 1957   Compile* C = ra_->C;
 1958 
 1959   if (generate_vzeroupper(C)) {
 1960     // Clear upper bits of YMM registers when current compiled code uses
 1961     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1962     __ vzeroupper();
 1963   }
 1964 
 1965   int framesize = C->output()->frame_size_in_bytes();
 1966   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1967   // Remove word for return adr already pushed
 1968   // and RBP
 1969   framesize -= 2*wordSize;
 1970 
 1971   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1972 
 1973   if (framesize) {
 1974     __ addq(rsp, framesize);
 1975   }
 1976 
 1977   __ popq(rbp);
 1978 
 1979   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1980     __ reserved_stack_check();
 1981   }
 1982 
 1983   if (do_polling() && C->is_method_compilation()) {
 1984     Label dummy_label;
 1985     Label* code_stub = &dummy_label;
 1986     if (!C->output()->in_scratch_emit_size()) {
 1987       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1988       C->output()->add_stub(stub);
 1989       code_stub = &stub->entry();
 1990     }
 1991     __ relocate(relocInfo::poll_return_type);
 1992     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1993   }
 1994 }
 1995 
 1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1997 {
 1998   return MachNode::size(ra_); // too many variables; just compute it
 1999                               // the hard way
 2000 }
 2001 
 2002 int MachEpilogNode::reloc() const
 2003 {
 2004   return 2; // a large enough number
 2005 }
 2006 
 2007 const Pipeline* MachEpilogNode::pipeline() const
 2008 {
 2009   return MachNode::pipeline_class();
 2010 }
 2011 
 2012 //=============================================================================
 2013 
 2014 enum RC {
 2015   rc_bad,
 2016   rc_int,
 2017   rc_kreg,
 2018   rc_float,
 2019   rc_stack
 2020 };
 2021 
 2022 static enum RC rc_class(OptoReg::Name reg)
 2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2025 
 2026   if (OptoReg::is_stack(reg)) return rc_stack;
 2027 
 2028   VMReg r = OptoReg::as_VMReg(reg);
 2029 
 2030   if (r->is_Register()) return rc_int;
 2031 
 2032   if (r->is_KRegister()) return rc_kreg;
 2033 
 2034   assert(r->is_XMMRegister(), "must be");
 2035   return rc_float;
 2036 }
 2037 
 2038 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2040                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2041 
 2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2043                      int stack_offset, int reg, uint ireg, outputStream* st);
 2044 
 2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2046                                       int dst_offset, uint ireg, outputStream* st) {
 2047   if (masm) {
 2048     switch (ireg) {
 2049     case Op_VecS:
 2050       __ movq(Address(rsp, -8), rax);
 2051       __ movl(rax, Address(rsp, src_offset));
 2052       __ movl(Address(rsp, dst_offset), rax);
 2053       __ movq(rax, Address(rsp, -8));
 2054       break;
 2055     case Op_VecD:
 2056       __ pushq(Address(rsp, src_offset));
 2057       __ popq (Address(rsp, dst_offset));
 2058       break;
 2059     case Op_VecX:
 2060       __ pushq(Address(rsp, src_offset));
 2061       __ popq (Address(rsp, dst_offset));
 2062       __ pushq(Address(rsp, src_offset+8));
 2063       __ popq (Address(rsp, dst_offset+8));
 2064       break;
 2065     case Op_VecY:
 2066       __ vmovdqu(Address(rsp, -32), xmm0);
 2067       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2068       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2069       __ vmovdqu(xmm0, Address(rsp, -32));
 2070       break;
 2071     case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2); // vector_len 2 == Assembler::AVX_512bit
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2076       break;
 2077     default:
 2078       ShouldNotReachHere();
 2079     }
 2080 #ifndef PRODUCT
 2081   } else {
 2082     switch (ireg) {
 2083     case Op_VecS:
 2084       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2085                 "movl    rax, [rsp + #%d]\n\t"
 2086                 "movl    [rsp + #%d], rax\n\t"
 2087                 "movq    rax, [rsp - #8]",
 2088                 src_offset, dst_offset);
 2089       break;
 2090     case Op_VecD:
 2091       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2092                 "popq    [rsp + #%d]",
 2093                 src_offset, dst_offset);
 2094       break;
 2095      case Op_VecX:
 2096       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2097                 "popq    [rsp + #%d]\n\t"
 2098                 "pushq   [rsp + #%d]\n\t"
 2099                 "popq    [rsp + #%d]",
 2100                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2101       break;
 2102     case Op_VecY:
 2103       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2104                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2105                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2106                 "vmovdqu xmm0, [rsp - #32]",
 2107                 src_offset, dst_offset);
 2108       break;
 2109     case Op_VecZ:
 2110       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2111                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2112                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2113                 "vmovdqu xmm0, [rsp - #64]",
 2114                 src_offset, dst_offset);
 2115       break;
 2116     default:
 2117       ShouldNotReachHere();
 2118     }
 2119 #endif
 2120   }
 2121 }
 2122 
 2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2124                                        PhaseRegAlloc* ra_,
 2125                                        bool do_size,
 2126                                        outputStream* st) const {
  assert(masm != nullptr || st != nullptr, "sanity");
 2128   // Get registers to move
 2129   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2130   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2131   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2132   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2133 
 2134   enum RC src_second_rc = rc_class(src_second);
 2135   enum RC src_first_rc = rc_class(src_first);
 2136   enum RC dst_second_rc = rc_class(dst_second);
 2137   enum RC dst_first_rc = rc_class(dst_first);
 2138 
  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register");
 2141 
 2142   if (src_first == dst_first && src_second == dst_second) {
 2143     // Self copy, no move
 2144     return 0;
 2145   }
 2146   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2147     uint ireg = ideal_reg();
 2148     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2149     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2151       // mem -> mem
 2152       int src_offset = ra_->reg2offset(src_first);
 2153       int dst_offset = ra_->reg2offset(dst_first);
 2154       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2156       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2158       int stack_offset = ra_->reg2offset(dst_first);
 2159       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2161       int stack_offset = ra_->reg2offset(src_first);
 2162       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2163     } else {
 2164       ShouldNotReachHere();
 2165     }
 2166     return 0;
 2167   }
 2168   if (src_first_rc == rc_stack) {
 2169     // mem ->
 2170     if (dst_first_rc == rc_stack) {
 2171       // mem -> mem
 2172       assert(src_second != dst_first, "overlap");
 2173       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2174           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2175         // 64-bit
 2176         int src_offset = ra_->reg2offset(src_first);
 2177         int dst_offset = ra_->reg2offset(dst_first);
 2178         if (masm) {
 2179           __ pushq(Address(rsp, src_offset));
 2180           __ popq (Address(rsp, dst_offset));
 2181 #ifndef PRODUCT
 2182         } else {
 2183           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2184                     "popq    [rsp + #%d]",
 2185                      src_offset, dst_offset);
 2186 #endif
 2187         }
 2188       } else {
 2189         // 32-bit
 2190         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2191         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // There is no 32-bit push/pop of memory in 64-bit mode, so go
        // through rax, preserving it in the scratch slot at [rsp - 8]:
 2193         int src_offset = ra_->reg2offset(src_first);
 2194         int dst_offset = ra_->reg2offset(dst_first);
 2195         if (masm) {
 2196           __ movq(Address(rsp, -8), rax);
 2197           __ movl(rax, Address(rsp, src_offset));
 2198           __ movl(Address(rsp, dst_offset), rax);
 2199           __ movq(rax, Address(rsp, -8));
 2200 #ifndef PRODUCT
 2201         } else {
 2202           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2203                     "movl    rax, [rsp + #%d]\n\t"
 2204                     "movl    [rsp + #%d], rax\n\t"
 2205                     "movq    rax, [rsp - #8]",
 2206                      src_offset, dst_offset);
 2207 #endif
 2208         }
 2209       }
 2210       return 0;
 2211     } else if (dst_first_rc == rc_int) {
 2212       // mem -> gpr
 2213       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2214           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2215         // 64-bit
 2216         int offset = ra_->reg2offset(src_first);
 2217         if (masm) {
 2218           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2219 #ifndef PRODUCT
 2220         } else {
 2221           st->print("movq    %s, [rsp + #%d]\t# spill",
 2222                      Matcher::regName[dst_first],
 2223                      offset);
 2224 #endif
 2225         }
 2226       } else {
 2227         // 32-bit
 2228         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2229         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2230         int offset = ra_->reg2offset(src_first);
 2231         if (masm) {
 2232           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2233 #ifndef PRODUCT
 2234         } else {
 2235           st->print("movl    %s, [rsp + #%d]\t# spill",
 2236                      Matcher::regName[dst_first],
 2237                      offset);
 2238 #endif
 2239         }
 2240       }
 2241       return 0;
 2242     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2244       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2245           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2246         // 64-bit
 2247         int offset = ra_->reg2offset(src_first);
 2248         if (masm) {
 2249           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2250 #ifndef PRODUCT
 2251         } else {
 2252           st->print("%s  %s, [rsp + #%d]\t# spill",
 2253                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2254                      Matcher::regName[dst_first],
 2255                      offset);
 2256 #endif
 2257         }
 2258       } else {
 2259         // 32-bit
 2260         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2261         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2262         int offset = ra_->reg2offset(src_first);
 2263         if (masm) {
 2264           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2265 #ifndef PRODUCT
 2266         } else {
 2267           st->print("movss   %s, [rsp + #%d]\t# spill",
 2268                      Matcher::regName[dst_first],
 2269                      offset);
 2270 #endif
 2271         }
 2272       }
 2273       return 0;
 2274     } else if (dst_first_rc == rc_kreg) {
 2275       // mem -> kreg
 2276       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2277           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2278         // 64-bit
 2279         int offset = ra_->reg2offset(src_first);
 2280         if (masm) {
 2281           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2282 #ifndef PRODUCT
 2283         } else {
 2284           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2285                      Matcher::regName[dst_first],
 2286                      offset);
 2287 #endif
 2288         }
 2289       }
 2290       return 0;
 2291     }
 2292   } else if (src_first_rc == rc_int) {
 2293     // gpr ->
 2294     if (dst_first_rc == rc_stack) {
 2295       // gpr -> mem
 2296       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2297           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2298         // 64-bit
 2299         int offset = ra_->reg2offset(dst_first);
 2300         if (masm) {
 2301           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2302 #ifndef PRODUCT
 2303         } else {
 2304           st->print("movq    [rsp + #%d], %s\t# spill",
 2305                      offset,
 2306                      Matcher::regName[src_first]);
 2307 #endif
 2308         }
 2309       } else {
 2310         // 32-bit
 2311         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2312         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2313         int offset = ra_->reg2offset(dst_first);
 2314         if (masm) {
 2315           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2316 #ifndef PRODUCT
 2317         } else {
 2318           st->print("movl    [rsp + #%d], %s\t# spill",
 2319                      offset,
 2320                      Matcher::regName[src_first]);
 2321 #endif
 2322         }
 2323       }
 2324       return 0;
 2325     } else if (dst_first_rc == rc_int) {
 2326       // gpr -> gpr
 2327       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2328           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2329         // 64-bit
 2330         if (masm) {
 2331           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2332                   as_Register(Matcher::_regEncode[src_first]));
 2333 #ifndef PRODUCT
 2334         } else {
 2335           st->print("movq    %s, %s\t# spill",
 2336                      Matcher::regName[dst_first],
 2337                      Matcher::regName[src_first]);
 2338 #endif
 2339         }
 2340         return 0;
 2341       } else {
 2342         // 32-bit
 2343         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2344         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2345         if (masm) {
 2346           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2347                   as_Register(Matcher::_regEncode[src_first]));
 2348 #ifndef PRODUCT
 2349         } else {
 2350           st->print("movl    %s, %s\t# spill",
 2351                      Matcher::regName[dst_first],
 2352                      Matcher::regName[src_first]);
 2353 #endif
 2354         }
 2355         return 0;
 2356       }
 2357     } else if (dst_first_rc == rc_float) {
 2358       // gpr -> xmm
 2359       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2360           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2361         // 64-bit
 2362         if (masm) {
 2363           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2364 #ifndef PRODUCT
 2365         } else {
 2366           st->print("movdq   %s, %s\t# spill",
 2367                      Matcher::regName[dst_first],
 2368                      Matcher::regName[src_first]);
 2369 #endif
 2370         }
 2371       } else {
 2372         // 32-bit
 2373         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2374         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2375         if (masm) {
 2376           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2377 #ifndef PRODUCT
 2378         } else {
 2379           st->print("movdl   %s, %s\t# spill",
 2380                      Matcher::regName[dst_first],
 2381                      Matcher::regName[src_first]);
 2382 #endif
 2383         }
 2384       }
 2385       return 0;
 2386     } else if (dst_first_rc == rc_kreg) {
 2387       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2388           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2389         // 64-bit
 2390         if (masm) {
 2391           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2398         }
 2399       }
 2400       Unimplemented();
 2401       return 0;
 2402     }
 2403   } else if (src_first_rc == rc_float) {
 2404     // xmm ->
 2405     if (dst_first_rc == rc_stack) {
 2406       // xmm -> mem
 2407       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2408           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2409         // 64-bit
 2410         int offset = ra_->reg2offset(dst_first);
 2411         if (masm) {
 2412           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2413 #ifndef PRODUCT
 2414         } else {
 2415           st->print("movsd   [rsp + #%d], %s\t# spill",
 2416                      offset,
 2417                      Matcher::regName[src_first]);
 2418 #endif
 2419         }
 2420       } else {
 2421         // 32-bit
 2422         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2423         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2424         int offset = ra_->reg2offset(dst_first);
 2425         if (masm) {
 2426           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2427 #ifndef PRODUCT
 2428         } else {
 2429           st->print("movss   [rsp + #%d], %s\t# spill",
 2430                      offset,
 2431                      Matcher::regName[src_first]);
 2432 #endif
 2433         }
 2434       }
 2435       return 0;
 2436     } else if (dst_first_rc == rc_int) {
 2437       // xmm -> gpr
 2438       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2439           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2440         // 64-bit
 2441         if (masm) {
 2442           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2443 #ifndef PRODUCT
 2444         } else {
 2445           st->print("movdq   %s, %s\t# spill",
 2446                      Matcher::regName[dst_first],
 2447                      Matcher::regName[src_first]);
 2448 #endif
 2449         }
 2450       } else {
 2451         // 32-bit
 2452         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2453         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2454         if (masm) {
 2455           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2456 #ifndef PRODUCT
 2457         } else {
 2458           st->print("movdl   %s, %s\t# spill",
 2459                      Matcher::regName[dst_first],
 2460                      Matcher::regName[src_first]);
 2461 #endif
 2462         }
 2463       }
 2464       return 0;
 2465     } else if (dst_first_rc == rc_float) {
 2466       // xmm -> xmm
 2467       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2468           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2469         // 64-bit
 2470         if (masm) {
 2471           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2472 #ifndef PRODUCT
 2473         } else {
 2474           st->print("%s  %s, %s\t# spill",
 2475                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2476                      Matcher::regName[dst_first],
 2477                      Matcher::regName[src_first]);
 2478 #endif
 2479         }
 2480       } else {
 2481         // 32-bit
 2482         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2483         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2484         if (masm) {
 2485           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2486 #ifndef PRODUCT
 2487         } else {
 2488           st->print("%s  %s, %s\t# spill",
 2489                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2490                      Matcher::regName[dst_first],
 2491                      Matcher::regName[src_first]);
 2492 #endif
 2493         }
 2494       }
 2495       return 0;
 2496     } else if (dst_first_rc == rc_kreg) {
 2497       assert(false, "Illegal spilling");
 2498       return 0;
 2499     }
 2500   } else if (src_first_rc == rc_kreg) {
 2501     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2503       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2504           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2505         // 64-bit
 2506         int offset = ra_->reg2offset(dst_first);
 2507         if (masm) {
 2508           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2509 #ifndef PRODUCT
 2510         } else {
 2511           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2512                      offset,
 2513                      Matcher::regName[src_first]);
 2514 #endif
 2515         }
 2516       }
 2517       return 0;
 2518     } else if (dst_first_rc == rc_int) {
 2519       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2520           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2521         // 64-bit
 2522         if (masm) {
 2523           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2524 #ifndef PRODUCT
 2525         } else {
 2526          st->print("kmovq   %s, %s\t# spill",
 2527                      Matcher::regName[dst_first],
 2528                      Matcher::regName[src_first]);
 2529 #endif
 2530         }
 2531       }
 2532       Unimplemented();
 2533       return 0;
 2534     } else if (dst_first_rc == rc_kreg) {
 2535       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2536           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2537         // 64-bit
 2538         if (masm) {
 2539           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2540 #ifndef PRODUCT
 2541         } else {
 2542          st->print("kmovq   %s, %s\t# spill",
 2543                      Matcher::regName[dst_first],
 2544                      Matcher::regName[src_first]);
 2545 #endif
 2546         }
 2547       }
 2548       return 0;
 2549     } else if (dst_first_rc == rc_float) {
 2550       assert(false, "Illegal spill");
 2551       return 0;
 2552     }
 2553   }
 2554 
 2555   assert(0," foo ");
 2556   Unimplemented();
 2557   return 0;
 2558 }
 2559 
 2560 #ifndef PRODUCT
 2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2562   implementation(nullptr, ra_, false, st);
 2563 }
 2564 #endif
 2565 
 2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2567   implementation(masm, ra_, false, nullptr);
 2568 }
 2569 
 2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
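  // Too many spill variants to sum statically; MachNode::size() emits into a
  // scratch buffer and measures.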
 2571   return MachNode::size(ra_);
 2572 }
 2573 
 2574 //=============================================================================
 2575 #ifndef PRODUCT
 2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2577 {
 2578   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2579   int reg = ra_->get_reg_first(this);
 2580   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2581             Matcher::regName[reg], offset);
 2582 }
 2583 #endif
 2584 
 2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2588   int reg = ra_->get_encode(this);
 2589 
 2590   __ lea(as_Register(reg), Address(rsp, offset));
 2591 }
 2592 
 2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2594 {
 2595   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
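  // lea reg, [rsp + offset]: prefix + opcode (0x8D) + ModRM + SIB (an rsp
  // base always requires a SIB byte) + disp8 or disp32, i.e. 5 or 8 bytes
  // with a 1-byte REX prefix and 6 or 9 bytes with a 2-byte REX2 prefix.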
 2596   if (ra_->get_encode(this) > 15) {
 2597     return (offset < 0x80) ? 6 : 9; // REX2
 2598   } else {
 2599     return (offset < 0x80) ? 5 : 8; // REX
 2600   }
 2601 }
 2602 
 2603 //=============================================================================
 2604 #ifndef PRODUCT
 2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2606 {
 2607   if (UseCompressedClassPointers) {
 2608     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2609     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2610   } else {
 2611     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2612     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2613   }
 2614   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2615 }
 2616 #endif
 2617 
 2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2619 {
 2620   __ ic_check(InteriorEntryAlignment);
 2621 }
 2622 
 2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2624 {
 2625   return MachNode::size(ra_); // too many variables; just compute it
 2626                               // the hard way
 2627 }
 2628 
 2629 
 2630 //=============================================================================
 2631 
 2632 bool Matcher::supports_vector_calling_convention(void) {
 2633   return EnableVectorSupport;
 2634 }
 2635 
 2636 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2637   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2638 }
 2639 
 2640 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2641   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2642 }
 2643 
 2644 #ifdef ASSERT
 2645 static bool is_ndd_demotable(const MachNode* mdef) {
 2646   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2647 }
 2648 #endif
 2649 
 2650 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2651                                             int oper_index) {
 2652   if (mdef == nullptr) {
 2653     return false;
 2654   }
 2655 
 2656   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2657       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2658     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2659     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2660     return false;
 2661   }
 2662 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any single address
  // component will not result in NDD demotion by the assembler.
 2666   if (mdef->operand_num_edges(oper_index) != 1) {
 2667     return false;
 2668   }
 2669 
 2670   // Demotion candidate must be register mask compatible with definition.
 2671   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2672   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2673     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2674     return false;
 2675   }
 2676 
 2677   switch (oper_index) {
  // The first operand of a MachNode matched by an Intel APX NDD selection
  // pattern can share its assigned register with the definition operand if
  // their live ranges do not overlap. In such a scenario the assembler can
  // demote the instruction to a legacy map0/map1 encoding, replacing its
  // 4-byte extended EVEX prefix with a shorter REX/REX2 prefix. Demotion
  // candidates are decorated with a special flag by the instruction selector.
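  //
  // Illustrative example (assuming APX NDD syntax "op dst, src1, src2"):
  // "addq rdx, rax, rcx" can be demoted to the legacy two-operand
  // "addq rdx, rcx" when the allocator assigns rdx and rax the same register.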
 2684   case 1:
 2685     return is_ndd_demotable_opr1(mdef);
 2686 
  // The definition operand of a commutative operation can also be biased
  // towards the second operand.
 2689   case 2:
 2690     return is_ndd_demotable_opr2(mdef);
 2691 
  // The current scheme selects at most two biasing candidates.
 2693   default:
 2694     assert(false, "unhandled operand index: %s", mdef->Name());
 2695     break;
 2696   }
 2697 
 2698   return false;
 2699 }
 2700 
 2701 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
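  // Vectors are returned in XMM0; the pair records the first (lo) and last
  // (hi) OptoReg halves spanned by the vector's ideal register type.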
 2702   assert(EnableVectorSupport, "sanity");
 2703   int lo = XMM0_num;
 2704   int hi = XMM0b_num;
 2705   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2706   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2707   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2708   return OptoRegPair(hi, lo);
 2709 }
 2710 
 2711 // Is this branch offset short enough that a short branch can be used?
 2712 //
 2713 // NOTE: If the platform does not provide any short branch variants, then
 2714 //       this method should return false for offset 0.
 2715 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
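  // For example, a 2-byte short jump at address A targeting T is passed
  // offset T - A; the encoded 8-bit displacement is T - (A + 2), which
  // must fit in [-128, 127].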
 2719   offset -= br_size;
 2720 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
  if (rule == jmpConUCF2_rule) {
    return (-126 <= offset && offset <= 125);
  }
 2725   return (-128 <= offset && offset <= 127);
 2726 }
 2727 
 2728 // Return whether or not this register is ever used as an argument.
 2729 // This function is used on startup to build the trampoline stubs in
 2730 // generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
 2733 bool Matcher::can_be_java_arg(int reg)
 2734 {
 2735   return
 2736     reg ==  RDI_num || reg == RDI_H_num ||
 2737     reg ==  RSI_num || reg == RSI_H_num ||
 2738     reg ==  RDX_num || reg == RDX_H_num ||
 2739     reg ==  RCX_num || reg == RCX_H_num ||
 2740     reg ==   R8_num || reg ==  R8_H_num ||
 2741     reg ==   R9_num || reg ==  R9_H_num ||
 2742     reg ==  R12_num || reg == R12_H_num ||
 2743     reg == XMM0_num || reg == XMM0b_num ||
 2744     reg == XMM1_num || reg == XMM1b_num ||
 2745     reg == XMM2_num || reg == XMM2b_num ||
 2746     reg == XMM3_num || reg == XMM3b_num ||
 2747     reg == XMM4_num || reg == XMM4b_num ||
 2748     reg == XMM5_num || reg == XMM5b_num ||
 2749     reg == XMM6_num || reg == XMM6b_num ||
 2750     reg == XMM7_num || reg == XMM7b_num;
 2751 }
 2752 
 2753 bool Matcher::is_spillable_arg(int reg)
 2754 {
 2755   return can_be_java_arg(reg);
 2756 }
 2757 
 2758 uint Matcher::int_pressure_limit()
 2759 {
 2760   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2761 }
 2762 
 2763 uint Matcher::float_pressure_limit()
 2764 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
 2767   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2768   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2769   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2770 }
 2771 
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  // In 64-bit mode, code that uses multiplication when the divisor is
  // constant is faster than the hardware DIV instruction (it uses MulHiL).
  return false;
 2777 }
 2778 
 2779 // Register for DIVI projection of divmodI
 2780 const RegMask& Matcher::divI_proj_mask() {
 2781   return INT_RAX_REG_mask();
 2782 }
 2783 
 2784 // Register for MODI projection of divmodI
 2785 const RegMask& Matcher::modI_proj_mask() {
 2786   return INT_RDX_REG_mask();
 2787 }
 2788 
 2789 // Register for DIVL projection of divmodL
 2790 const RegMask& Matcher::divL_proj_mask() {
 2791   return LONG_RAX_REG_mask();
 2792 }
 2793 
 2794 // Register for MODL projection of divmodL
 2795 const RegMask& Matcher::modL_proj_mask() {
 2796   return LONG_RDX_REG_mask();
 2797 }
 2798 
 2799 %}
 2800 
 2801 source_hpp %{
 2802 // Header information of the source block.
 2803 // Method declarations/definitions which are used outside
 2804 // the ad-scope can conveniently be defined here.
 2805 //
 2806 // To keep related declarations/definitions/uses close together,
 2807 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2808 
 2809 #include "runtime/vm_version.hpp"
 2810 
 2811 class NativeJump;
 2812 
 2813 class CallStubImpl {
 2814 
 2815   //--------------------------------------------------------------
 2816   //---<  Used for optimization in Compile::shorten_branches  >---
 2817   //--------------------------------------------------------------
 2818 
 2819  public:
 2820   // Size of call trampoline stub.
 2821   static uint size_call_trampoline() {
 2822     return 0; // no call trampolines on this platform
 2823   }
 2824 
 2825   // number of relocations needed by a call trampoline stub
 2826   static uint reloc_call_trampoline() {
 2827     return 0; // no call trampolines on this platform
 2828   }
 2829 };
 2830 
 2831 class HandlerImpl {
 2832 
 2833  public:
 2834 
 2835   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2836 
 2837   static uint size_deopt_handler() {
    // One 5-byte call (E8 + rel32) and one 2-byte short jmp (EB + rel8).
    return 7;
 2840   }
 2841 };
 2842 
 2843 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2845     case  4: // fall-through
 2846     case  8: // fall-through
 2847     case 16: return Assembler::AVX_128bit;
 2848     case 32: return Assembler::AVX_256bit;
 2849     case 64: return Assembler::AVX_512bit;
 2850 
 2851     default: {
 2852       ShouldNotReachHere();
 2853       return Assembler::AVX_NoVec;
 2854     }
 2855   }
 2856 }
 2857 
 2858 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2859   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2860 }
 2861 
 2862 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2863   uint def_idx = use->operand_index(opnd);
 2864   Node* def = use->in(def_idx);
 2865   return vector_length_encoding(def);
 2866 }
 2867 
 2868 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2869   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2870          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2871 }
 2872 
 2873 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2874   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2875            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2876 }
 2877 
 2878 class Node::PD {
 2879 public:
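  // Platform-dependent node flags, allocated directly above the shared
  // Node flags (starting at Node::_last_flag << 1).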
 2880   enum NodeFlags : uint64_t {
 2881     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2882     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2883     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2884     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2885     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2886     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2887     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2888     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2889     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2890     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2891     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2892     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2893     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2894     _last_flag                = Flag_ndd_demotable_opr2
 2895   };
 2896 };
 2897 
 2898 %} // end source_hpp
 2899 
 2900 source %{
 2901 
 2902 #include "opto/addnode.hpp"
 2903 #include "c2_intelJccErratum_x86.hpp"
 2904 
 2905 void PhaseOutput::pd_perform_mach_node_analysis() {
 2906   if (VM_Version::has_intel_jcc_erratum()) {
 2907     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2908     _buf_sizes._code += extra_padding;
 2909   }
 2910 }
 2911 
 2912 int MachNode::pd_alignment_required() const {
 2913   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2914     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2915     return IntelJccErratum::largest_jcc_size() + 1;
 2916   } else {
 2917     return 1;
 2918   }
 2919 }
 2920 
 2921 int MachNode::compute_padding(int current_offset) const {
 2922   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2923     Compile* C = Compile::current();
 2924     PhaseOutput* output = C->output();
 2925     Block* block = output->block();
 2926     int index = output->index();
 2927     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2928   } else {
 2929     return 0;
 2930   }
 2931 }
 2932 
 2933 // Emit deopt handler code.
 2934 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2935 
 2936   // Note that the code buffer's insts_mark is always relative to insts.
 2937   // That's why we must use the macroassembler to generate a handler.
 2938   address base = __ start_a_stub(size_deopt_handler());
 2939   if (base == nullptr) {
 2940     ciEnv::current()->record_failure("CodeCache is full");
 2941     return 0;  // CodeBuffer::expand failed
 2942   }
 2943   int offset = __ offset();
 2944 
 2945   Label start;
 2946   __ bind(start);
 2947 
 2948   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2949 
 2950   int entry_offset = __ offset();
 2951 
 2952   __ jmp(start);
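  // The jmp pads the stub so that at least NativePostCallNop::first_check_size
  // bytes follow the call's return address (see the assert below).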
 2953 
 2954   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2955   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2956          "out of bounds read in post-call NOP check");
 2957   __ end_a_stub();
 2958   return entry_offset;
 2959 }
 2960 
 2961 static Assembler::Width widthForType(BasicType bt) {
 2962   if (bt == T_BYTE) {
 2963     return Assembler::B;
 2964   } else if (bt == T_SHORT) {
 2965     return Assembler::W;
 2966   } else if (bt == T_INT) {
 2967     return Assembler::D;
 2968   } else {
 2969     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2970     return Assembler::Q;
 2971   }
 2972 }
 2973 
 2974 //=============================================================================
 2975 
// Float masks come from different places depending on platform.
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
 2996 
 2997 //=============================================================================
 2998 bool Matcher::match_rule_supported(int opcode) {
 2999   if (!has_match_rule(opcode)) {
 3000     return false; // no match rule present
 3001   }
 3002   switch (opcode) {
 3003     case Op_AbsVL:
 3004     case Op_StoreVectorScatter:
 3005       if (UseAVX < 3) {
 3006         return false;
 3007       }
 3008       break;
 3009     case Op_PopCountI:
 3010     case Op_PopCountL:
 3011       if (!UsePopCountInstruction) {
 3012         return false;
 3013       }
 3014       break;
 3015     case Op_PopCountVI:
 3016       if (UseAVX < 2) {
 3017         return false;
 3018       }
 3019       break;
 3020     case Op_CompressV:
 3021     case Op_ExpandV:
 3022     case Op_PopCountVL:
 3023       if (UseAVX < 2) {
 3024         return false;
 3025       }
 3026       break;
 3027     case Op_MulVI:
 3028       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3029         return false;
 3030       }
 3031       break;
 3032     case Op_MulVL:
 3033       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3034         return false;
 3035       }
 3036       break;
 3037     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
 3039         return false;
 3040       }
 3041       break;
 3042     case Op_AbsVB:
 3043     case Op_AbsVS:
 3044     case Op_AbsVI:
 3045     case Op_AddReductionVI:
 3046     case Op_AndReductionV:
 3047     case Op_OrReductionV:
 3048     case Op_XorReductionV:
 3049       if (UseSSE < 3) { // requires at least SSSE3
 3050         return false;
 3051       }
 3052       break;
 3053     case Op_MaxHF:
 3054     case Op_MinHF:
 3055       if (!VM_Version::supports_avx512vlbw()) {
 3056         return false;
      } // fallthrough
 3058     case Op_AddHF:
 3059     case Op_DivHF:
 3060     case Op_FmaHF:
 3061     case Op_MulHF:
 3062     case Op_ReinterpretS2HF:
 3063     case Op_ReinterpretHF2S:
 3064     case Op_SubHF:
 3065     case Op_SqrtHF:
 3066       if (!VM_Version::supports_avx512_fp16()) {
 3067         return false;
 3068       }
 3069       break;
 3070     case Op_VectorLoadShuffle:
 3071     case Op_VectorRearrange:
 3072     case Op_MulReductionVI:
 3073       if (UseSSE < 4) { // requires at least SSE4
 3074         return false;
 3075       }
 3076       break;
 3077     case Op_IsInfiniteF:
 3078     case Op_IsInfiniteD:
 3079       if (!VM_Version::supports_avx512dq()) {
 3080         return false;
 3081       }
 3082       break;
 3083     case Op_SqrtVD:
 3084     case Op_SqrtVF:
 3085     case Op_VectorMaskCmp:
 3086     case Op_VectorCastB2X:
 3087     case Op_VectorCastS2X:
 3088     case Op_VectorCastI2X:
 3089     case Op_VectorCastL2X:
 3090     case Op_VectorCastF2X:
 3091     case Op_VectorCastD2X:
 3092     case Op_VectorUCastB2X:
 3093     case Op_VectorUCastS2X:
 3094     case Op_VectorUCastI2X:
 3095     case Op_VectorMaskCast:
 3096       if (UseAVX < 1) { // enabled for AVX only
 3097         return false;
 3098       }
 3099       break;
 3100     case Op_PopulateIndex:
 3101       if (UseAVX < 2) {
 3102         return false;
 3103       }
 3104       break;
 3105     case Op_RoundVF:
 3106       if (UseAVX < 2) { // enabled for AVX2 only
 3107         return false;
 3108       }
 3109       break;
 3110     case Op_RoundVD:
 3111       if (UseAVX < 3) {
 3112         return false;  // enabled for AVX3 only
 3113       }
 3114       break;
 3115     case Op_CompareAndSwapL:
 3116     case Op_CompareAndSwapP:
 3117       break;
    case Op_StrIndexOf:
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
 3128     case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
 3130         return false;
 3131       }
 3132       break;
 3133     case Op_MulVB:
 3134     case Op_LShiftVB:
 3135     case Op_RShiftVB:
 3136     case Op_URShiftVB:
 3137     case Op_VectorInsert:
 3138     case Op_VectorLoadMask:
 3139     case Op_VectorStoreMask:
 3140     case Op_VectorBlend:
 3141       if (UseSSE < 4) {
 3142         return false;
 3143       }
 3144       break;
 3145     case Op_MaxD:
 3146     case Op_MaxF:
 3147     case Op_MinD:
 3148     case Op_MinF:
 3149       if (UseAVX < 1) { // enabled for AVX only
 3150         return false;
 3151       }
 3152       break;
 3153     case Op_CacheWB:
 3154     case Op_CacheWBPreSync:
 3155     case Op_CacheWBPostSync:
 3156       if (!VM_Version::supports_data_cache_line_flush()) {
 3157         return false;
 3158       }
 3159       break;
 3160     case Op_ExtractB:
 3161     case Op_ExtractL:
 3162     case Op_ExtractI:
 3163     case Op_RoundDoubleMode:
 3164       if (UseSSE < 4) {
 3165         return false;
 3166       }
 3167       break;
 3168     case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
 3170         return false; // 128bit vroundpd is not available
 3171       }
 3172       break;
 3173     case Op_LoadVectorGather:
 3174     case Op_LoadVectorGatherMasked:
 3175       if (UseAVX < 2) {
 3176         return false;
 3177       }
 3178       break;
 3179     case Op_FmaF:
 3180     case Op_FmaD:
 3181     case Op_FmaVD:
 3182     case Op_FmaVF:
 3183       if (!UseFMA) {
 3184         return false;
 3185       }
 3186       break;
 3187     case Op_MacroLogicV:
 3188       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3189         return false;
 3190       }
 3191       break;
 3192 
 3193     case Op_VectorCmpMasked:
 3194     case Op_VectorMaskGen:
 3195       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3196         return false;
 3197       }
 3198       break;
 3199     case Op_VectorMaskFirstTrue:
 3200     case Op_VectorMaskLastTrue:
 3201     case Op_VectorMaskTrueCount:
 3202     case Op_VectorMaskToLong:
 3203       if (UseAVX < 1) {
 3204          return false;
 3205       }
 3206       break;
 3207     case Op_RoundF:
 3208     case Op_RoundD:
 3209       break;
 3210     case Op_CopySignD:
 3211     case Op_CopySignF:
 3212       if (UseAVX < 3)  {
 3213         return false;
 3214       }
 3215       if (!VM_Version::supports_avx512vl()) {
 3216         return false;
 3217       }
 3218       break;
 3219     case Op_CompressBits:
 3220     case Op_ExpandBits:
 3221       if (!VM_Version::supports_bmi2()) {
 3222         return false;
 3223       }
 3224       break;
 3225     case Op_CompressM:
 3226       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3227         return false;
 3228       }
 3229       break;
 3230     case Op_ConvF2HF:
 3231     case Op_ConvHF2F:
 3232       if (!VM_Version::supports_float16()) {
 3233         return false;
 3234       }
 3235       break;
 3236     case Op_VectorCastF2HF:
 3237     case Op_VectorCastHF2F:
 3238       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3239         return false;
 3240       }
 3241       break;
 3242   }
 3243   return true;  // Match rules are supported by default.
 3244 }
 3245 
 3246 //------------------------------------------------------------------------
 3247 
 3248 static inline bool is_pop_count_instr_target(BasicType bt) {
 3249   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3250          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3251 }
 3252 
 3253 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3254   return match_rule_supported_vector(opcode, vlen, bt);
 3255 }
 3256 
 3257 // Identify extra cases that we might want to provide match rules for vector nodes and
 3258 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3259 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3260   if (!match_rule_supported(opcode)) {
 3261     return false;
 3262   }
 3263   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3264   //   * SSE2 supports 128bit vectors for all types;
 3265   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3266   //   * AVX2 supports 256bit vectors for all types;
 3267   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3268   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3269   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3270   // And MaxVectorSize is taken into account as well.
 3271   if (!vector_size_supported(bt, vlen)) {
 3272     return false;
 3273   }
 3274   // Special cases which require vector length follow:
 3275   //   * implementation limitations
 3276   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3277   //   * 128bit vroundpd instruction is present only in AVX1
 3278   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
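  // For example, vlen = 8 and bt = T_FLOAT gives 8 * 4 * 8 = 256 bits.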
 3279   switch (opcode) {
 3280     case Op_MaxVHF:
 3281     case Op_MinVHF:
 3282       if (!VM_Version::supports_avx512bw()) {
 3283         return false;
 3284       }
 3285     case Op_AddVHF:
 3286     case Op_DivVHF:
 3287     case Op_FmaVHF:
 3288     case Op_MulVHF:
 3289     case Op_SubVHF:
 3290     case Op_SqrtVHF:
 3291       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3292         return false;
 3293       }
 3294       if (!VM_Version::supports_avx512_fp16()) {
 3295         return false;
 3296       }
 3297       break;
 3298     case Op_AbsVF:
 3299     case Op_NegVF:
      if (vlen == 16 && !VM_Version::supports_avx512dq()) {
 3301         return false; // 512bit vandps and vxorps are not available
 3302       }
 3303       break;
 3304     case Op_AbsVD:
 3305     case Op_NegVD:
      if (vlen == 8 && !VM_Version::supports_avx512dq()) {
 3307         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3308       }
 3309       break;
 3310     case Op_RotateRightV:
 3311     case Op_RotateLeftV:
 3312       if (bt != T_INT && bt != T_LONG) {
 3313         return false;
 3314       } // fallthrough
 3315     case Op_MacroLogicV:
 3316       if (!VM_Version::supports_evex() ||
 3317           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3318         return false;
 3319       }
 3320       break;
 3321     case Op_ClearArray:
 3322     case Op_VectorMaskGen:
 3323     case Op_VectorCmpMasked:
 3324       if (!VM_Version::supports_avx512bw()) {
 3325         return false;
 3326       }
 3327       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3328         return false;
 3329       }
 3330       break;
 3331     case Op_LoadVectorMasked:
 3332     case Op_StoreVectorMasked:
 3333       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3334         return false;
 3335       }
 3336       break;
 3337     case Op_UMinV:
 3338     case Op_UMaxV:
 3339       if (UseAVX == 0) {
 3340         return false;
 3341       }
 3342       break;
 3343     case Op_MaxV:
 3344     case Op_MinV:
 3345       if (UseSSE < 4 && is_integral_type(bt)) {
 3346         return false;
 3347       }
      if (bt == T_FLOAT || bt == T_DOUBLE) {
        // Float/Double intrinsics are enabled for the AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        // 512-bit Float/Double intrinsics need AVX512DQ.
        if (UseAVX > 2 && !VM_Version::supports_avx512dq() && size_in_bits == 512) {
          return false;
        }
      }
 3357       break;
 3358     case Op_CallLeafVector:
 3359       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3360         return false;
 3361       }
 3362       break;
 3363     case Op_AddReductionVI:
 3364       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3365         return false;
 3366       }
 3367       // fallthrough
 3368     case Op_AndReductionV:
 3369     case Op_OrReductionV:
 3370     case Op_XorReductionV:
 3371       if (is_subword_type(bt) && (UseSSE < 4)) {
 3372         return false;
 3373       }
 3374       break;
 3375     case Op_MinReductionV:
 3376     case Op_MaxReductionV:
 3377       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3378         return false;
 3379       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3380         return false;
 3381       }
 3382       // Float/Double intrinsics enabled for AVX family.
 3383       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3384         return false;
 3385       }
 3386       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3387         return false;
 3388       }
 3389       break;
 3390     case Op_VectorBlend:
 3391       if (UseAVX == 0 && size_in_bits < 128) {
 3392         return false;
 3393       }
 3394       break;
 3395     case Op_VectorTest:
 3396       if (UseSSE < 4) {
 3397         return false; // Implementation limitation
 3398       } else if (size_in_bits < 32) {
 3399         return false; // Implementation limitation
 3400       }
 3401       break;
 3402     case Op_VectorLoadShuffle:
 3403     case Op_VectorRearrange:
      if (vlen == 2) {
 3405         return false; // Implementation limitation due to how shuffle is loaded
 3406       } else if (size_in_bits == 256 && UseAVX < 2) {
 3407         return false; // Implementation limitation
 3408       }
 3409       break;
 3410     case Op_VectorLoadMask:
 3411     case Op_VectorMaskCast:
 3412       if (size_in_bits == 256 && UseAVX < 2) {
 3413         return false; // Implementation limitation
 3414       }
 3415       // fallthrough
 3416     case Op_VectorStoreMask:
 3417       if (vlen == 2) {
 3418         return false; // Implementation limitation
 3419       }
 3420       break;
 3421     case Op_PopulateIndex:
 3422       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3423         return false;
 3424       }
 3425       break;
 3426     case Op_VectorCastB2X:
 3427     case Op_VectorCastS2X:
 3428     case Op_VectorCastI2X:
 3429       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3430         return false;
 3431       }
 3432       break;
 3433     case Op_VectorCastL2X:
 3434       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3435         return false;
 3436       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3437         return false;
 3438       }
 3439       break;
 3440     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversions to sub-word types
        // happen after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3444         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3445         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3446           return false;
 3447         }
 3448       }
 3449       // fallthrough
 3450     case Op_VectorCastD2X:
 3451       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3452         return false;
 3453       }
 3454       break;
 3455     case Op_VectorCastF2HF:
 3456     case Op_VectorCastHF2F:
 3457       if (!VM_Version::supports_f16c() &&
 3458          ((!VM_Version::supports_evex() ||
 3459          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3460         return false;
 3461       }
 3462       break;
 3463     case Op_RoundVD:
 3464       if (!VM_Version::supports_avx512dq()) {
 3465         return false;
 3466       }
 3467       break;
 3468     case Op_MulReductionVI:
 3469       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3470         return false;
 3471       }
 3472       break;
 3473     case Op_LoadVectorGatherMasked:
 3474       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3475         return false;
 3476       }
 3477       if (is_subword_type(bt) &&
 3478          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3479           (size_in_bits < 64)                                      ||
 3480           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3481         return false;
 3482       }
 3483       break;
 3484     case Op_StoreVectorScatterMasked:
 3485     case Op_StoreVectorScatter:
 3486       if (is_subword_type(bt)) {
 3487         return false;
 3488       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3489         return false;
 3490       }
 3491       // fallthrough
 3492     case Op_LoadVectorGather:
 3493       if (!is_subword_type(bt) && size_in_bits == 64) {
 3494         return false;
 3495       }
 3496       if (is_subword_type(bt) && size_in_bits < 64) {
 3497         return false;
 3498       }
 3499       break;
 3500     case Op_SaturatingAddV:
 3501     case Op_SaturatingSubV:
 3502       if (UseAVX < 1) {
 3503         return false; // Implementation limitation
 3504       }
 3505       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3506         return false;
 3507       }
 3508       break;
 3509     case Op_SelectFromTwoVector:
 3510        if (size_in_bits < 128) {
 3511          return false;
 3512        }
 3513        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3514          return false;
 3515        }
 3516        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3517          return false;
 3518        }
 3519        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3520          return false;
 3521        }
 3522        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3523          return false;
 3524        }
 3525        break;
 3526     case Op_MaskAll:
 3527       if (!VM_Version::supports_evex()) {
 3528         return false;
 3529       }
 3530       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3531         return false;
 3532       }
 3533       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3534         return false;
 3535       }
 3536       break;
 3537     case Op_VectorMaskCmp:
 3538       if (vlen < 2 || size_in_bits < 32) {
 3539         return false;
 3540       }
 3541       break;
 3542     case Op_CompressM:
 3543       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3544         return false;
 3545       }
 3546       break;
 3547     case Op_CompressV:
 3548     case Op_ExpandV:
 3549       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3550         return false;
 3551       }
      if (size_in_bits < 128) {
 3553         return false;
 3554       }
 3555     case Op_VectorLongToMask:
 3556       if (UseAVX < 1) {
 3557         return false;
 3558       }
 3559       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3560         return false;
 3561       }
 3562       break;
 3563     case Op_SignumVD:
 3564     case Op_SignumVF:
 3565       if (UseAVX < 1) {
 3566         return false;
 3567       }
 3568       break;
 3569     case Op_PopCountVI:
 3570     case Op_PopCountVL: {
 3571         if (!is_pop_count_instr_target(bt) &&
 3572             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3573           return false;
 3574         }
 3575       }
 3576       break;
 3577     case Op_ReverseV:
 3578     case Op_ReverseBytesV:
 3579       if (UseAVX < 2) {
 3580         return false;
 3581       }
 3582       break;
 3583     case Op_CountTrailingZerosV:
 3584     case Op_CountLeadingZerosV:
 3585       if (UseAVX < 2) {
 3586         return false;
 3587       }
 3588       break;
 3589   }
  return true;  // Match rules are supported by default.
 3591 }
 3592 
 3593 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked
  // operations share the IR nodes of their non-masked counterparts, with the
  // mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation
  // patterns by returning a default false value for all opcodes apart from
  // the ones whose masked instruction patterns are defined in this file.
 3600   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3601     return false;
 3602   }
 3603 
 3604   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
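  // Masked operations on sub-512-bit vectors additionally require AVX512VL.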
 3605   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3606     return false;
 3607   }
 3608   switch(opcode) {
 3609     // Unary masked operations
 3610     case Op_AbsVB:
 3611     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
 3613         return false;  // Implementation limitation
 3614       }
 3615     case Op_AbsVI:
 3616     case Op_AbsVL:
 3617       return true;
 3618 
 3619     // Ternary masked operations
 3620     case Op_FmaVF:
 3621     case Op_FmaVD:
 3622       return true;
 3623 
 3624     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3626         return false;
 3627       }
 3628       return true;
 3629 
 3630     // Binary masked operations
 3631     case Op_AddVB:
 3632     case Op_AddVS:
 3633     case Op_SubVB:
 3634     case Op_SubVS:
 3635     case Op_MulVS:
 3636     case Op_LShiftVS:
 3637     case Op_RShiftVS:
 3638     case Op_URShiftVS:
 3639       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3640       if (!VM_Version::supports_avx512bw()) {
 3641         return false;  // Implementation limitation
 3642       }
 3643       return true;
 3644 
 3645     case Op_MulVL:
 3646       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3647       if (!VM_Version::supports_avx512dq()) {
 3648         return false;  // Implementation limitation
 3649       }
 3650       return true;
 3651 
 3652     case Op_AndV:
 3653     case Op_OrV:
 3654     case Op_XorV:
 3655     case Op_RotateRightV:
 3656     case Op_RotateLeftV:
 3657       if (bt != T_INT && bt != T_LONG) {
 3658         return false; // Implementation limitation
 3659       }
 3660       return true;
 3661 
 3662     case Op_VectorLoadMask:
 3663       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3664       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3665         return false;
 3666       }
 3667       return true;
 3668 
 3669     case Op_AddVI:
 3670     case Op_AddVL:
 3671     case Op_AddVF:
 3672     case Op_AddVD:
 3673     case Op_SubVI:
 3674     case Op_SubVL:
 3675     case Op_SubVF:
 3676     case Op_SubVD:
 3677     case Op_MulVI:
 3678     case Op_MulVF:
 3679     case Op_MulVD:
 3680     case Op_DivVF:
 3681     case Op_DivVD:
 3682     case Op_SqrtVF:
 3683     case Op_SqrtVD:
 3684     case Op_LShiftVI:
 3685     case Op_LShiftVL:
 3686     case Op_RShiftVI:
 3687     case Op_RShiftVL:
 3688     case Op_URShiftVI:
 3689     case Op_URShiftVL:
 3690     case Op_LoadVectorMasked:
 3691     case Op_StoreVectorMasked:
 3692     case Op_LoadVectorGatherMasked:
 3693     case Op_StoreVectorScatterMasked:
 3694       return true;
 3695 
 3696     case Op_UMinV:
 3697     case Op_UMaxV:
 3698       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3699         return false;
 3700       } // fallthrough
 3701     case Op_MaxV:
 3702     case Op_MinV:
 3703       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3704         return false; // Implementation limitation
 3705       }
 3706       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3707         return false; // Implementation limitation
 3708       }
 3709       return true;
 3710     case Op_SaturatingAddV:
 3711     case Op_SaturatingSubV:
 3712       if (!is_subword_type(bt)) {
 3713         return false;
 3714       }
 3715       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3716         return false; // Implementation limitation
 3717       }
 3718       return true;
 3719 
 3720     case Op_VectorMaskCmp:
 3721       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3722         return false; // Implementation limitation
 3723       }
 3724       return true;
 3725 
 3726     case Op_VectorRearrange:
 3727       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3728         return false; // Implementation limitation
 3729       }
 3730       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3731         return false; // Implementation limitation
 3732       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3733         return false; // Implementation limitation
 3734       }
 3735       return true;
 3736 
 3737     // Binary Logical operations
 3738     case Op_AndVMask:
 3739     case Op_OrVMask:
 3740     case Op_XorVMask:
 3741       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3742         return false; // Implementation limitation
 3743       }
 3744       return true;
 3745 
 3746     case Op_PopCountVI:
 3747     case Op_PopCountVL:
 3748       if (!is_pop_count_instr_target(bt)) {
 3749         return false;
 3750       }
 3751       return true;
 3752 
 3753     case Op_MaskAll:
 3754       return true;
 3755 
 3756     case Op_CountLeadingZerosV:
 3757       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3758         return true;
      }
      // fallthrough
 3760     default:
 3761       return false;
 3762   }
 3763 }
 3764 
 3765 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3766   return false;
 3767 }
 3768 
 3769 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3770 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3771   switch (elem_bt) {
 3772     case T_BYTE:  return false;
 3773     case T_SHORT: return !VM_Version::supports_avx512bw();
 3774     case T_INT:   return !VM_Version::supports_avx();
 3775     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3776     default:
 3777       ShouldNotReachHere();
 3778       return false;
 3779   }
 3780 }
 3781 
 3782 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3783   // Prefer predicate if the mask type is "TypeVectMask".
 3784   return vt->isa_vectmask() != nullptr;
 3785 }
 3786 
 3787 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3788   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3789   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3790   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3791       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3792     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3793     return new legVecZOper();
 3794   }
 3795   if (legacy) {
 3796     switch (ideal_reg) {
 3797       case Op_VecS: return new legVecSOper();
 3798       case Op_VecD: return new legVecDOper();
 3799       case Op_VecX: return new legVecXOper();
 3800       case Op_VecY: return new legVecYOper();
 3801       case Op_VecZ: return new legVecZOper();
 3802     }
 3803   } else {
 3804     switch (ideal_reg) {
 3805       case Op_VecS: return new vecSOper();
 3806       case Op_VecD: return new vecDOper();
 3807       case Op_VecX: return new vecXOper();
 3808       case Op_VecY: return new vecYOper();
 3809       case Op_VecZ: return new vecZOper();
 3810     }
 3811   }
 3812   ShouldNotReachHere();
 3813   return nullptr;
 3814 }
 3815 
 3816 bool Matcher::is_reg2reg_move(MachNode* m) {
 3817   switch (m->rule()) {
 3818     case MoveVec2Leg_rule:
 3819     case MoveLeg2Vec_rule:
 3820     case MoveF2VL_rule:
 3821     case MoveF2LEG_rule:
 3822     case MoveVL2F_rule:
 3823     case MoveLEG2F_rule:
 3824     case MoveD2VL_rule:
 3825     case MoveD2LEG_rule:
 3826     case MoveVL2D_rule:
 3827     case MoveLEG2D_rule:
 3828       return true;
 3829     default:
 3830       return false;
 3831   }
 3832 }
 3833 
 3834 bool Matcher::is_generic_vector(MachOper* opnd) {
 3835   switch (opnd->opcode()) {
 3836     case VEC:
 3837     case LEGVEC:
 3838       return true;
 3839     default:
 3840       return false;
 3841   }
 3842 }
 3843 
 3844 //------------------------------------------------------------------------
 3845 
 3846 const RegMask* Matcher::predicate_reg_mask(void) {
 3847   return &_VECTMASK_REG_mask;
 3848 }
 3849 
 3850 // Max vector size in bytes. 0 if not supported.
 3851 int Matcher::vector_width_in_bytes(BasicType bt) {
 3852   assert(is_java_primitive(bt), "only primitive type vectors");
 3853   // SSE2 supports 128bit vectors for all types.
 3854   // AVX2 supports 256bit vectors for all types.
  // AVX-512/EVEX supports 512bit vectors for all types.
 3856   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
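  // e.g. UseAVX == 2 gives (1 << 2) * 8 = 32 bytes (256-bit),
  //      UseAVX == 3 gives (1 << 3) * 8 = 64 bytes (512-bit).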
 3857   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3858   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3859     size = (UseAVX > 2) ? 64 : 32;
 3860   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3861     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3862   // Use flag to limit vector size.
 3863   size = MIN2(size,(int)MaxVectorSize);
 3864   // Minimum 2 values in vector (or 4 for bytes).
 3865   switch (bt) {
 3866   case T_DOUBLE:
 3867   case T_LONG:
 3868     if (size < 16) return 0;
 3869     break;
 3870   case T_FLOAT:
 3871   case T_INT:
 3872     if (size < 8) return 0;
 3873     break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
 3886   default:
 3887     ShouldNotReachHere();
 3888   }
 3889   return size;
 3890 }
 3891 
 3892 // Limits on vector size (number of elements) loaded into vector.
 3893 int Matcher::max_vector_size(const BasicType bt) {
 3894   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3895 }
 3896 int Matcher::min_vector_size(const BasicType bt) {
 3897   int max_size = max_vector_size(bt);
 3898   // Min size which can be loaded into vector is 4 bytes.
 3899   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
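  // i.e. a minimum of 4 one-byte elements, or 2 elements of any wider type.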
  // Support for calling SVML "double64" (one-element double) vector routines.
 3901   if (bt == T_DOUBLE) {
 3902     size = 1;
 3903   }
 3904   return MIN2(size,max_size);
 3905 }
 3906 
 3907 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3908   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
  // by default on Cascade Lake.
 3910   if (VM_Version::is_default_intel_cascade_lake()) {
 3911     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3912   }
 3913   return Matcher::max_vector_size(bt);
 3914 }
 3915 
 3916 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3917   return -1;
 3918 }
 3919 
 3920 // Vector ideal reg corresponding to specified size in bytes
 3921 uint Matcher::vector_ideal_reg(int size) {
 3922   assert(MaxVectorSize >= size, "");
  switch (size) {
 3924     case  4: return Op_VecS;
 3925     case  8: return Op_VecD;
 3926     case 16: return Op_VecX;
 3927     case 32: return Op_VecY;
 3928     case 64: return Op_VecZ;
 3929   }
 3930   ShouldNotReachHere();
 3931   return 0;
 3932 }
 3933 
 3934 // Check for shift by small constant as well
 3935 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3936   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3937       shift->in(2)->get_int() <= 3 &&
 3938       // Are there other uses besides address expressions?
 3939       !matcher->is_visited(shift)) {
 3940     address_visited.set(shift->_idx); // Flag as address_visited
 3941     mstack.push(shift->in(2), Matcher::Visit);
 3942     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3946     if (conv->Opcode() == Op_ConvI2L &&
 3947         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3948         // Are there other uses besides address expressions?
 3949         !matcher->is_visited(conv)) {
 3950       address_visited.set(conv->_idx); // Flag as address_visited
 3951       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3952     } else {
 3953       mstack.push(conv, Matcher::Pre_Visit);
 3954     }
 3955     return true;
 3956   }
 3957   return false;
 3958 }
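
// As a sketch of what clone_shift enables: a long-array element access
// computes roughly base + disp + (ConvI2L(index) << 3); cloning the shift
// (and the ConvI2L when the index is known non-negative) lets the matcher
// fold the whole expression into a single [base + index*8 + disp]
// addressing mode instead of computing it into a register first.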
 3959 
// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that the pair can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
//
// Match the generic fused-operation pattern (op1 (op2 Con{ConType} mop) mop).
// This is a temporary solution until we make DAGs expressible in ADL.
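//
// A minimal worked instance of the blsi case: Java code such as
//   int b = (-a[i]) & a[i];
// produces the ideal graph (AndI (SubI ConI(0) LoadI*) LoadI*), which
// the call bmii.match(Op_AndI, -1, Op_SubI, 1, 0) below recognizes, so
// both uses of the load fold into a single blsi r32, m32.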
 3969 template<typename ConType>
 3970 class FusedPatternMatcher {
 3971   Node* _op1_node;
 3972   Node* _mop_node;
 3973   int _con_op;
 3974 
 3975   static int match_next(Node* n, int next_op, int next_op_idx) {
 3976     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3977       return -1;
 3978     }
 3979 
 3980     if (next_op_idx == -1) { // n is commutative, try rotations
 3981       if (n->in(1)->Opcode() == next_op) {
 3982         return 1;
 3983       } else if (n->in(2)->Opcode() == next_op) {
 3984         return 2;
 3985       }
 3986     } else {
 3987       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3988       if (n->in(next_op_idx)->Opcode() == next_op) {
 3989         return next_op_idx;
 3990       }
 3991     }
 3992     return -1;
 3993   }
 3994 
 3995  public:
 3996   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 3997     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 3998 
 3999   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4000              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4001              typename ConType::NativeType con_value) {
 4002     if (_op1_node->Opcode() != op1) {
 4003       return false;
 4004     }
 4005     if (_mop_node->outcnt() > 2) {
 4006       return false;
 4007     }
 4008     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4009     if (op1_op2_idx == -1) {
 4010       return false;
 4011     }
 4012     // Memory operation must be the other edge
 4013     int op1_mop_idx = (op1_op2_idx & 1) + 1;
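    // ((op1_op2_idx & 1) + 1) maps edge index 1 -> 2 and 2 -> 1.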
 4014 
 4015     // Check that the mop node is really what we want
 4016     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4017       Node* op2_node = _op1_node->in(op1_op2_idx);
 4018       if (op2_node->outcnt() > 1) {
 4019         return false;
 4020       }
 4021       assert(op2_node->Opcode() == op2, "Should be");
 4022       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4023       if (op2_con_idx == -1) {
 4024         return false;
 4025       }
 4026       // Memory operation must be the other edge
 4027       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4028       // Check that the memory operation is the same node
 4029       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4030         // Now check the constant
 4031         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4032         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4033           return true;
 4034         }
 4035       }
 4036     }
 4037     return false;
 4038   }
 4039 };
 4040 
 4041 static bool is_bmi_pattern(Node* n, Node* m) {
 4042   assert(UseBMI1Instructions, "sanity");
 4043   if (n != nullptr && m != nullptr) {
 4044     if (m->Opcode() == Op_LoadI) {
 4045       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4046       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4047              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4048              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4049     } else if (m->Opcode() == Op_LoadL) {
 4050       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4051       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4052              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4053              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4054     }
 4055   }
 4056   return false;
 4057 }
 4058 
 4059 // Should the matcher clone input 'm' of node 'n'?
 4060 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4061   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4062   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4063     mstack.push(m, Visit);
 4064     return true;
 4065   }
 4066   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4067     mstack.push(m, Visit);           // m = ShiftCntV
 4068     return true;
 4069   }
 4070   if (is_encode_and_store_pattern(n, m)) {
 4071     mstack.push(m, Visit);
 4072     return true;
 4073   }
 4074   return false;
 4075 }
 4076 
 4077 // Should the Matcher clone shifts on addressing modes, expecting them
 4078 // to be subsumed into complex addressing expressions or compute them
 4079 // into registers?
 4080 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4081   Node *off = m->in(AddPNode::Offset);
 4082   if (off->is_Con()) {
 4083     address_visited.test_set(m->_idx); // Flag as address_visited
 4084     Node *adr = m->in(AddPNode::Address);
 4085 
 4086     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4087     // AtomicAdd is not an addressing expression.
 4088     // Cheap to find it by looking for screwy base.
 4089     if (adr->is_AddP() &&
 4090         !adr->in(AddPNode::Base)->is_top() &&
 4091         !adr->in(AddPNode::Offset)->is_Con() &&
 4092         off->get_long() == (int) (off->get_long()) && // immL32
 4093         // Are there other uses besides address expressions?
 4094         !is_visited(adr)) {
 4095       address_visited.set(adr->_idx); // Flag as address_visited
 4096       Node *shift = adr->in(AddPNode::Offset);
 4097       if (!clone_shift(shift, this, mstack, address_visited)) {
 4098         mstack.push(shift, Pre_Visit);
 4099       }
 4100       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4101       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4102     } else {
 4103       mstack.push(adr, Pre_Visit);
 4104     }
 4105 
 4106     // Clone X+offset as it also folds into most addressing expressions
 4107     mstack.push(off, Visit);
 4108     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4109     return true;
 4110   } else if (clone_shift(off, this, mstack, address_visited)) {
 4111     address_visited.test_set(m->_idx); // Flag as address_visited
 4112     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4113     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4114     return true;
 4115   }
 4116   return false;
 4117 }
 4118 
 4119 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4120   switch (bt) {
 4121     case BoolTest::eq:
 4122       return Assembler::eq;
 4123     case BoolTest::ne:
 4124       return Assembler::neq;
 4125     case BoolTest::le:
 4126     case BoolTest::ule:
 4127       return Assembler::le;
 4128     case BoolTest::ge:
 4129     case BoolTest::uge:
 4130       return Assembler::nlt;
 4131     case BoolTest::lt:
 4132     case BoolTest::ult:
 4133       return Assembler::lt;
 4134     case BoolTest::gt:
 4135     case BoolTest::ugt:
 4136       return Assembler::nle;
 4137     default : ShouldNotReachHere(); return Assembler::_false;
 4138   }
 4139 }
 4140 
 4141 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4142   switch (bt) {
 4143   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4144   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4145   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4146   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4147   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4148   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4149   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4150   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4151   }
 4152 }
 4153 
 4154 // Helper methods for MachSpillCopyNode::implementation().
 4155 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4156                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4157   assert(ireg == Op_VecS || // 32bit vector
 4158          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4159           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
         "no non-adjacent vector moves");
 4161   if (masm) {
 4162     switch (ireg) {
 4163     case Op_VecS: // copy whole register
 4164     case Op_VecD:
 4165     case Op_VecX:
 4166       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4167         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4168       } else {
 4169         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
 4171       break;
 4172     case Op_VecY:
 4173       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4174         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4175       } else {
 4176         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
 4178       break;
 4179     case Op_VecZ:
 4180       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4181       break;
 4182     default:
 4183       ShouldNotReachHere();
 4184     }
 4185 #ifndef PRODUCT
 4186   } else {
 4187     switch (ireg) {
 4188     case Op_VecS:
 4189     case Op_VecD:
 4190     case Op_VecX:
 4191       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4192       break;
 4193     case Op_VecY:
 4194     case Op_VecZ:
 4195       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4196       break;
 4197     default:
 4198       ShouldNotReachHere();
 4199     }
 4200 #endif
 4201   }
 4202 }
 4203 
 4204 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4205                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4206   if (masm) {
 4207     if (is_load) {
 4208       switch (ireg) {
 4209       case Op_VecS:
 4210         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4211         break;
 4212       case Op_VecD:
 4213         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4214         break;
 4215       case Op_VecX:
 4216         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4217           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4218         } else {
 4219           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4221         }
 4222         break;
 4223       case Op_VecY:
 4224         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4225           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4226         } else {
 4227           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4229         }
 4230         break;
 4231       case Op_VecZ:
 4232         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4233         break;
 4234       default:
 4235         ShouldNotReachHere();
 4236       }
 4237     } else { // store
 4238       switch (ireg) {
 4239       case Op_VecS:
 4240         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4241         break;
 4242       case Op_VecD:
 4243         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4244         break;
 4245       case Op_VecX:
 4246         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4247           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
 4250           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4251         }
 4252         break;
 4253       case Op_VecY:
 4254         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4255           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
 4258           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4259         }
 4260         break;
 4261       case Op_VecZ:
 4262         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4263         break;
 4264       default:
 4265         ShouldNotReachHere();
 4266       }
 4267     }
 4268 #ifndef PRODUCT
 4269   } else {
 4270     if (is_load) {
 4271       switch (ireg) {
 4272       case Op_VecS:
 4273         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4274         break;
 4275       case Op_VecD:
 4276         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4277         break;
 4278        case Op_VecX:
 4279         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4280         break;
 4281       case Op_VecY:
 4282       case Op_VecZ:
 4283         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4284         break;
 4285       default:
 4286         ShouldNotReachHere();
 4287       }
 4288     } else { // store
 4289       switch (ireg) {
 4290       case Op_VecS:
 4291         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4292         break;
 4293       case Op_VecD:
 4294         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4295         break;
 4296        case Op_VecX:
 4297         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4298         break;
 4299       case Op_VecY:
 4300       case Op_VecZ:
 4301         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4302         break;
 4303       default:
 4304         ShouldNotReachHere();
 4305       }
 4306     }
 4307 #endif
 4308   }
 4309 }
 4310 
 4311 template <class T>
 4312 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4313   int size = type2aelembytes(bt) * len;
 4314   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4315   for (int i = 0; i < len; i++) {
 4316     int offset = i * type2aelembytes(bt);
 4317     switch (bt) {
 4318       case T_BYTE: val->at(i) = con; break;
 4319       case T_SHORT: {
 4320         jshort c = con;
 4321         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4322         break;
 4323       }
 4324       case T_INT: {
 4325         jint c = con;
 4326         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4327         break;
 4328       }
 4329       case T_LONG: {
 4330         jlong c = con;
 4331         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4332         break;
 4333       }
 4334       case T_FLOAT: {
 4335         jfloat c = con;
 4336         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4337         break;
 4338       }
 4339       case T_DOUBLE: {
 4340         jdouble c = con;
 4341         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4342         break;
 4343       }
 4344       default: assert(false, "%s", type2name(bt));
 4345     }
 4346   }
 4347   return val;
 4348 }
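
// For example (on little-endian x86), vreplicate_imm(T_SHORT, (jshort)1, 4)
// yields the byte sequence 01 00 01 00 01 00 01 00.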
 4349 
 4350 static inline jlong high_bit_set(BasicType bt) {
 4351   switch (bt) {
 4352     case T_BYTE:  return 0x8080808080808080;
 4353     case T_SHORT: return 0x8000800080008000;
 4354     case T_INT:   return 0x8000000080000000;
 4355     case T_LONG:  return 0x8000000000000000;
 4356     default:
 4357       ShouldNotReachHere();
 4358       return 0;
 4359   }
 4360 }
 4361 
 4362 #ifndef PRODUCT
 4363   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4364     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4365   }
 4366 #endif
 4367 
 4368   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4369     __ nop(_count);
 4370   }
 4371 
 4372   uint MachNopNode::size(PhaseRegAlloc*) const {
 4373     return _count;
 4374   }
 4375 
 4376 #ifndef PRODUCT
 4377   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4378     st->print("# breakpoint");
 4379   }
 4380 #endif
 4381 
 4382   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4383     __ int3();
 4384   }
 4385 
 4386   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4387     return MachNode::size(ra_);
 4388   }
 4389 
 4390 %}
 4391 
 4392 //----------ENCODING BLOCK-----------------------------------------------------
 4393 // This block specifies the encoding classes used by the compiler to
 4394 // output byte streams.  Encoding classes are parameterized macros
 4395 // used by Machine Instruction Nodes in order to generate the bit
 4396 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
 4400 // which returns its register number when queried.  CONST_INTER causes
 4401 // an operand to generate a function which returns the value of the
 4402 // constant when queried.  MEMORY_INTER causes an operand to generate
 4403 // four functions which return the Base Register, the Index Register,
 4404 // the Scale Value, and the Offset Value of the operand when queried.
 4405 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
 4407 // associated with each basic boolean condition for a conditional
 4408 // instruction.
 4409 //
// Instructions specify two basic values for encoding.  (A function is
// also available to check whether a constant displacement is an oop.)
// They use the ins_encode keyword to specify their encoding
 4413 // classes (which must be a sequence of enc_class names, and their
 4414 // parameters, specified in the encoding block), and they use the
 4415 // opcode keyword to specify, in order, their primary, secondary, and
 4416 // tertiary opcode.  Only the opcode sections which a particular
 4417 // instruction needs for encoding need to be specified.
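//
// For illustration only (a hypothetical use, not an instruction defined
// in this section), an instruct definition could request the encoding
// classes defined below with, e.g.:
//   ins_encode(cdql_enc(div), clear_avx);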
 4418 encode %{
 4419   enc_class cdql_enc(no_rax_rdx_RegI div)
 4420   %{
 4421     // Full implementation of Java idiv and irem; checks for
 4422     // special case as described in JVM spec., p.243 & p.271.
 4423     //
 4424     //         normal case                           special case
 4425     //
 4426     // input : rax: dividend                         min_int
 4427     //         reg: divisor                          -1
 4428     //
 4429     // output: rax: quotient  (= rax idiv reg)       min_int
 4430     //         rdx: remainder (= rax irem reg)       0
 4431     //
    //  Code sequence:
 4433     //
 4434     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4435     //    5:   75 07/08                jne    e <normal>
 4436     //    7:   33 d2                   xor    %edx,%edx
 4437     //  [div >= 8 -> offset + 1]
 4438     //  [REX_B]
 4439     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4440     //    c:   74 03/04                je     11 <done>
 4441     // 000000000000000e <normal>:
 4442     //    e:   99                      cltd
 4443     //  [div >= 8 -> offset + 1]
 4444     //  [REX_B]
 4445     //    f:   f7 f9                   idiv   $div
 4446     // 0000000000000011 <done>:
 4447     Label normal;
 4448     Label done;
 4449 
 4450     // cmp    $0x80000000,%eax
 4451     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4452 
 4453     // jne    e <normal>
 4454     __ jccb(Assembler::notEqual, normal);
 4455 
 4456     // xor    %edx,%edx
 4457     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4458 
    // cmp    $0xffffffffffffffff,$div
 4460     __ cmpl($div$$Register, -1);
 4461 
 4462     // je     11 <done>
 4463     __ jccb(Assembler::equal, done);
 4464 
 4465     // <normal>
 4466     // cltd
 4467     __ bind(normal);
 4468     __ cdql();
 4469 
 4470     // idivl
 4471     // <done>
 4472     __ idivl($div$$Register);
 4473     __ bind(done);
 4474   %}
 4475 
 4476   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4477   %{
 4478     // Full implementation of Java ldiv and lrem; checks for
 4479     // special case as described in JVM spec., p.243 & p.271.
 4480     //
 4481     //         normal case                           special case
 4482     //
 4483     // input : rax: dividend                         min_long
 4484     //         reg: divisor                          -1
 4485     //
 4486     // output: rax: quotient  (= rax idiv reg)       min_long
 4487     //         rdx: remainder (= rax irem reg)       0
 4488     //
    //  Code sequence:
 4490     //
 4491     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4492     //    7:   00 00 80
 4493     //    a:   48 39 d0                cmp    %rdx,%rax
 4494     //    d:   75 08                   jne    17 <normal>
 4495     //    f:   33 d2                   xor    %edx,%edx
 4496     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4497     //   15:   74 05                   je     1c <done>
 4498     // 0000000000000017 <normal>:
 4499     //   17:   48 99                   cqto
 4500     //   19:   48 f7 f9                idiv   $div
 4501     // 000000000000001c <done>:
 4502     Label normal;
 4503     Label done;
 4504 
 4505     // mov    $0x8000000000000000,%rdx
 4506     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4507 
 4508     // cmp    %rdx,%rax
 4509     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4510 
 4511     // jne    17 <normal>
 4512     __ jccb(Assembler::notEqual, normal);
 4513 
 4514     // xor    %edx,%edx
 4515     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4516 
 4517     // cmp    $0xffffffffffffffff,$div
 4518     __ cmpq($div$$Register, -1);
 4519 
    // je     1c <done>
 4521     __ jccb(Assembler::equal, done);
 4522 
 4523     // <normal>
 4524     // cqto
 4525     __ bind(normal);
 4526     __ cdqq();
 4527 
    // idivq
 4529     // <done>
 4530     __ idivq($div$$Register);
 4531     __ bind(done);
 4532   %}
 4533 
 4534   enc_class clear_avx %{
 4535     DEBUG_ONLY(int off0 = __ offset());
 4536     if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when the current compiled code
      // uses wide vectors, to avoid the AVX <-> SSE transition penalty
      // during calls.
 4540       __ vzeroupper();
 4541     }
 4542     DEBUG_ONLY(int off1 = __ offset());
 4543     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4544   %}
 4545 
 4546   enc_class Java_To_Runtime(method meth) %{
 4547     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4548     __ call(r10);
 4549     __ post_call_nop();
 4550   %}
 4551 
 4552   enc_class Java_Static_Call(method meth)
 4553   %{
 4554     // JAVA STATIC CALL
 4555     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4556     // determine who we intended to call.
 4557     if (!_method) {
 4558       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4559     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4560       // The NOP here is purely to ensure that eliding a call to
 4561       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4562       __ addr_nop_5();
 4563       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4564     } else {
 4565       int method_index = resolved_method_index(masm);
 4566       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4567                                                   : static_call_Relocation::spec(method_index);
 4568       address mark = __ pc();
 4569       int call_offset = __ offset();
 4570       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4571       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4572         // Calls of the same statically bound method can share
 4573         // a stub to the interpreter.
 4574         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4575       } else {
 4576         // Emit stubs for static call.
 4577         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4578         __ clear_inst_mark();
 4579         if (stub == nullptr) {
 4580           ciEnv::current()->record_failure("CodeCache is full");
 4581           return;
 4582         }
 4583       }
 4584     }
 4585     __ post_call_nop();
 4586   %}
 4587 
 4588   enc_class Java_Dynamic_Call(method meth) %{
 4589     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4590     __ post_call_nop();
 4591   %}
 4592 
 4593   enc_class call_epilog %{
 4594     if (VerifyStackAtCalls) {
 4595       // Check that stack depth is unchanged: find majik cookie on stack
 4596       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4597       Label L;
 4598       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4599       __ jccb(Assembler::equal, L);
 4600       // Die if stack mismatch
 4601       __ int3();
 4602       __ bind(L);
 4603     }
 4604   %}
 4605 
 4606 %}
 4607 
 4608 //----------FRAME--------------------------------------------------------------
 4609 // Definition of frame structure and management information.
 4610 //
 4611 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4612 //                             |   (to get allocators register number
 4613 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4614 //  r   CALLER     |        |
 4615 //  o     |        +--------+      pad to even-align allocators stack-slot
 4616 //  w     V        |  pad0  |        numbers; owned by CALLER
 4617 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4618 //  h     ^        |   in   |  5
 4619 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4620 //  |     |        |        |  3
 4621 //  |     |        +--------+
 4622 //  V     |        | old out|      Empty on Intel, window on Sparc
 4623 //        |    old |preserve|      Must be even aligned.
 4624 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4625 //        |        |   in   |  3   area for Intel ret address
 4626 //     Owned by    |preserve|      Empty on Sparc.
 4627 //       SELF      +--------+
 4628 //        |        |  pad2  |  2   pad to align old SP
 4629 //        |        +--------+  1
 4630 //        |        | locks  |  0
 4631 //        |        +--------+----> OptoReg::stack0(), even aligned
 4632 //        |        |  pad1  | 11   pad to align new SP
 4633 //        |        +--------+
 4634 //        |        |        | 10
 4635 //        |        | spills |  9   spills
 4636 //        V        |        |  8   (pad0 slot for callee)
 4637 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4638 //        ^        |  out   |  7
 4639 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4640 //     Owned by    +--------+
 4641 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4642 //        |    new |preserve|      Must be even-aligned.
 4643 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4644 //        |        |        |
 4645 //
 4646 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4647 //         known from SELF's arguments and the Java calling convention.
 4648 //         Region 6-7 is determined per call site.
 4649 // Note 2: If the calling convention leaves holes in the incoming argument
 4650 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4651 //         are owned by the CALLEE.  Holes should not be necessary in the
 4652 //         incoming area, as the Java calling convention is completely under
 4653 //         the control of the AD file.  Doubles can be sorted and packed to
 4654 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4655 //         varargs C calling conventions.
 4656 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4657 //         even aligned with pad0 as needed.
 4658 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4659 //         region 6-11 is even aligned; it may be padded out more so that
 4660 //         the region from SP to FP meets the minimum stack alignment.
 4661 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4662 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4663 //         SP meets the minimum alignment.
 4664 
 4665 frame
 4666 %{
 4667   // These three registers define part of the calling convention
 4668   // between compiled code and the interpreter.
 4669   inline_cache_reg(RAX);                // Inline Cache Register
 4670 
 4671   // Optional: name the operand used by cisc-spilling to access
 4672   // [stack_pointer + offset]
 4673   cisc_spilling_operand_name(indOffset32);
 4674 
 4675   // Number of stack slots consumed by locking an object
 4676   sync_stack_slots(2);
 4677 
 4678   // Compiled code's Frame Pointer
 4679   frame_pointer(RSP);
 4680 
 4681   // Interpreter stores its frame pointer in a register which is
 4682   // stored to the stack by I2CAdaptors.
 4683   // I2CAdaptors convert from interpreted java to compiled java.
 4684   interpreter_frame_pointer(RBP);
 4685 
 4686   // Stack alignment requirement
 4687   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4688 
 4689   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4690   // for calls to C.  Supports the var-args backing area for register parms.
 4691   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4692 
 4693   // The after-PROLOG location of the return address.  Location of
 4694   // return address specifies a type (REG or STACK) and a number
 4695   // representing the register number (i.e. - use a register name) or
 4696   // stack slot.
 4697   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4698   // Otherwise, it is above the locks and verification slot and alignment word
 4699   return_addr(STACK - 2 +
 4700               align_up((Compile::current()->in_preserve_stack_slots() +
 4701                         Compile::current()->fixed_slots()),
 4702                        stack_alignment_in_slots()));
 4703 
 4704   // Location of compiled Java return values.  Same as C for now.
 4705   return_value
 4706   %{
 4707     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4708            "only return normal values");
 4709 
 4710     static const int lo[Op_RegL + 1] = {
 4711       0,
 4712       0,
 4713       RAX_num,  // Op_RegN
 4714       RAX_num,  // Op_RegI
 4715       RAX_num,  // Op_RegP
 4716       XMM0_num, // Op_RegF
 4717       XMM0_num, // Op_RegD
 4718       RAX_num   // Op_RegL
 4719     };
 4720     static const int hi[Op_RegL + 1] = {
 4721       0,
 4722       0,
 4723       OptoReg::Bad, // Op_RegN
 4724       OptoReg::Bad, // Op_RegI
 4725       RAX_H_num,    // Op_RegP
 4726       OptoReg::Bad, // Op_RegF
 4727       XMM0b_num,    // Op_RegD
 4728       RAX_H_num     // Op_RegL
 4729     };
 4730     // Excluded flags and vector registers.
 4731     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4732     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4733   %}
 4734 %}
 4735 
 4736 //----------ATTRIBUTES---------------------------------------------------------
 4737 //----------Operand Attributes-------------------------------------------------
 4738 op_attrib op_cost(0);        // Required cost attribute
 4739 
 4740 //----------Instruction Attributes---------------------------------------------
 4741 ins_attrib ins_cost(100);       // Required cost attribute
 4742 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4743 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4744                                 // a non-matching short branch variant
 4745                                 // of some long branch?
 4746 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4747                                 // be a power of 2) specifies the
 4748                                 // alignment that some part of the
 4749                                 // instruction (not necessarily the
 4750                                 // start) requires.  If > 1, a
 4751                                 // compute_padding() function must be
 4752                                 // provided for the instruction
 4753 
 4754 // Whether this node is expanded during code emission into a sequence of
 4755 // instructions and the first instruction can perform an implicit null check.
 4756 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4757 
 4758 //----------OPERANDS-----------------------------------------------------------
 4759 // Operand definitions must precede instruction definitions for correct parsing
 4760 // in the ADLC because operands constitute user defined types which are used in
 4761 // instruction definitions.
 4762 
 4763 //----------Simple Operands----------------------------------------------------
 4764 // Immediate Operands
 4765 // Integer Immediate
 4766 operand immI()
 4767 %{
 4768   match(ConI);
 4769 
 4770   op_cost(10);
 4771   format %{ %}
 4772   interface(CONST_INTER);
 4773 %}
 4774 
 4775 // Constant for test vs zero
 4776 operand immI_0()
 4777 %{
 4778   predicate(n->get_int() == 0);
 4779   match(ConI);
 4780 
 4781   op_cost(0);
 4782   format %{ %}
 4783   interface(CONST_INTER);
 4784 %}
 4785 
 4786 // Constant for increment
 4787 operand immI_1()
 4788 %{
 4789   predicate(n->get_int() == 1);
 4790   match(ConI);
 4791 
 4792   op_cost(0);
 4793   format %{ %}
 4794   interface(CONST_INTER);
 4795 %}
 4796 
 4797 // Constant for decrement
 4798 operand immI_M1()
 4799 %{
 4800   predicate(n->get_int() == -1);
 4801   match(ConI);
 4802 
 4803   op_cost(0);
 4804   format %{ %}
 4805   interface(CONST_INTER);
 4806 %}
 4807 
 4808 operand immI_2()
 4809 %{
 4810   predicate(n->get_int() == 2);
 4811   match(ConI);
 4812 
 4813   op_cost(0);
 4814   format %{ %}
 4815   interface(CONST_INTER);
 4816 %}
 4817 
 4818 operand immI_4()
 4819 %{
 4820   predicate(n->get_int() == 4);
 4821   match(ConI);
 4822 
 4823   op_cost(0);
 4824   format %{ %}
 4825   interface(CONST_INTER);
 4826 %}
 4827 
 4828 operand immI_8()
 4829 %{
 4830   predicate(n->get_int() == 8);
 4831   match(ConI);
 4832 
 4833   op_cost(0);
 4834   format %{ %}
 4835   interface(CONST_INTER);
 4836 %}
 4837 
 4838 // Valid scale values for addressing modes
 4839 operand immI2()
 4840 %{
 4841   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4842   match(ConI);
 4843 
 4844   format %{ %}
 4845   interface(CONST_INTER);
 4846 %}
 4847 
 4848 operand immU7()
 4849 %{
 4850   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4851   match(ConI);
 4852 
 4853   op_cost(5);
 4854   format %{ %}
 4855   interface(CONST_INTER);
 4856 %}
 4857 
 4858 operand immI8()
 4859 %{
 4860   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4861   match(ConI);
 4862 
 4863   op_cost(5);
 4864   format %{ %}
 4865   interface(CONST_INTER);
 4866 %}
 4867 
 4868 operand immU8()
 4869 %{
 4870   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4871   match(ConI);
 4872 
 4873   op_cost(5);
 4874   format %{ %}
 4875   interface(CONST_INTER);
 4876 %}
 4877 
 4878 operand immI16()
 4879 %{
 4880   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4881   match(ConI);
 4882 
 4883   op_cost(10);
 4884   format %{ %}
 4885   interface(CONST_INTER);
 4886 %}
 4887 
 4888 // Int Immediate non-negative
 4889 operand immU31()
 4890 %{
 4891   predicate(n->get_int() >= 0);
 4892   match(ConI);
 4893 
 4894   op_cost(0);
 4895   format %{ %}
 4896   interface(CONST_INTER);
 4897 %}
 4898 
 4899 // Pointer Immediate
 4900 operand immP()
 4901 %{
 4902   match(ConP);
 4903 
 4904   op_cost(10);
 4905   format %{ %}
 4906   interface(CONST_INTER);
 4907 %}
 4908 
 4909 // Null Pointer Immediate
 4910 operand immP0()
 4911 %{
 4912   predicate(n->get_ptr() == 0);
 4913   match(ConP);
 4914 
 4915   op_cost(5);
 4916   format %{ %}
 4917   interface(CONST_INTER);
 4918 %}
 4919 
 4920 // Pointer Immediate
 4921 operand immN() %{
 4922   match(ConN);
 4923 
 4924   op_cost(10);
 4925   format %{ %}
 4926   interface(CONST_INTER);
 4927 %}
 4928 
 4929 operand immNKlass() %{
 4930   match(ConNKlass);
 4931 
 4932   op_cost(10);
 4933   format %{ %}
 4934   interface(CONST_INTER);
 4935 %}
 4936 
 4937 // Null Pointer Immediate
 4938 operand immN0() %{
 4939   predicate(n->get_narrowcon() == 0);
 4940   match(ConN);
 4941 
 4942   op_cost(5);
 4943   format %{ %}
 4944   interface(CONST_INTER);
 4945 %}
 4946 
 4947 operand immP31()
 4948 %{
 4949   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4950             && (n->get_ptr() >> 31) == 0);
 4951   match(ConP);
 4952 
 4953   op_cost(5);
 4954   format %{ %}
 4955   interface(CONST_INTER);
 4956 %}
 4957 
 4958 
 4959 // Long Immediate
 4960 operand immL()
 4961 %{
 4962   match(ConL);
 4963 
 4964   op_cost(20);
 4965   format %{ %}
 4966   interface(CONST_INTER);
 4967 %}
 4968 
 4969 // Long Immediate 8-bit
 4970 operand immL8()
 4971 %{
 4972   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4973   match(ConL);
 4974 
 4975   op_cost(5);
 4976   format %{ %}
 4977   interface(CONST_INTER);
 4978 %}
 4979 
 4980 // Long Immediate 32-bit unsigned
 4981 operand immUL32()
 4982 %{
 4983   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4984   match(ConL);
 4985 
 4986   op_cost(10);
 4987   format %{ %}
 4988   interface(CONST_INTER);
 4989 %}
 4990 
 4991 // Long Immediate 32-bit signed
 4992 operand immL32()
 4993 %{
 4994   predicate(n->get_long() == (int) (n->get_long()));
 4995   match(ConL);
 4996 
 4997   op_cost(15);
 4998   format %{ %}
 4999   interface(CONST_INTER);
 5000 %}
 5001 
 5002 operand immL_Pow2()
 5003 %{
 5004   predicate(is_power_of_2((julong)n->get_long()));
 5005   match(ConL);
 5006 
 5007   op_cost(15);
 5008   format %{ %}
 5009   interface(CONST_INTER);
 5010 %}
 5011 
 5012 operand immL_NotPow2()
 5013 %{
 5014   predicate(is_power_of_2((julong)~n->get_long()));
 5015   match(ConL);
 5016 
 5017   op_cost(15);
 5018   format %{ %}
 5019   interface(CONST_INTER);
 5020 %}
 5021 
 5022 // Long Immediate zero
 5023 operand immL0()
 5024 %{
 5025   predicate(n->get_long() == 0L);
 5026   match(ConL);
 5027 
 5028   op_cost(10);
 5029   format %{ %}
 5030   interface(CONST_INTER);
 5031 %}
 5032 
 5033 // Constant for increment
 5034 operand immL1()
 5035 %{
 5036   predicate(n->get_long() == 1);
 5037   match(ConL);
 5038 
 5039   format %{ %}
 5040   interface(CONST_INTER);
 5041 %}
 5042 
 5043 // Constant for decrement
 5044 operand immL_M1()
 5045 %{
 5046   predicate(n->get_long() == -1);
 5047   match(ConL);
 5048 
 5049   format %{ %}
 5050   interface(CONST_INTER);
 5051 %}
 5052 
 5053 // Long Immediate: low 32-bit mask
 5054 operand immL_32bits()
 5055 %{
 5056   predicate(n->get_long() == 0xFFFFFFFFL);
 5057   match(ConL);
 5058   op_cost(20);
 5059 
 5060   format %{ %}
 5061   interface(CONST_INTER);
 5062 %}
 5063 
 5064 // Int Immediate: 2^n-1, positive
 5065 operand immI_Pow2M1()
 5066 %{
 5067   predicate((n->get_int() > 0)
 5068             && is_power_of_2((juint)n->get_int() + 1));
 5069   match(ConI);
 5070 
 5071   op_cost(20);
 5072   format %{ %}
 5073   interface(CONST_INTER);
 5074 %}
 5075 
 5076 // Float Immediate zero
 5077 operand immF0()
 5078 %{
 5079   predicate(jint_cast(n->getf()) == 0);
 5080   match(ConF);
 5081 
 5082   op_cost(5);
 5083   format %{ %}
 5084   interface(CONST_INTER);
 5085 %}
 5086 
 5087 // Float Immediate
 5088 operand immF()
 5089 %{
 5090   match(ConF);
 5091 
 5092   op_cost(15);
 5093   format %{ %}
 5094   interface(CONST_INTER);
 5095 %}
 5096 
 5097 // Half Float Immediate
 5098 operand immH()
 5099 %{
 5100   match(ConH);
 5101 
 5102   op_cost(15);
 5103   format %{ %}
 5104   interface(CONST_INTER);
 5105 %}
 5106 
 5107 // Double Immediate zero
 5108 operand immD0()
 5109 %{
 5110   predicate(jlong_cast(n->getd()) == 0);
 5111   match(ConD);
 5112 
 5113   op_cost(5);
 5114   format %{ %}
 5115   interface(CONST_INTER);
 5116 %}
 5117 
 5118 // Double Immediate
 5119 operand immD()
 5120 %{
 5121   match(ConD);
 5122 
 5123   op_cost(15);
 5124   format %{ %}
 5125   interface(CONST_INTER);
 5126 %}
 5127 
 5128 // Immediates for special shifts (sign extend)
 5129 
// Constants for sign-extending shift amounts
 5131 operand immI_16()
 5132 %{
 5133   predicate(n->get_int() == 16);
 5134   match(ConI);
 5135 
 5136   format %{ %}
 5137   interface(CONST_INTER);
 5138 %}
 5139 
 5140 operand immI_24()
 5141 %{
 5142   predicate(n->get_int() == 24);
 5143   match(ConI);
 5144 
 5145   format %{ %}
 5146   interface(CONST_INTER);
 5147 %}
 5148 
 5149 // Constant for byte-wide masking
 5150 operand immI_255()
 5151 %{
 5152   predicate(n->get_int() == 255);
 5153   match(ConI);
 5154 
 5155   format %{ %}
 5156   interface(CONST_INTER);
 5157 %}
 5158 
 5159 // Constant for short-wide masking
 5160 operand immI_65535()
 5161 %{
 5162   predicate(n->get_int() == 65535);
 5163   match(ConI);
 5164 
 5165   format %{ %}
 5166   interface(CONST_INTER);
 5167 %}
 5168 
 5169 // Constant for byte-wide masking
 5170 operand immL_255()
 5171 %{
 5172   predicate(n->get_long() == 255);
 5173   match(ConL);
 5174 
 5175   format %{ %}
 5176   interface(CONST_INTER);
 5177 %}
 5178 
 5179 // Constant for short-wide masking
 5180 operand immL_65535()
 5181 %{
 5182   predicate(n->get_long() == 65535);
 5183   match(ConL);
 5184 
 5185   format %{ %}
 5186   interface(CONST_INTER);
 5187 %}
 5188 
 5189 operand kReg()
 5190 %{
 5191   constraint(ALLOC_IN_RC(vectmask_reg));
 5192   match(RegVectMask);
  format %{ %}
 5194   interface(REG_INTER);
 5195 %}
 5196 
 5197 // Register Operands
 5198 // Integer Register
 5199 operand rRegI()
 5200 %{
 5201   constraint(ALLOC_IN_RC(int_reg));
 5202   match(RegI);
 5203 
 5204   match(rax_RegI);
 5205   match(rbx_RegI);
 5206   match(rcx_RegI);
 5207   match(rdx_RegI);
 5208   match(rdi_RegI);
 5209 
 5210   format %{ %}
 5211   interface(REG_INTER);
 5212 %}
 5213 
 5214 // Special Registers
 5215 operand rax_RegI()
 5216 %{
 5217   constraint(ALLOC_IN_RC(int_rax_reg));
 5218   match(RegI);
 5219   match(rRegI);
 5220 
 5221   format %{ "RAX" %}
 5222   interface(REG_INTER);
 5223 %}
 5224 
 5225 // Special Registers
 5226 operand rbx_RegI()
 5227 %{
 5228   constraint(ALLOC_IN_RC(int_rbx_reg));
 5229   match(RegI);
 5230   match(rRegI);
 5231 
 5232   format %{ "RBX" %}
 5233   interface(REG_INTER);
 5234 %}
 5235 
 5236 operand rcx_RegI()
 5237 %{
 5238   constraint(ALLOC_IN_RC(int_rcx_reg));
 5239   match(RegI);
 5240   match(rRegI);
 5241 
 5242   format %{ "RCX" %}
 5243   interface(REG_INTER);
 5244 %}
 5245 
 5246 operand rdx_RegI()
 5247 %{
 5248   constraint(ALLOC_IN_RC(int_rdx_reg));
 5249   match(RegI);
 5250   match(rRegI);
 5251 
 5252   format %{ "RDX" %}
 5253   interface(REG_INTER);
 5254 %}
 5255 
 5256 operand rdi_RegI()
 5257 %{
 5258   constraint(ALLOC_IN_RC(int_rdi_reg));
 5259   match(RegI);
 5260   match(rRegI);
 5261 
 5262   format %{ "RDI" %}
 5263   interface(REG_INTER);
 5264 %}
 5265 
 5266 operand no_rax_rdx_RegI()
 5267 %{
 5268   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5269   match(RegI);
 5270   match(rbx_RegI);
 5271   match(rcx_RegI);
 5272   match(rdi_RegI);
 5273 
 5274   format %{ %}
 5275   interface(REG_INTER);
 5276 %}
 5277 
 5278 operand no_rbp_r13_RegI()
 5279 %{
 5280   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5281   match(RegI);
 5282   match(rRegI);
 5283   match(rax_RegI);
 5284   match(rbx_RegI);
 5285   match(rcx_RegI);
 5286   match(rdx_RegI);
 5287   match(rdi_RegI);
 5288 
 5289   format %{ %}
 5290   interface(REG_INTER);
 5291 %}
 5292 
 5293 // Pointer Register
 5294 operand any_RegP()
 5295 %{
 5296   constraint(ALLOC_IN_RC(any_reg));
 5297   match(RegP);
 5298   match(rax_RegP);
 5299   match(rbx_RegP);
 5300   match(rdi_RegP);
 5301   match(rsi_RegP);
 5302   match(rbp_RegP);
 5303   match(r15_RegP);
 5304   match(rRegP);
 5305 
 5306   format %{ %}
 5307   interface(REG_INTER);
 5308 %}
 5309 
 5310 operand rRegP()
 5311 %{
 5312   constraint(ALLOC_IN_RC(ptr_reg));
 5313   match(RegP);
 5314   match(rax_RegP);
 5315   match(rbx_RegP);
 5316   match(rdi_RegP);
 5317   match(rsi_RegP);
 5318   match(rbp_RegP);  // See Q&A below about
 5319   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5320 
 5321   format %{ %}
 5322   interface(REG_INTER);
 5323 %}
 5324 
 5325 operand rRegN() %{
 5326   constraint(ALLOC_IN_RC(int_reg));
 5327   match(RegN);
 5328 
 5329   format %{ %}
 5330   interface(REG_INTER);
 5331 %}
 5332 
// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, the allocator will never
// consider r15 when allocating the instruction's output.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, the allocator will never assign RBP to an instruction's output either.
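
// For illustration only (commented out, not a live rule): an instruction that
// genuinely needs the TLS register names r15_RegP directly, e.g.
//
//   instruct tlsLoadP_sketch(r15_RegP dst) %{
//     match(Set dst (ThreadLocal));
//     ins_cost(0);
//     format %{ "# TLS is in R15" %}
//     ins_encode( /*empty encoding*/ );
//     ins_pipe(ialu_reg_reg);
//   %}
//
// which is the shape of the real tlsLoadP rule that appears later in this file.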
 5343 
 5344 // This operand is not allowed to use RBP even if
 5345 // RBP is not used to hold the frame pointer.
 5346 operand no_rbp_RegP()
 5347 %{
 5348   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5349   match(RegP);
 5350   match(rbx_RegP);
 5351   match(rsi_RegP);
 5352   match(rdi_RegP);
 5353 
 5354   format %{ %}
 5355   interface(REG_INTER);
 5356 %}
 5357 
 5358 // Special Registers
 5359 // Return a pointer value
 5360 operand rax_RegP()
 5361 %{
 5362   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5363   match(RegP);
 5364   match(rRegP);
 5365 
 5366   format %{ %}
 5367   interface(REG_INTER);
 5368 %}
 5369 
 5370 // Special Registers
 5371 // Return a compressed pointer value
 5372 operand rax_RegN()
 5373 %{
 5374   constraint(ALLOC_IN_RC(int_rax_reg));
 5375   match(RegN);
 5376   match(rRegN);
 5377 
 5378   format %{ %}
 5379   interface(REG_INTER);
 5380 %}
 5381 
 5382 // Used in AtomicAdd
 5383 operand rbx_RegP()
 5384 %{
 5385   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5386   match(RegP);
 5387   match(rRegP);
 5388 
 5389   format %{ %}
 5390   interface(REG_INTER);
 5391 %}
 5392 
 5393 operand rsi_RegP()
 5394 %{
 5395   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5396   match(RegP);
 5397   match(rRegP);
 5398 
 5399   format %{ %}
 5400   interface(REG_INTER);
 5401 %}
 5402 
 5403 operand rbp_RegP()
 5404 %{
 5405   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5406   match(RegP);
 5407   match(rRegP);
 5408 
 5409   format %{ %}
 5410   interface(REG_INTER);
 5411 %}
 5412 
 5413 // Used in rep stosq
 5414 operand rdi_RegP()
 5415 %{
 5416   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5417   match(RegP);
 5418   match(rRegP);
 5419 
 5420   format %{ %}
 5421   interface(REG_INTER);
 5422 %}
 5423 
 5424 operand r15_RegP()
 5425 %{
 5426   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5427   match(RegP);
 5428   match(rRegP);
 5429 
 5430   format %{ %}
 5431   interface(REG_INTER);
 5432 %}
 5433 
 5434 operand rRegL()
 5435 %{
 5436   constraint(ALLOC_IN_RC(long_reg));
 5437   match(RegL);
 5438   match(rax_RegL);
 5439   match(rdx_RegL);
 5440 
 5441   format %{ %}
 5442   interface(REG_INTER);
 5443 %}
 5444 
 5445 // Special Registers
 5446 operand no_rax_rdx_RegL()
 5447 %{
 5448   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5449   match(RegL);
 5450   match(rRegL);
 5451 
 5452   format %{ %}
 5453   interface(REG_INTER);
 5454 %}
 5455 
 5456 operand rax_RegL()
 5457 %{
 5458   constraint(ALLOC_IN_RC(long_rax_reg));
 5459   match(RegL);
 5460   match(rRegL);
 5461 
 5462   format %{ "RAX" %}
 5463   interface(REG_INTER);
 5464 %}
 5465 
 5466 operand rcx_RegL()
 5467 %{
 5468   constraint(ALLOC_IN_RC(long_rcx_reg));
 5469   match(RegL);
 5470   match(rRegL);
 5471 
 5472   format %{ %}
 5473   interface(REG_INTER);
 5474 %}
 5475 
 5476 operand rdx_RegL()
 5477 %{
 5478   constraint(ALLOC_IN_RC(long_rdx_reg));
 5479   match(RegL);
 5480   match(rRegL);
 5481 
 5482   format %{ %}
 5483   interface(REG_INTER);
 5484 %}
 5485 
 5486 operand r11_RegL()
 5487 %{
 5488   constraint(ALLOC_IN_RC(long_r11_reg));
 5489   match(RegL);
 5490   match(rRegL);
 5491 
 5492   format %{ %}
 5493   interface(REG_INTER);
 5494 %}
 5495 
 5496 operand no_rbp_r13_RegL()
 5497 %{
 5498   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5499   match(RegL);
 5500   match(rRegL);
 5501   match(rax_RegL);
 5502   match(rcx_RegL);
 5503   match(rdx_RegL);
 5504 
 5505   format %{ %}
 5506   interface(REG_INTER);
 5507 %}
 5508 
 5509 // Flags register, used as output of compare instructions
 5510 operand rFlagsReg()
 5511 %{
 5512   constraint(ALLOC_IN_RC(int_flags));
 5513   match(RegFlags);
 5514 
 5515   format %{ "RFLAGS" %}
 5516   interface(REG_INTER);
 5517 %}
 5518 
 5519 // Flags register, used as output of FLOATING POINT compare instructions
 5520 operand rFlagsRegU()
 5521 %{
 5522   constraint(ALLOC_IN_RC(int_flags));
 5523   match(RegFlags);
 5524 
 5525   format %{ "RFLAGS_U" %}
 5526   interface(REG_INTER);
 5527 %}
 5528 
 5529 operand rFlagsRegUCF() %{
 5530   constraint(ALLOC_IN_RC(int_flags));
 5531   match(RegFlags);
 5532   predicate(false);
 5533 
 5534   format %{ "RFLAGS_U_CF" %}
 5535   interface(REG_INTER);
 5536 %}
 5537 
 5538 // Float register operands
 5539 operand regF() %{
 5540    constraint(ALLOC_IN_RC(float_reg));
 5541    match(RegF);
 5542 
 5543    format %{ %}
 5544    interface(REG_INTER);
 5545 %}
 5546 
 5547 // Float register operands
 5548 operand legRegF() %{
 5549    constraint(ALLOC_IN_RC(float_reg_legacy));
 5550    match(RegF);
 5551 
 5552    format %{ %}
 5553    interface(REG_INTER);
 5554 %}
 5555 
 5556 // Float register operands
 5557 operand vlRegF() %{
 5558    constraint(ALLOC_IN_RC(float_reg_vl));
 5559    match(RegF);
 5560 
 5561    format %{ %}
 5562    interface(REG_INTER);
 5563 %}
 5564 
 5565 // Double register operands
 5566 operand regD() %{
 5567    constraint(ALLOC_IN_RC(double_reg));
 5568    match(RegD);
 5569 
 5570    format %{ %}
 5571    interface(REG_INTER);
 5572 %}
 5573 
 5574 // Double register operands
 5575 operand legRegD() %{
 5576    constraint(ALLOC_IN_RC(double_reg_legacy));
 5577    match(RegD);
 5578 
 5579    format %{ %}
 5580    interface(REG_INTER);
 5581 %}
 5582 
 5583 // Double register operands
 5584 operand vlRegD() %{
 5585    constraint(ALLOC_IN_RC(double_reg_vl));
 5586    match(RegD);
 5587 
 5588    format %{ %}
 5589    interface(REG_INTER);
 5590 %}
 5591 
 5592 //----------Memory Operands----------------------------------------------------
 5593 // Direct Memory Operand
 5594 // operand direct(immP addr)
 5595 // %{
 5596 //   match(addr);
 5597 
 5598 //   format %{ "[$addr]" %}
 5599 //   interface(MEMORY_INTER) %{
 5600 //     base(0xFFFFFFFF);
 5601 //     index(0x4);
 5602 //     scale(0x0);
 5603 //     disp($addr);
 5604 //   %}
 5605 // %}
 5606 
 5607 // Indirect Memory Operand
 5608 operand indirect(any_RegP reg)
 5609 %{
 5610   constraint(ALLOC_IN_RC(ptr_reg));
 5611   match(reg);
 5612 
 5613   format %{ "[$reg]" %}
 5614   interface(MEMORY_INTER) %{
 5615     base($reg);
 5616     index(0x4);
 5617     scale(0x0);
 5618     disp(0x0);
 5619   %}
 5620 %}
 5621 
 5622 // Indirect Memory Plus Short Offset Operand
 5623 operand indOffset8(any_RegP reg, immL8 off)
 5624 %{
 5625   constraint(ALLOC_IN_RC(ptr_reg));
 5626   match(AddP reg off);
 5627 
 5628   format %{ "[$reg + $off (8-bit)]" %}
 5629   interface(MEMORY_INTER) %{
 5630     base($reg);
 5631     index(0x4);
 5632     scale(0x0);
 5633     disp($off);
 5634   %}
 5635 %}
 5636 
 5637 // Indirect Memory Plus Long Offset Operand
 5638 operand indOffset32(any_RegP reg, immL32 off)
 5639 %{
 5640   constraint(ALLOC_IN_RC(ptr_reg));
 5641   match(AddP reg off);
 5642 
 5643   format %{ "[$reg + $off (32-bit)]" %}
 5644   interface(MEMORY_INTER) %{
 5645     base($reg);
 5646     index(0x4);
 5647     scale(0x0);
 5648     disp($off);
 5649   %}
 5650 %}
 5651 
 5652 // Indirect Memory Plus Index Register Plus Offset Operand
 5653 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5654 %{
 5655   constraint(ALLOC_IN_RC(ptr_reg));
 5656   match(AddP (AddP reg lreg) off);
 5657 
 5658   op_cost(10);
 5659   format %{"[$reg + $off + $lreg]" %}
 5660   interface(MEMORY_INTER) %{
 5661     base($reg);
 5662     index($lreg);
 5663     scale(0x0);
 5664     disp($off);
 5665   %}
 5666 %}
 5667 
// Indirect Memory Plus Index Register Operand
 5669 operand indIndex(any_RegP reg, rRegL lreg)
 5670 %{
 5671   constraint(ALLOC_IN_RC(ptr_reg));
 5672   match(AddP reg lreg);
 5673 
 5674   op_cost(10);
 5675   format %{"[$reg + $lreg]" %}
 5676   interface(MEMORY_INTER) %{
 5677     base($reg);
 5678     index($lreg);
 5679     scale(0x0);
 5680     disp(0x0);
 5681   %}
 5682 %}
 5683 
 5684 // Indirect Memory Times Scale Plus Index Register
 5685 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5686 %{
 5687   constraint(ALLOC_IN_RC(ptr_reg));
 5688   match(AddP reg (LShiftL lreg scale));
 5689 
 5690   op_cost(10);
 5691   format %{"[$reg + $lreg << $scale]" %}
 5692   interface(MEMORY_INTER) %{
 5693     base($reg);
 5694     index($lreg);
 5695     scale($scale);
 5696     disp(0x0);
 5697   %}
 5698 %}
 5699 
 5700 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5701 %{
 5702   constraint(ALLOC_IN_RC(ptr_reg));
 5703   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5704   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5705 
 5706   op_cost(10);
 5707   format %{"[$reg + pos $idx << $scale]" %}
 5708   interface(MEMORY_INTER) %{
 5709     base($reg);
 5710     index($idx);
 5711     scale($scale);
 5712     disp(0x0);
 5713   %}
 5714 %}
 5715 
 5716 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5717 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5718 %{
 5719   constraint(ALLOC_IN_RC(ptr_reg));
 5720   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5721 
 5722   op_cost(10);
 5723   format %{"[$reg + $off + $lreg << $scale]" %}
 5724   interface(MEMORY_INTER) %{
 5725     base($reg);
 5726     index($lreg);
 5727     scale($scale);
 5728     disp($off);
 5729   %}
 5730 %}
 5731 
 5732 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5733 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5734 %{
 5735   constraint(ALLOC_IN_RC(ptr_reg));
 5736   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5737   match(AddP (AddP reg (ConvI2L idx)) off);
 5738 
 5739   op_cost(10);
 5740   format %{"[$reg + $off + $idx]" %}
 5741   interface(MEMORY_INTER) %{
 5742     base($reg);
 5743     index($idx);
 5744     scale(0x0);
 5745     disp($off);
 5746   %}
 5747 %}
 5748 
 5749 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5750 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5751 %{
 5752   constraint(ALLOC_IN_RC(ptr_reg));
 5753   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5754   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5755 
 5756   op_cost(10);
 5757   format %{"[$reg + $off + $idx << $scale]" %}
 5758   interface(MEMORY_INTER) %{
 5759     base($reg);
 5760     index($idx);
 5761     scale($scale);
 5762     disp($off);
 5763   %}
 5764 %}
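
// A sketch of how these compose: an array access a[i] on a long[], where the
// type system has proven i >= 0, typically reaches the matcher as
//
//   (AddP (AddP a (LShiftL (ConvI2L i) 3)) 16)
//
// (assuming a 16-byte array header), which indPosIndexScaleOffset above folds
// into the single addressing mode [a + 16 + i << 3].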
 5765 
 5766 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without a
// base, so we can't free up r12 even with CompressedOops::base() == nullptr.
 5769 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5770   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5771   constraint(ALLOC_IN_RC(ptr_reg));
 5772   match(AddP (DecodeN reg) off);
 5773 
 5774   op_cost(10);
 5775   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5776   interface(MEMORY_INTER) %{
 5777     base(0xc); // R12
 5778     index($reg);
 5779     scale(0x3);
 5780     disp($off);
 5781   %}
 5782 %}
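
// A sketch of the arithmetic being folded: decoding a narrow oop computes
//
//   oop = CompressedOops::base() + ((uint64_t)narrow_oop << 3)
//
// and since R12 is pinned to CompressedOops::base(), the whole
// (AddP (DecodeN reg) off) tree collapses into one SIB-encoded access,
// [R12 + $reg*8 + $off], with no separate decode instruction.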
 5783 
 5784 // Indirect Memory Operand
 5785 operand indirectNarrow(rRegN reg)
 5786 %{
 5787   predicate(CompressedOops::shift() == 0);
 5788   constraint(ALLOC_IN_RC(ptr_reg));
 5789   match(DecodeN reg);
 5790 
 5791   format %{ "[$reg]" %}
 5792   interface(MEMORY_INTER) %{
 5793     base($reg);
 5794     index(0x4);
 5795     scale(0x0);
 5796     disp(0x0);
 5797   %}
 5798 %}
 5799 
 5800 // Indirect Memory Plus Short Offset Operand
 5801 operand indOffset8Narrow(rRegN reg, immL8 off)
 5802 %{
 5803   predicate(CompressedOops::shift() == 0);
 5804   constraint(ALLOC_IN_RC(ptr_reg));
 5805   match(AddP (DecodeN reg) off);
 5806 
 5807   format %{ "[$reg + $off (8-bit)]" %}
 5808   interface(MEMORY_INTER) %{
 5809     base($reg);
 5810     index(0x4);
 5811     scale(0x0);
 5812     disp($off);
 5813   %}
 5814 %}
 5815 
 5816 // Indirect Memory Plus Long Offset Operand
 5817 operand indOffset32Narrow(rRegN reg, immL32 off)
 5818 %{
 5819   predicate(CompressedOops::shift() == 0);
 5820   constraint(ALLOC_IN_RC(ptr_reg));
 5821   match(AddP (DecodeN reg) off);
 5822 
 5823   format %{ "[$reg + $off (32-bit)]" %}
 5824   interface(MEMORY_INTER) %{
 5825     base($reg);
 5826     index(0x4);
 5827     scale(0x0);
 5828     disp($off);
 5829   %}
 5830 %}
 5831 
 5832 // Indirect Memory Plus Index Register Plus Offset Operand
 5833 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5834 %{
 5835   predicate(CompressedOops::shift() == 0);
 5836   constraint(ALLOC_IN_RC(ptr_reg));
 5837   match(AddP (AddP (DecodeN reg) lreg) off);
 5838 
 5839   op_cost(10);
 5840   format %{"[$reg + $off + $lreg]" %}
 5841   interface(MEMORY_INTER) %{
 5842     base($reg);
 5843     index($lreg);
 5844     scale(0x0);
 5845     disp($off);
 5846   %}
 5847 %}
 5848 
// Indirect Memory Plus Index Register Operand
 5850 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5851 %{
 5852   predicate(CompressedOops::shift() == 0);
 5853   constraint(ALLOC_IN_RC(ptr_reg));
 5854   match(AddP (DecodeN reg) lreg);
 5855 
 5856   op_cost(10);
 5857   format %{"[$reg + $lreg]" %}
 5858   interface(MEMORY_INTER) %{
 5859     base($reg);
 5860     index($lreg);
 5861     scale(0x0);
 5862     disp(0x0);
 5863   %}
 5864 %}
 5865 
 5866 // Indirect Memory Times Scale Plus Index Register
 5867 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5868 %{
 5869   predicate(CompressedOops::shift() == 0);
 5870   constraint(ALLOC_IN_RC(ptr_reg));
 5871   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5872 
 5873   op_cost(10);
 5874   format %{"[$reg + $lreg << $scale]" %}
 5875   interface(MEMORY_INTER) %{
 5876     base($reg);
 5877     index($lreg);
 5878     scale($scale);
 5879     disp(0x0);
 5880   %}
 5881 %}
 5882 
 5883 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5884 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5885 %{
 5886   predicate(CompressedOops::shift() == 0);
 5887   constraint(ALLOC_IN_RC(ptr_reg));
 5888   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5889 
 5890   op_cost(10);
 5891   format %{"[$reg + $off + $lreg << $scale]" %}
 5892   interface(MEMORY_INTER) %{
 5893     base($reg);
 5894     index($lreg);
 5895     scale($scale);
 5896     disp($off);
 5897   %}
 5898 %}
 5899 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5901 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5902 %{
 5903   constraint(ALLOC_IN_RC(ptr_reg));
 5904   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5905   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5906 
 5907   op_cost(10);
 5908   format %{"[$reg + $off + $idx]" %}
 5909   interface(MEMORY_INTER) %{
 5910     base($reg);
 5911     index($idx);
 5912     scale(0x0);
 5913     disp($off);
 5914   %}
 5915 %}
 5916 
 5917 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5918 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5919 %{
 5920   constraint(ALLOC_IN_RC(ptr_reg));
 5921   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5922   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5923 
 5924   op_cost(10);
 5925   format %{"[$reg + $off + $idx << $scale]" %}
 5926   interface(MEMORY_INTER) %{
 5927     base($reg);
 5928     index($idx);
 5929     scale($scale);
 5930     disp($off);
 5931   %}
 5932 %}
 5933 
 5934 //----------Special Memory Operands--------------------------------------------
 5935 // Stack Slot Operand - This operand is used for loading and storing temporary
 5936 //                      values on the stack where a match requires a value to
 5937 //                      flow through memory.
 5938 operand stackSlotP(sRegP reg)
 5939 %{
 5940   constraint(ALLOC_IN_RC(stack_slots));
 5941   // No match rule because this operand is only generated in matching
 5942 
 5943   format %{ "[$reg]" %}
 5944   interface(MEMORY_INTER) %{
 5945     base(0x4);   // RSP
 5946     index(0x4);  // No Index
 5947     scale(0x0);  // No Scale
 5948     disp($reg);  // Stack Offset
 5949   %}
 5950 %}
 5951 
 5952 operand stackSlotI(sRegI reg)
 5953 %{
 5954   constraint(ALLOC_IN_RC(stack_slots));
 5955   // No match rule because this operand is only generated in matching
 5956 
 5957   format %{ "[$reg]" %}
 5958   interface(MEMORY_INTER) %{
 5959     base(0x4);   // RSP
 5960     index(0x4);  // No Index
 5961     scale(0x0);  // No Scale
 5962     disp($reg);  // Stack Offset
 5963   %}
 5964 %}
 5965 
 5966 operand stackSlotF(sRegF reg)
 5967 %{
 5968   constraint(ALLOC_IN_RC(stack_slots));
 5969   // No match rule because this operand is only generated in matching
 5970 
 5971   format %{ "[$reg]" %}
 5972   interface(MEMORY_INTER) %{
 5973     base(0x4);   // RSP
 5974     index(0x4);  // No Index
 5975     scale(0x0);  // No Scale
 5976     disp($reg);  // Stack Offset
 5977   %}
 5978 %}
 5979 
 5980 operand stackSlotD(sRegD reg)
 5981 %{
 5982   constraint(ALLOC_IN_RC(stack_slots));
 5983   // No match rule because this operand is only generated in matching
 5984 
 5985   format %{ "[$reg]" %}
 5986   interface(MEMORY_INTER) %{
 5987     base(0x4);   // RSP
 5988     index(0x4);  // No Index
 5989     scale(0x0);  // No Scale
 5990     disp($reg);  // Stack Offset
 5991   %}
 5992 %}
 5993 operand stackSlotL(sRegL reg)
 5994 %{
 5995   constraint(ALLOC_IN_RC(stack_slots));
 5996   // No match rule because this operand is only generated in matching
 5997 
 5998   format %{ "[$reg]" %}
 5999   interface(MEMORY_INTER) %{
 6000     base(0x4);   // RSP
 6001     index(0x4);  // No Index
 6002     scale(0x0);  // No Scale
 6003     disp($reg);  // Stack Offset
 6004   %}
 6005 %}
 6006 
 6007 //----------Conditional Branch Operands----------------------------------------
 6008 // Comparison Op  - This is the operation of the comparison, and is limited to
 6009 //                  the following set of codes:
 6010 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6011 //
 6012 // Other attributes of the comparison, such as unsignedness, are specified
 6013 // by the comparison instruction that sets a condition code flags register.
 6014 // That result is represented by a flags operand whose subtype is appropriate
 6015 // to the unsignedness (etc.) of the comparison.
 6016 //
 6017 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6018 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6019 // by matching a specific subtype of Bool operand below, such as cmpOpU.
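
// The hex value attached to each condition is the x86 condition-code nibble,
// so an encoder can OR it straight into a Jcc opcode; a sketch:
//
//   0x70 | cc         => short jcc   (equal: 0x70 | 0x4 = 0x74, i.e. "je")
//   0x0F, 0x80 | cc   => near jcc    (equal: 0x0F 0x84,         i.e. "je")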
 6020 
 6021 // Comparison Code
 6022 operand cmpOp()
 6023 %{
 6024   match(Bool);
 6025 
 6026   format %{ "" %}
 6027   interface(COND_INTER) %{
 6028     equal(0x4, "e");
 6029     not_equal(0x5, "ne");
 6030     less(0xC, "l");
 6031     greater_equal(0xD, "ge");
 6032     less_equal(0xE, "le");
 6033     greater(0xF, "g");
 6034     overflow(0x0, "o");
 6035     no_overflow(0x1, "no");
 6036   %}
 6037 %}
 6038 
 6039 // Comparison Code, unsigned compare.  Used by FP also, with
 6040 // C2 (unordered) turned into GT or LT already.  The other bits
 6041 // C0 and C3 are turned into Carry & Zero flags.
 6042 operand cmpOpU()
 6043 %{
 6044   match(Bool);
 6045 
 6046   format %{ "" %}
 6047   interface(COND_INTER) %{
 6048     equal(0x4, "e");
 6049     not_equal(0x5, "ne");
 6050     less(0x2, "b");
 6051     greater_equal(0x3, "ae");
 6052     less_equal(0x6, "be");
 6053     greater(0x7, "a");
 6054     overflow(0x0, "o");
 6055     no_overflow(0x1, "no");
 6056   %}
 6057 %}
 6058 
 6059 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
 6063 operand cmpOpUCF() %{
 6064   match(Bool);
 6065   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 6066             n->as_Bool()->_test._test == BoolTest::ge ||
 6067             n->as_Bool()->_test._test == BoolTest::le ||
 6068             n->as_Bool()->_test._test == BoolTest::gt ||
 6069             n->in(1)->in(1) == n->in(1)->in(2));
 6070   format %{ "" %}
 6071   interface(COND_INTER) %{
 6072     equal(0xb, "np");
 6073     not_equal(0xa, "p");
 6074     less(0x2, "b");
 6075     greater_equal(0x3, "ae");
 6076     less_equal(0x6, "be");
 6077     greater(0x7, "a");
 6078     overflow(0x0, "o");
 6079     no_overflow(0x1, "no");
 6080   %}
 6081 %}
 6082 
 6083 
 6084 // Floating comparisons that can be fixed up with extra conditional jumps
 6085 operand cmpOpUCF2() %{
 6086   match(Bool);
 6087   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 6088              n->as_Bool()->_test._test == BoolTest::eq) &&
 6089             n->in(1)->in(1) != n->in(1)->in(2));
 6090   format %{ "" %}
 6091   interface(COND_INTER) %{
 6092     equal(0x4, "e");
 6093     not_equal(0x5, "ne");
 6094     less(0x2, "b");
 6095     greater_equal(0x3, "ae");
 6096     less_equal(0x6, "be");
 6097     greater(0x7, "a");
 6098     overflow(0x0, "o");
 6099     no_overflow(0x1, "no");
 6100   %}
 6101 %}
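
// A sketch of the extra jump cmpOpUCF2 implies for "eq": after ucomiss, an
// unordered result sets ZF, PF and CF all to 1, so ZF alone cannot distinguish
// "equal" from "NaN operand". The fixup tests parity first:
//
//   ucomiss xmm0, xmm1
//   jp      L_not_equal    // unordered (NaN) => not equal
//   je      L_equal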
 6102 
// Operands for bound floating point register arguments
 6104 operand rxmm0() %{
 6105   constraint(ALLOC_IN_RC(xmm0_reg));
 6106   match(VecX);
  format %{ %}
 6108   interface(REG_INTER);
 6109 %}
 6110 
 6111 // Vectors
 6112 
 6113 // Dummy generic vector class. Should be used for all vector operands.
 6114 // Replaced with vec[SDXYZ] during post-selection pass.
 6115 operand vec() %{
 6116   constraint(ALLOC_IN_RC(dynamic));
 6117   match(VecX);
 6118   match(VecY);
 6119   match(VecZ);
 6120   match(VecS);
 6121   match(VecD);
 6122 
 6123   format %{ %}
 6124   interface(REG_INTER);
 6125 %}
 6126 
 6127 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6128 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6129 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6130 // runtime code generation via reg_class_dynamic.
 6131 operand legVec() %{
 6132   constraint(ALLOC_IN_RC(dynamic));
 6133   match(VecX);
 6134   match(VecY);
 6135   match(VecZ);
 6136   match(VecS);
 6137   match(VecD);
 6138 
 6139   format %{ %}
 6140   interface(REG_INTER);
 6141 %}
 6142 
 6143 // Replaces vec during post-selection cleanup. See above.
 6144 operand vecS() %{
 6145   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6146   match(VecS);
 6147 
 6148   format %{ %}
 6149   interface(REG_INTER);
 6150 %}
 6151 
 6152 // Replaces legVec during post-selection cleanup. See above.
 6153 operand legVecS() %{
 6154   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6155   match(VecS);
 6156 
 6157   format %{ %}
 6158   interface(REG_INTER);
 6159 %}
 6160 
 6161 // Replaces vec during post-selection cleanup. See above.
 6162 operand vecD() %{
 6163   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6164   match(VecD);
 6165 
 6166   format %{ %}
 6167   interface(REG_INTER);
 6168 %}
 6169 
 6170 // Replaces legVec during post-selection cleanup. See above.
 6171 operand legVecD() %{
 6172   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6173   match(VecD);
 6174 
 6175   format %{ %}
 6176   interface(REG_INTER);
 6177 %}
 6178 
 6179 // Replaces vec during post-selection cleanup. See above.
 6180 operand vecX() %{
 6181   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6182   match(VecX);
 6183 
 6184   format %{ %}
 6185   interface(REG_INTER);
 6186 %}
 6187 
 6188 // Replaces legVec during post-selection cleanup. See above.
 6189 operand legVecX() %{
 6190   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6191   match(VecX);
 6192 
 6193   format %{ %}
 6194   interface(REG_INTER);
 6195 %}
 6196 
 6197 // Replaces vec during post-selection cleanup. See above.
 6198 operand vecY() %{
 6199   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6200   match(VecY);
 6201 
 6202   format %{ %}
 6203   interface(REG_INTER);
 6204 %}
 6205 
 6206 // Replaces legVec during post-selection cleanup. See above.
 6207 operand legVecY() %{
 6208   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6209   match(VecY);
 6210 
 6211   format %{ %}
 6212   interface(REG_INTER);
 6213 %}
 6214 
 6215 // Replaces vec during post-selection cleanup. See above.
 6216 operand vecZ() %{
 6217   constraint(ALLOC_IN_RC(vectorz_reg));
 6218   match(VecZ);
 6219 
 6220   format %{ %}
 6221   interface(REG_INTER);
 6222 %}
 6223 
 6224 // Replaces legVec during post-selection cleanup. See above.
 6225 operand legVecZ() %{
 6226   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6227   match(VecZ);
 6228 
 6229   format %{ %}
 6230   interface(REG_INTER);
 6231 %}
 6232 
 6233 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 6235 // instruction definitions by not requiring the AD writer to specify separate
 6236 // instructions for every form of operand when the instruction accepts
 6237 // multiple operand types with the same basic encoding and format.  The classic
 6238 // case of this is memory operands.
 6239 
 6240 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6241                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6242                indCompressedOopOffset,
 6243                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6244                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6245                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
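
// With this opclass, a single rule written against "memory" covers every
// addressing form listed above; e.g. loadB below is one instruct rather
// than twenty:
//
//   instruct loadB(rRegI dst, memory mem) %{ match(Set dst (LoadB mem)); ... %}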
 6246 
 6247 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6249 pipeline %{
 6250 
 6251 //----------ATTRIBUTES---------------------------------------------------------
 6252 attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6256   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6257   instruction_fetch_units = 1;       // of 16 bytes
 6258 %}
 6259 
 6260 //----------RESOURCES----------------------------------------------------------
 6261 // Resources are the functional units available to the machine
 6262 
 6263 // Generic P2/P3 pipeline
 6264 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6265 // 3 instructions decoded per cycle.
 6266 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
 6268 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6269            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6270            BR, FPU,
 6271            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6272 
 6273 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6274 // Pipeline Description specifies the stages in the machine's pipeline
 6275 
 6276 // Generic P2/P3 pipeline
 6277 pipe_desc(S0, S1, S2, S3, S4, S5);
 6278 
 6279 //----------PIPELINE CLASSES---------------------------------------------------
 6280 // Pipeline Classes describe the stages in which input and output are
 6281 // referenced by the hardware pipeline.
 6282 
 6283 // Naming convention: ialu or fpu
 6284 // Then: _reg
 6285 // Then: _reg if there is a 2nd register
 6286 // Then: _long if it's a pair of instructions implementing a long
 6287 // Then: _fat if it requires the big decoder
 6288 //   Or: _mem if it requires the big decoder and a memory unit.
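
// For example, reading ialu_reg_mem by this convention: an integer ALU
// operation with a register destination and a memory source, which needs
// the big decoder (D0) and a MEM unit in addition to an ALU slot.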
 6289 
 6290 // Integer ALU reg operation
 6291 pipe_class ialu_reg(rRegI dst)
 6292 %{
 6293     single_instruction;
 6294     dst    : S4(write);
 6295     dst    : S3(read);
 6296     DECODE : S0;        // any decoder
 6297     ALU    : S3;        // any alu
 6298 %}
 6299 
 6300 // Long ALU reg operation
 6301 pipe_class ialu_reg_long(rRegL dst)
 6302 %{
 6303     instruction_count(2);
 6304     dst    : S4(write);
 6305     dst    : S3(read);
 6306     DECODE : S0(2);     // any 2 decoders
 6307     ALU    : S3(2);     // both alus
 6308 %}
 6309 
 6310 // Integer ALU reg operation using big decoder
 6311 pipe_class ialu_reg_fat(rRegI dst)
 6312 %{
 6313     single_instruction;
 6314     dst    : S4(write);
 6315     dst    : S3(read);
 6316     D0     : S0;        // big decoder only
 6317     ALU    : S3;        // any alu
 6318 %}
 6319 
 6320 // Integer ALU reg-reg operation
 6321 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6322 %{
 6323     single_instruction;
 6324     dst    : S4(write);
 6325     src    : S3(read);
 6326     DECODE : S0;        // any decoder
 6327     ALU    : S3;        // any alu
 6328 %}
 6329 
 6330 // Integer ALU reg-reg operation
 6331 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6332 %{
 6333     single_instruction;
 6334     dst    : S4(write);
 6335     src    : S3(read);
 6336     D0     : S0;        // big decoder only
 6337     ALU    : S3;        // any alu
 6338 %}
 6339 
 6340 // Integer ALU reg-mem operation
 6341 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6342 %{
 6343     single_instruction;
 6344     dst    : S5(write);
 6345     mem    : S3(read);
 6346     D0     : S0;        // big decoder only
 6347     ALU    : S4;        // any alu
 6348     MEM    : S3;        // any mem
 6349 %}
 6350 
 6351 // Integer mem operation (prefetch)
 6352 pipe_class ialu_mem(memory mem)
 6353 %{
 6354     single_instruction;
 6355     mem    : S3(read);
 6356     D0     : S0;        // big decoder only
 6357     MEM    : S3;        // any mem
 6358 %}
 6359 
 6360 // Integer Store to Memory
 6361 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6362 %{
 6363     single_instruction;
 6364     mem    : S3(read);
 6365     src    : S5(read);
 6366     D0     : S0;        // big decoder only
 6367     ALU    : S4;        // any alu
 6368     MEM    : S3;
 6369 %}
 6370 
 6371 // // Long Store to Memory
 6372 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6373 // %{
 6374 //     instruction_count(2);
 6375 //     mem    : S3(read);
 6376 //     src    : S5(read);
 6377 //     D0     : S0(2);          // big decoder only; twice
 6378 //     ALU    : S4(2);     // any 2 alus
 6379 //     MEM    : S3(2);  // Both mems
 6380 // %}
 6381 
 6382 // Integer Store to Memory
 6383 pipe_class ialu_mem_imm(memory mem)
 6384 %{
 6385     single_instruction;
 6386     mem    : S3(read);
 6387     D0     : S0;        // big decoder only
 6388     ALU    : S4;        // any alu
 6389     MEM    : S3;
 6390 %}
 6391 
 6392 // Integer ALU0 reg-reg operation
 6393 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6394 %{
 6395     single_instruction;
 6396     dst    : S4(write);
 6397     src    : S3(read);
 6398     D0     : S0;        // Big decoder only
 6399     ALU0   : S3;        // only alu0
 6400 %}
 6401 
 6402 // Integer ALU0 reg-mem operation
 6403 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6404 %{
 6405     single_instruction;
 6406     dst    : S5(write);
 6407     mem    : S3(read);
 6408     D0     : S0;        // big decoder only
 6409     ALU0   : S4;        // ALU0 only
 6410     MEM    : S3;        // any mem
 6411 %}
 6412 
 6413 // Integer ALU reg-reg operation
 6414 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6415 %{
 6416     single_instruction;
 6417     cr     : S4(write);
 6418     src1   : S3(read);
 6419     src2   : S3(read);
 6420     DECODE : S0;        // any decoder
 6421     ALU    : S3;        // any alu
 6422 %}
 6423 
 6424 // Integer ALU reg-imm operation
 6425 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6426 %{
 6427     single_instruction;
 6428     cr     : S4(write);
 6429     src1   : S3(read);
 6430     DECODE : S0;        // any decoder
 6431     ALU    : S3;        // any alu
 6432 %}
 6433 
 6434 // Integer ALU reg-mem operation
 6435 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6436 %{
 6437     single_instruction;
 6438     cr     : S4(write);
 6439     src1   : S3(read);
 6440     src2   : S3(read);
 6441     D0     : S0;        // big decoder only
 6442     ALU    : S4;        // any alu
 6443     MEM    : S3;
 6444 %}
 6445 
 6446 // Conditional move reg-reg
 6447 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6448 %{
 6449     instruction_count(4);
 6450     y      : S4(read);
 6451     q      : S3(read);
 6452     p      : S3(read);
 6453     DECODE : S0(4);     // any decoder
 6454 %}
 6455 
 6456 // Conditional move reg-reg
 6457 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6458 %{
 6459     single_instruction;
 6460     dst    : S4(write);
 6461     src    : S3(read);
 6462     cr     : S3(read);
 6463     DECODE : S0;        // any decoder
 6464 %}
 6465 
 6466 // Conditional move reg-mem
 6467 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6468 %{
 6469     single_instruction;
 6470     dst    : S4(write);
 6471     src    : S3(read);
 6472     cr     : S3(read);
 6473     DECODE : S0;        // any decoder
 6474     MEM    : S3;
 6475 %}
 6476 
 6477 // Conditional move reg-reg long
 6478 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6479 %{
 6480     single_instruction;
 6481     dst    : S4(write);
 6482     src    : S3(read);
 6483     cr     : S3(read);
 6484     DECODE : S0(2);     // any 2 decoders
 6485 %}
 6486 
 6487 // Float reg-reg operation
 6488 pipe_class fpu_reg(regD dst)
 6489 %{
 6490     instruction_count(2);
 6491     dst    : S3(read);
 6492     DECODE : S0(2);     // any 2 decoders
 6493     FPU    : S3;
 6494 %}
 6495 
 6496 // Float reg-reg operation
 6497 pipe_class fpu_reg_reg(regD dst, regD src)
 6498 %{
 6499     instruction_count(2);
 6500     dst    : S4(write);
 6501     src    : S3(read);
 6502     DECODE : S0(2);     // any 2 decoders
 6503     FPU    : S3;
 6504 %}
 6505 
 6506 // Float reg-reg operation
 6507 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6508 %{
 6509     instruction_count(3);
 6510     dst    : S4(write);
 6511     src1   : S3(read);
 6512     src2   : S3(read);
 6513     DECODE : S0(3);     // any 3 decoders
 6514     FPU    : S3(2);
 6515 %}
 6516 
 6517 // Float reg-reg operation
 6518 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6519 %{
 6520     instruction_count(4);
 6521     dst    : S4(write);
 6522     src1   : S3(read);
 6523     src2   : S3(read);
 6524     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoder slots
 6526     FPU    : S3(2);
 6527 %}
 6528 
 6529 // Float reg-reg operation
 6530 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6531 %{
 6532     instruction_count(4);
 6533     dst    : S4(write);
 6534     src1   : S3(read);
 6535     src2   : S3(read);
 6536     src3   : S3(read);
 6537     DECODE : S1(3);     // any 3 decoders
 6538     D0     : S0;        // Big decoder only
 6539     FPU    : S3(2);
 6540     MEM    : S3;
 6541 %}
 6542 
 6543 // Float reg-mem operation
 6544 pipe_class fpu_reg_mem(regD dst, memory mem)
 6545 %{
 6546     instruction_count(2);
 6547     dst    : S5(write);
 6548     mem    : S3(read);
 6549     D0     : S0;        // big decoder only
 6550     DECODE : S1;        // any decoder for FPU POP
 6551     FPU    : S4;
 6552     MEM    : S3;        // any mem
 6553 %}
 6554 
 6555 // Float reg-mem operation
 6556 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6557 %{
 6558     instruction_count(3);
 6559     dst    : S5(write);
 6560     src1   : S3(read);
 6561     mem    : S3(read);
 6562     D0     : S0;        // big decoder only
 6563     DECODE : S1(2);     // any decoder for FPU POP
 6564     FPU    : S4;
 6565     MEM    : S3;        // any mem
 6566 %}
 6567 
 6568 // Float mem-reg operation
 6569 pipe_class fpu_mem_reg(memory mem, regD src)
 6570 %{
 6571     instruction_count(2);
 6572     src    : S5(read);
 6573     mem    : S3(read);
 6574     DECODE : S0;        // any decoder for FPU PUSH
 6575     D0     : S1;        // big decoder only
 6576     FPU    : S4;
 6577     MEM    : S3;        // any mem
 6578 %}
 6579 
 6580 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6581 %{
 6582     instruction_count(3);
 6583     src1   : S3(read);
 6584     src2   : S3(read);
 6585     mem    : S3(read);
 6586     DECODE : S0(2);     // any decoder for FPU PUSH
 6587     D0     : S1;        // big decoder only
 6588     FPU    : S4;
 6589     MEM    : S3;        // any mem
 6590 %}
 6591 
 6592 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6593 %{
 6594     instruction_count(3);
 6595     src1   : S3(read);
 6596     src2   : S3(read);
 6597     mem    : S4(read);
 6598     DECODE : S0;        // any decoder for FPU PUSH
 6599     D0     : S0(2);     // big decoder only
 6600     FPU    : S4;
 6601     MEM    : S3(2);     // any mem
 6602 %}
 6603 
 6604 pipe_class fpu_mem_mem(memory dst, memory src1)
 6605 %{
 6606     instruction_count(2);
 6607     src1   : S3(read);
 6608     dst    : S4(read);
 6609     D0     : S0(2);     // big decoder only
 6610     MEM    : S3(2);     // any mem
 6611 %}
 6612 
 6613 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6614 %{
 6615     instruction_count(3);
 6616     src1   : S3(read);
 6617     src2   : S3(read);
 6618     dst    : S4(read);
 6619     D0     : S0(3);     // big decoder only
 6620     FPU    : S4;
 6621     MEM    : S3(3);     // any mem
 6622 %}
 6623 
 6624 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6625 %{
 6626     instruction_count(3);
 6627     src1   : S4(read);
 6628     mem    : S4(read);
 6629     DECODE : S0;        // any decoder for FPU PUSH
 6630     D0     : S0(2);     // big decoder only
 6631     FPU    : S4;
 6632     MEM    : S3(2);     // any mem
 6633 %}
 6634 
 6635 // Float load constant
 6636 pipe_class fpu_reg_con(regD dst)
 6637 %{
 6638     instruction_count(2);
 6639     dst    : S5(write);
 6640     D0     : S0;        // big decoder only for the load
 6641     DECODE : S1;        // any decoder for FPU POP
 6642     FPU    : S4;
 6643     MEM    : S3;        // any mem
 6644 %}
 6645 
 6646 // Float load constant
 6647 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6648 %{
 6649     instruction_count(3);
 6650     dst    : S5(write);
 6651     src    : S3(read);
 6652     D0     : S0;        // big decoder only for the load
 6653     DECODE : S1(2);     // any decoder for FPU POP
 6654     FPU    : S4;
 6655     MEM    : S3;        // any mem
 6656 %}
 6657 
// Unconditional branch
 6659 pipe_class pipe_jmp(label labl)
 6660 %{
 6661     single_instruction;
 6662     BR   : S3;
 6663 %}
 6664 
 6665 // Conditional branch
 6666 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6667 %{
 6668     single_instruction;
 6669     cr    : S1(read);
 6670     BR    : S3;
 6671 %}
 6672 
 6673 // Allocation idiom
 6674 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6675 %{
 6676     instruction_count(1); force_serialization;
 6677     fixed_latency(6);
 6678     heap_ptr : S3(read);
 6679     DECODE   : S0(3);
 6680     D0       : S2;
 6681     MEM      : S3;
 6682     ALU      : S3(2);
 6683     dst      : S5(write);
 6684     BR       : S5;
 6685 %}
 6686 
 6687 // Generic big/slow expanded idiom
 6688 pipe_class pipe_slow()
 6689 %{
 6690     instruction_count(10); multiple_bundles; force_serialization;
 6691     fixed_latency(100);
 6692     D0  : S0(2);
 6693     MEM : S3(2);
 6694 %}
 6695 
 6696 // The real do-nothing guy
 6697 pipe_class empty()
 6698 %{
 6699     instruction_count(0);
 6700 %}
 6701 
 6702 // Define the class for the Nop node
 6703 define
 6704 %{
 6705    MachNop = empty;
 6706 %}
 6707 
 6708 %}
 6709 
 6710 //----------INSTRUCTIONS-------------------------------------------------------
 6711 //
 6712 // match      -- States which machine-independent subtree may be replaced
 6713 //               by this instruction.
 6714 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6715 //               selection to identify a minimum cost tree of machine
 6716 //               instructions that matches a tree of machine-independent
 6717 //               instructions.
 6718 // format     -- A string providing the disassembly for this instruction.
 6719 //               The value of an instruction's operand may be inserted
 6720 //               by referring to it with a '$' prefix.
 6721 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6722 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6724 //               indicate the type of machine instruction, while secondary
 6725 //               and tertiary are often used for prefix options or addressing
 6726 //               modes.
 6727 // ins_encode -- A list of encode classes with parameters. The encode class
 6728 //               name must have been defined in an 'enc_class' specification
 6729 //               in the encode section of the architecture description.
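
// Putting those pieces together, a minimal commented-out sketch of the usual
// rule shape (in the style of the addI_rReg rule that appears later on):
//
//   instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));  // replaces this ideal subtree
//     effect(KILL cr);                // addl clobbers RFLAGS
//     format %{ "addl    $dst, $src" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}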
 6730 
 6731 // ============================================================================
 6732 
 6733 instruct ShouldNotReachHere() %{
 6734   match(Halt);
 6735   format %{ "stop\t# ShouldNotReachHere" %}
 6736   ins_encode %{
 6737     if (is_reachable()) {
 6738       const char* str = __ code_string(_halt_reason);
 6739       __ stop(str);
 6740     }
 6741   %}
 6742   ins_pipe(pipe_slow);
 6743 %}
 6744 
 6745 // ============================================================================
 6746 
 6747 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6748 // Load Float
 6749 instruct MoveF2VL(vlRegF dst, regF src) %{
 6750   match(Set dst src);
 6751   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6752   ins_encode %{
 6753     ShouldNotReachHere();
 6754   %}
 6755   ins_pipe( fpu_reg_reg );
 6756 %}
 6757 
 6758 // Load Float
 6759 instruct MoveF2LEG(legRegF dst, regF src) %{
 6760   match(Set dst src);
 6761   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6762   ins_encode %{
 6763     ShouldNotReachHere();
 6764   %}
 6765   ins_pipe( fpu_reg_reg );
 6766 %}
 6767 
 6768 // Load Float
 6769 instruct MoveVL2F(regF dst, vlRegF src) %{
 6770   match(Set dst src);
 6771   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6772   ins_encode %{
 6773     ShouldNotReachHere();
 6774   %}
 6775   ins_pipe( fpu_reg_reg );
 6776 %}
 6777 
 6778 // Load Float
 6779 instruct MoveLEG2F(regF dst, legRegF src) %{
 6780   match(Set dst src);
 6781   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6782   ins_encode %{
 6783     ShouldNotReachHere();
 6784   %}
 6785   ins_pipe( fpu_reg_reg );
 6786 %}
 6787 
 6788 // Load Double
 6789 instruct MoveD2VL(vlRegD dst, regD src) %{
 6790   match(Set dst src);
 6791   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6792   ins_encode %{
 6793     ShouldNotReachHere();
 6794   %}
 6795   ins_pipe( fpu_reg_reg );
 6796 %}
 6797 
 6798 // Load Double
 6799 instruct MoveD2LEG(legRegD dst, regD src) %{
 6800   match(Set dst src);
 6801   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6802   ins_encode %{
 6803     ShouldNotReachHere();
 6804   %}
 6805   ins_pipe( fpu_reg_reg );
 6806 %}
 6807 
 6808 // Load Double
 6809 instruct MoveVL2D(regD dst, vlRegD src) %{
 6810   match(Set dst src);
 6811   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6812   ins_encode %{
 6813     ShouldNotReachHere();
 6814   %}
 6815   ins_pipe( fpu_reg_reg );
 6816 %}
 6817 
 6818 // Load Double
 6819 instruct MoveLEG2D(regD dst, legRegD src) %{
 6820   match(Set dst src);
 6821   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6822   ins_encode %{
 6823     ShouldNotReachHere();
 6824   %}
 6825   ins_pipe( fpu_reg_reg );
 6826 %}
 6827 
 6828 //----------Load/Store/Move Instructions---------------------------------------
 6829 //----------Load Instructions--------------------------------------------------
 6830 
 6831 // Load Byte (8 bit signed)
 6832 instruct loadB(rRegI dst, memory mem)
 6833 %{
 6834   match(Set dst (LoadB mem));
 6835 
 6836   ins_cost(125);
 6837   format %{ "movsbl  $dst, $mem\t# byte" %}
 6838 
 6839   ins_encode %{
 6840     __ movsbl($dst$$Register, $mem$$Address);
 6841   %}
 6842 
 6843   ins_pipe(ialu_reg_mem);
 6844 %}
 6845 
 6846 // Load Byte (8 bit signed) into Long Register
 6847 instruct loadB2L(rRegL dst, memory mem)
 6848 %{
 6849   match(Set dst (ConvI2L (LoadB mem)));
 6850 
 6851   ins_cost(125);
 6852   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6853 
 6854   ins_encode %{
 6855     __ movsbq($dst$$Register, $mem$$Address);
 6856   %}
 6857 
 6858   ins_pipe(ialu_reg_mem);
 6859 %}
 6860 
 6861 // Load Unsigned Byte (8 bit UNsigned)
 6862 instruct loadUB(rRegI dst, memory mem)
 6863 %{
 6864   match(Set dst (LoadUB mem));
 6865 
 6866   ins_cost(125);
 6867   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6868 
 6869   ins_encode %{
 6870     __ movzbl($dst$$Register, $mem$$Address);
 6871   %}
 6872 
 6873   ins_pipe(ialu_reg_mem);
 6874 %}
 6875 
 6876 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6877 instruct loadUB2L(rRegL dst, memory mem)
 6878 %{
 6879   match(Set dst (ConvI2L (LoadUB mem)));
 6880 
 6881   ins_cost(125);
 6882   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6883 
 6884   ins_encode %{
 6885     __ movzbq($dst$$Register, $mem$$Address);
 6886   %}
 6887 
 6888   ins_pipe(ialu_reg_mem);
 6889 %}
 6890 
 6891 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6892 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6893   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6894   effect(KILL cr);
 6895 
 6896   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6897             "andl    $dst, right_n_bits($mask, 8)" %}
 6898   ins_encode %{
 6899     Register Rdst = $dst$$Register;
 6900     __ movzbq(Rdst, $mem$$Address);
 6901     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6902   %}
 6903   ins_pipe(ialu_reg_mem);
 6904 %}
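
// Worked example of the masking above: bits of $mask above bit 7 cannot
// survive a zero-extended byte load, so they are dropped when the andl is
// emitted, e.g.
//
//   $mask == 0x10F:  0x10F & right_n_bits(8) == 0x10F & 0xFF == 0x0F
//
// leaving "andl dst, 0x0F" after the movzbq.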
 6905 
 6906 // Load Short (16 bit signed)
 6907 instruct loadS(rRegI dst, memory mem)
 6908 %{
 6909   match(Set dst (LoadS mem));
 6910 
 6911   ins_cost(125);
 6912   format %{ "movswl $dst, $mem\t# short" %}
 6913 
 6914   ins_encode %{
 6915     __ movswl($dst$$Register, $mem$$Address);
 6916   %}
 6917 
 6918   ins_pipe(ialu_reg_mem);
 6919 %}
 6920 
 6921 // Load Short (16 bit signed) to Byte (8 bit signed)
 6922 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6923   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6924 
 6925   ins_cost(125);
 6926   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6927   ins_encode %{
 6928     __ movsbl($dst$$Register, $mem$$Address);
 6929   %}
 6930   ins_pipe(ialu_reg_mem);
 6931 %}
 6932 
 6933 // Load Short (16 bit signed) into Long Register
 6934 instruct loadS2L(rRegL dst, memory mem)
 6935 %{
 6936   match(Set dst (ConvI2L (LoadS mem)));
 6937 
 6938   ins_cost(125);
 6939   format %{ "movswq $dst, $mem\t# short -> long" %}
 6940 
 6941   ins_encode %{
 6942     __ movswq($dst$$Register, $mem$$Address);
 6943   %}
 6944 
 6945   ins_pipe(ialu_reg_mem);
 6946 %}
 6947 
 6948 // Load Unsigned Short/Char (16 bit UNsigned)
 6949 instruct loadUS(rRegI dst, memory mem)
 6950 %{
 6951   match(Set dst (LoadUS mem));
 6952 
 6953   ins_cost(125);
 6954   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 6955 
 6956   ins_encode %{
 6957     __ movzwl($dst$$Register, $mem$$Address);
 6958   %}
 6959 
 6960   ins_pipe(ialu_reg_mem);
 6961 %}
 6962 
 6963 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 6964 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6965   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 6966 
 6967   ins_cost(125);
 6968   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 6969   ins_encode %{
 6970     __ movsbl($dst$$Register, $mem$$Address);
 6971   %}
 6972   ins_pipe(ialu_reg_mem);
 6973 %}
 6974 
 6975 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 6976 instruct loadUS2L(rRegL dst, memory mem)
 6977 %{
 6978   match(Set dst (ConvI2L (LoadUS mem)));
 6979 
 6980   ins_cost(125);
 6981   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 6982 
 6983   ins_encode %{
 6984     __ movzwq($dst$$Register, $mem$$Address);
 6985   %}
 6986 
 6987   ins_pipe(ialu_reg_mem);
 6988 %}
 6989 
 6990 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 6991 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 6992   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 6993 
 6994   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 6995   ins_encode %{
 6996     __ movzbq($dst$$Register, $mem$$Address);
 6997   %}
 6998   ins_pipe(ialu_reg_mem);
 6999 %}
 7000 
 7001 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7002 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7003   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7004   effect(KILL cr);
 7005 
 7006   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7007             "andl    $dst, right_n_bits($mask, 16)" %}
 7008   ins_encode %{
 7009     Register Rdst = $dst$$Register;
 7010     __ movzwq(Rdst, $mem$$Address);
 7011     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7012   %}
 7013   ins_pipe(ialu_reg_mem);
 7014 %}
 7015 
 7016 // Load Integer
 7017 instruct loadI(rRegI dst, memory mem)
 7018 %{
 7019   match(Set dst (LoadI mem));
 7020 
 7021   ins_cost(125);
 7022   format %{ "movl    $dst, $mem\t# int" %}
 7023 
 7024   ins_encode %{
 7025     __ movl($dst$$Register, $mem$$Address);
 7026   %}
 7027 
 7028   ins_pipe(ialu_reg_mem);
 7029 %}
 7030 
 7031 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7032 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7033   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7034 
 7035   ins_cost(125);
 7036   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7037   ins_encode %{
 7038     __ movsbl($dst$$Register, $mem$$Address);
 7039   %}
 7040   ins_pipe(ialu_reg_mem);
 7041 %}
 7042 
 7043 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7044 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7045   match(Set dst (AndI (LoadI mem) mask));
 7046 
 7047   ins_cost(125);
 7048   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7049   ins_encode %{
 7050     __ movzbl($dst$$Register, $mem$$Address);
 7051   %}
 7052   ins_pipe(ialu_reg_mem);
 7053 %}
 7054 
 7055 // Load Integer (32 bit signed) to Short (16 bit signed)
 7056 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7057   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7058 
 7059   ins_cost(125);
 7060   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7061   ins_encode %{
 7062     __ movswl($dst$$Register, $mem$$Address);
 7063   %}
 7064   ins_pipe(ialu_reg_mem);
 7065 %}
 7066 
 7067 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7068 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7069   match(Set dst (AndI (LoadI mem) mask));
 7070 
 7071   ins_cost(125);
 7072   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7073   ins_encode %{
 7074     __ movzwl($dst$$Register, $mem$$Address);
 7075   %}
 7076   ins_pipe(ialu_reg_mem);
 7077 %}
 7078 
 7079 // Load Integer into Long Register
 7080 instruct loadI2L(rRegL dst, memory mem)
 7081 %{
 7082   match(Set dst (ConvI2L (LoadI mem)));
 7083 
 7084   ins_cost(125);
 7085   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7086 
 7087   ins_encode %{
 7088     __ movslq($dst$$Register, $mem$$Address);
 7089   %}
 7090 
 7091   ins_pipe(ialu_reg_mem);
 7092 %}
 7093 
 7094 // Load Integer with mask 0xFF into Long Register
 7095 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7096   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7097 
 7098   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7099   ins_encode %{
 7100     __ movzbq($dst$$Register, $mem$$Address);
 7101   %}
 7102   ins_pipe(ialu_reg_mem);
 7103 %}
 7104 
 7105 // Load Integer with mask 0xFFFF into Long Register
 7106 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7107   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7108 
 7109   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7110   ins_encode %{
 7111     __ movzwq($dst$$Register, $mem$$Address);
 7112   %}
 7113   ins_pipe(ialu_reg_mem);
 7114 %}
 7115 
 7116 // Load Integer with a 31-bit mask into Long Register
 7117 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7118   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7119   effect(KILL cr);
 7120 
 7121   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7122             "andl    $dst, $mask" %}
 7123   ins_encode %{
 7124     Register Rdst = $dst$$Register;
 7125     __ movl(Rdst, $mem$$Address);
 7126     __ andl(Rdst, $mask$$constant);
 7127   %}
 7128   ins_pipe(ialu_reg_mem);
 7129 %}
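
// Why the mask is restricted to immU31: movl already zero-extends into the
// upper half of $dst, and with bit 31 of the mask clear the masked value is
// always non-negative, so this zero extension agrees with the sign extension
// that ConvI2L demands. A sketch:
//
//   movl  dst, [mem]        // dst = zero-extended 32-bit value
//   andl  dst, 0x7FFFFFFF   // bit 31 clear => sign bits already correct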
 7130 
 7131 // Load Unsigned Integer into Long Register
 7132 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7133 %{
 7134   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7135 
 7136   ins_cost(125);
 7137   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7138 
 7139   ins_encode %{
 7140     __ movl($dst$$Register, $mem$$Address);
 7141   %}
 7142 
 7143   ins_pipe(ialu_reg_mem);
 7144 %}
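
// No explicit masking is emitted: on x86-64 a 32-bit mov clears bits 63..32
// of its destination, so the single movl implements the whole
// (AndL (ConvI2L (LoadI mem)) 0xFFFFFFFF) tree.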
 7145 
 7146 // Load Long
 7147 instruct loadL(rRegL dst, memory mem)
 7148 %{
 7149   match(Set dst (LoadL mem));
 7150 
 7151   ins_cost(125);
 7152   format %{ "movq    $dst, $mem\t# long" %}
 7153 
 7154   ins_encode %{
 7155     __ movq($dst$$Register, $mem$$Address);
 7156   %}
 7157 
 7158   ins_pipe(ialu_reg_mem); // XXX
 7159 %}
 7160 
 7161 // Load Range
 7162 instruct loadRange(rRegI dst, memory mem)
 7163 %{
 7164   match(Set dst (LoadRange mem));
 7165 
 7166   ins_cost(125); // XXX
 7167   format %{ "movl    $dst, $mem\t# range" %}
 7168   ins_encode %{
 7169     __ movl($dst$$Register, $mem$$Address);
 7170   %}
 7171   ins_pipe(ialu_reg_mem);
 7172 %}
 7173 
 7174 // Load Pointer
 7175 instruct loadP(rRegP dst, memory mem)
 7176 %{
 7177   match(Set dst (LoadP mem));
 7178   predicate(n->as_Load()->barrier_data() == 0);
 7179 
 7180   ins_cost(125); // XXX
 7181   format %{ "movq    $dst, $mem\t# ptr" %}
 7182   ins_encode %{
 7183     __ movq($dst$$Register, $mem$$Address);
 7184   %}
 7185   ins_pipe(ialu_reg_mem); // XXX
 7186 %}
 7187 
 7188 // Load Compressed Pointer
 7189 instruct loadN(rRegN dst, memory mem)
 7190 %{
 7191    predicate(n->as_Load()->barrier_data() == 0);
 7192    match(Set dst (LoadN mem));
 7193 
 7194    ins_cost(125); // XXX
 7195    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7196    ins_encode %{
 7197      __ movl($dst$$Register, $mem$$Address);
 7198    %}
 7199    ins_pipe(ialu_reg_mem); // XXX
 7200 %}
 7201 
 7202 
 7203 // Load Klass Pointer
 7204 instruct loadKlass(rRegP dst, memory mem)
 7205 %{
 7206   match(Set dst (LoadKlass mem));
 7207 
 7208   ins_cost(125); // XXX
 7209   format %{ "movq    $dst, $mem\t# class" %}
 7210   ins_encode %{
 7211     __ movq($dst$$Register, $mem$$Address);
 7212   %}
 7213   ins_pipe(ialu_reg_mem); // XXX
 7214 %}
 7215 
 7216 // Load narrow Klass Pointer
 7217 instruct loadNKlass(rRegN dst, memory mem)
 7218 %{
 7219   predicate(!UseCompactObjectHeaders);
 7220   match(Set dst (LoadNKlass mem));
 7221 
 7222   ins_cost(125); // XXX
 7223   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7224   ins_encode %{
 7225     __ movl($dst$$Register, $mem$$Address);
 7226   %}
 7227   ins_pipe(ialu_reg_mem); // XXX
 7228 %}
 7229 
 7230 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7231 %{
 7232   predicate(UseCompactObjectHeaders);
 7233   match(Set dst (LoadNKlass mem));
 7234   effect(KILL cr);
 7235   ins_cost(125);
 7236   format %{
 7237     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7238     "shrl    $dst, markWord::klass_shift_at_offset"
 7239   %}
 7240   ins_encode %{
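    // With compact object headers the narrow klass pointer is stored shifted
    // within the mark word, so it must be shifted down after the load. The
    // APX NDD form can fold the load and the shift into a single instruction.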
    if (UseAPX) {
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
 7248   %}
 7249   ins_pipe(ialu_reg_mem);
 7250 %}
 7251 
 7252 // Load Float
 7253 instruct loadF(regF dst, memory mem)
 7254 %{
 7255   match(Set dst (LoadF mem));
 7256 
 7257   ins_cost(145); // XXX
 7258   format %{ "movss   $dst, $mem\t# float" %}
 7259   ins_encode %{
 7260     __ movflt($dst$$XMMRegister, $mem$$Address);
 7261   %}
 7262   ins_pipe(pipe_slow); // XXX
 7263 %}
 7264 
 7265 // Load Double
 7266 instruct loadD_partial(regD dst, memory mem)
 7267 %{
 7268   predicate(!UseXmmLoadAndClearUpper);
 7269   match(Set dst (LoadD mem));
 7270 
 7271   ins_cost(145); // XXX
 7272   format %{ "movlpd  $dst, $mem\t# double" %}
 7273   ins_encode %{
 7274     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7275   %}
 7276   ins_pipe(pipe_slow); // XXX
 7277 %}
 7278 
 7279 instruct loadD(regD dst, memory mem)
 7280 %{
 7281   predicate(UseXmmLoadAndClearUpper);
 7282   match(Set dst (LoadD mem));
 7283 
 7284   ins_cost(145); // XXX
 7285   format %{ "movsd   $dst, $mem\t# double" %}
 7286   ins_encode %{
 7287     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7288   %}
 7289   ins_pipe(pipe_slow); // XXX
 7290 %}
 7291 
 7292 // max = java.lang.Math.max(float a, float b)
 7293 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
 7294   predicate(VM_Version::supports_avx10_2());
 7295   match(Set dst (MaxF a b));
 7296   format %{ "maxF $dst, $a, $b" %}
 7297   ins_encode %{
 7298     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7299   %}
 7300   ins_pipe( pipe_slow );
 7301 %}
 7302 
 7303 // max = java.lang.Math.max(float a, float b)
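// The AVX rules below need extra temporaries because Java's Math.max/min
// semantics for NaN and signed zeros differ from the raw SSE/AVX
// maxss/minss behavior; the result is blended from explicit compares.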
 7304 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7305   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7306   match(Set dst (MaxF a b));
 7307   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7308   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7309   ins_encode %{
 7310     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7311   %}
 7312   ins_pipe( pipe_slow );
 7313 %}
 7314 
 7315 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7316   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7317   match(Set dst (MaxF a b));
 7318   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7319 
 7320   format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7321   ins_encode %{
 7322     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7323                     false /*min*/, true /*single*/);
 7324   %}
 7325   ins_pipe( pipe_slow );
 7326 %}
 7327 
 7328 // max = java.lang.Math.max(double a, double b)
 7329 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
 7330   predicate(VM_Version::supports_avx10_2());
 7331   match(Set dst (MaxD a b));
 7332   format %{ "maxD $dst, $a, $b" %}
 7333   ins_encode %{
 7334     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7335   %}
 7336   ins_pipe( pipe_slow );
 7337 %}
 7338 
 7339 // max = java.lang.Math.max(double a, double b)
 7340 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7341   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7342   match(Set dst (MaxD a b));
 7343   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7344   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7345   ins_encode %{
 7346     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7347   %}
 7348   ins_pipe( pipe_slow );
 7349 %}
 7350 
 7351 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7352   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7353   match(Set dst (MaxD a b));
 7354   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7355 
 7356   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7357   ins_encode %{
 7358     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7359                     false /*min*/, false /*single*/);
 7360   %}
 7361   ins_pipe( pipe_slow );
 7362 %}
 7363 
// min = java.lang.Math.min(float a, float b)
 7365 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
 7366   predicate(VM_Version::supports_avx10_2());
 7367   match(Set dst (MinF a b));
 7368   format %{ "minF $dst, $a, $b" %}
 7369   ins_encode %{
 7370     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7371   %}
 7372   ins_pipe( pipe_slow );
 7373 %}
 7374 
 7375 // min = java.lang.Math.min(float a, float b)
 7376 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7377   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7378   match(Set dst (MinF a b));
 7379   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7380   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7381   ins_encode %{
 7382     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7383   %}
 7384   ins_pipe( pipe_slow );
 7385 %}
 7386 
 7387 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7388   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7389   match(Set dst (MinF a b));
 7390   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7391 
 7392   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7393   ins_encode %{
 7394     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7395                     true /*min*/, true /*single*/);
 7396   %}
 7397   ins_pipe( pipe_slow );
 7398 %}
 7399 
// min = java.lang.Math.min(double a, double b)
 7401 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
 7402   predicate(VM_Version::supports_avx10_2());
 7403   match(Set dst (MinD a b));
 7404   format %{ "minD $dst, $a, $b" %}
 7405   ins_encode %{
 7406     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7407   %}
 7408   ins_pipe( pipe_slow );
 7409 %}
 7410 
 7411 // min = java.lang.Math.min(double a, double b)
 7412 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7413   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7414   match(Set dst (MinD a b));
 7415   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7416     format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7417   ins_encode %{
 7418     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7419   %}
 7420   ins_pipe( pipe_slow );
 7421 %}
 7422 
 7423 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7424   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7425   match(Set dst (MinD a b));
 7426   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7427 
 7428   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7429   ins_encode %{
 7430     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7431                     true /*min*/, false /*single*/);
 7432   %}
 7433   ins_pipe( pipe_slow );
 7434 %}
 7435 
 7436 // Load Effective Address
 7437 instruct leaP8(rRegP dst, indOffset8 mem)
 7438 %{
 7439   match(Set dst mem);
 7440 
 7441   ins_cost(110); // XXX
 7442   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7443   ins_encode %{
 7444     __ leaq($dst$$Register, $mem$$Address);
 7445   %}
 7446   ins_pipe(ialu_reg_reg_fat);
 7447 %}
 7448 
 7449 instruct leaP32(rRegP dst, indOffset32 mem)
 7450 %{
 7451   match(Set dst mem);
 7452 
 7453   ins_cost(110);
 7454   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7455   ins_encode %{
 7456     __ leaq($dst$$Register, $mem$$Address);
 7457   %}
 7458   ins_pipe(ialu_reg_reg_fat);
 7459 %}
 7460 
 7461 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7462 %{
 7463   match(Set dst mem);
 7464 
 7465   ins_cost(110);
 7466   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7467   ins_encode %{
 7468     __ leaq($dst$$Register, $mem$$Address);
 7469   %}
 7470   ins_pipe(ialu_reg_reg_fat);
 7471 %}
 7472 
 7473 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7474 %{
 7475   match(Set dst mem);
 7476 
 7477   ins_cost(110);
 7478   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7479   ins_encode %{
 7480     __ leaq($dst$$Register, $mem$$Address);
 7481   %}
 7482   ins_pipe(ialu_reg_reg_fat);
 7483 %}
 7484 
 7485 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7486 %{
 7487   match(Set dst mem);
 7488 
 7489   ins_cost(110);
 7490   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7491   ins_encode %{
 7492     __ leaq($dst$$Register, $mem$$Address);
 7493   %}
 7494   ins_pipe(ialu_reg_reg_fat);
 7495 %}
 7496 
 7497 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7498 %{
 7499   match(Set dst mem);
 7500 
 7501   ins_cost(110);
 7502   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7503   ins_encode %{
 7504     __ leaq($dst$$Register, $mem$$Address);
 7505   %}
 7506   ins_pipe(ialu_reg_reg_fat);
 7507 %}
 7508 
 7509 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7510 %{
 7511   match(Set dst mem);
 7512 
 7513   ins_cost(110);
 7514   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7515   ins_encode %{
 7516     __ leaq($dst$$Register, $mem$$Address);
 7517   %}
 7518   ins_pipe(ialu_reg_reg_fat);
 7519 %}
 7520 
 7521 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7522 %{
 7523   match(Set dst mem);
 7524 
 7525   ins_cost(110);
 7526   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7527   ins_encode %{
 7528     __ leaq($dst$$Register, $mem$$Address);
 7529   %}
 7530   ins_pipe(ialu_reg_reg_fat);
 7531 %}
 7532 
// Load Effective Address using a narrow (32-bit) oop
 7534 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7535 %{
 7536   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7537   match(Set dst mem);
 7538 
 7539   ins_cost(110);
 7540   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7541   ins_encode %{
 7542     __ leaq($dst$$Register, $mem$$Address);
 7543   %}
 7544   ins_pipe(ialu_reg_reg_fat);
 7545 %}
 7546 
 7547 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7548 %{
 7549   predicate(CompressedOops::shift() == 0);
 7550   match(Set dst mem);
 7551 
 7552   ins_cost(110); // XXX
 7553   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7554   ins_encode %{
 7555     __ leaq($dst$$Register, $mem$$Address);
 7556   %}
 7557   ins_pipe(ialu_reg_reg_fat);
 7558 %}
 7559 
 7560 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7561 %{
 7562   predicate(CompressedOops::shift() == 0);
 7563   match(Set dst mem);
 7564 
 7565   ins_cost(110);
 7566   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7567   ins_encode %{
 7568     __ leaq($dst$$Register, $mem$$Address);
 7569   %}
 7570   ins_pipe(ialu_reg_reg_fat);
 7571 %}
 7572 
 7573 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7574 %{
 7575   predicate(CompressedOops::shift() == 0);
 7576   match(Set dst mem);
 7577 
 7578   ins_cost(110);
 7579   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7580   ins_encode %{
 7581     __ leaq($dst$$Register, $mem$$Address);
 7582   %}
 7583   ins_pipe(ialu_reg_reg_fat);
 7584 %}
 7585 
 7586 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7587 %{
 7588   predicate(CompressedOops::shift() == 0);
 7589   match(Set dst mem);
 7590 
 7591   ins_cost(110);
 7592   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7593   ins_encode %{
 7594     __ leaq($dst$$Register, $mem$$Address);
 7595   %}
 7596   ins_pipe(ialu_reg_reg_fat);
 7597 %}
 7598 
 7599 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7600 %{
 7601   predicate(CompressedOops::shift() == 0);
 7602   match(Set dst mem);
 7603 
 7604   ins_cost(110);
 7605   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7606   ins_encode %{
 7607     __ leaq($dst$$Register, $mem$$Address);
 7608   %}
 7609   ins_pipe(ialu_reg_reg_fat);
 7610 %}
 7611 
 7612 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7613 %{
 7614   predicate(CompressedOops::shift() == 0);
 7615   match(Set dst mem);
 7616 
 7617   ins_cost(110);
 7618   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7619   ins_encode %{
 7620     __ leaq($dst$$Register, $mem$$Address);
 7621   %}
 7622   ins_pipe(ialu_reg_reg_fat);
 7623 %}
 7624 
 7625 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7626 %{
 7627   predicate(CompressedOops::shift() == 0);
 7628   match(Set dst mem);
 7629 
 7630   ins_cost(110);
 7631   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7632   ins_encode %{
 7633     __ leaq($dst$$Register, $mem$$Address);
 7634   %}
 7635   ins_pipe(ialu_reg_reg_fat);
 7636 %}
 7637 
 7638 instruct loadConI(rRegI dst, immI src)
 7639 %{
 7640   match(Set dst src);
 7641 
 7642   format %{ "movl    $dst, $src\t# int" %}
 7643   ins_encode %{
 7644     __ movl($dst$$Register, $src$$constant);
 7645   %}
 7646   ins_pipe(ialu_reg_fat); // XXX
 7647 %}
 7648 
 7649 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7650 %{
 7651   match(Set dst src);
 7652   effect(KILL cr);
 7653 
 7654   ins_cost(50);
 7655   format %{ "xorl    $dst, $dst\t# int" %}
 7656   ins_encode %{
 7657     __ xorl($dst$$Register, $dst$$Register);
 7658   %}
 7659   ins_pipe(ialu_reg);
 7660 %}
 7661 
 7662 instruct loadConL(rRegL dst, immL src)
 7663 %{
 7664   match(Set dst src);
 7665 
 7666   ins_cost(150);
 7667   format %{ "movq    $dst, $src\t# long" %}
 7668   ins_encode %{
 7669     __ mov64($dst$$Register, $src$$constant);
 7670   %}
 7671   ins_pipe(ialu_reg);
 7672 %}
 7673 
 7674 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7675 %{
 7676   match(Set dst src);
 7677   effect(KILL cr);
 7678 
 7679   ins_cost(50);
 7680   format %{ "xorl    $dst, $dst\t# long" %}
 7681   ins_encode %{
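    // A 32-bit xor clears the whole 64-bit register, since 32-bit writes
    // zero-extend on x86-64, and its encoding needs no REX.W prefix.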
 7682     __ xorl($dst$$Register, $dst$$Register);
 7683   %}
 7684   ins_pipe(ialu_reg); // XXX
 7685 %}
 7686 
 7687 instruct loadConUL32(rRegL dst, immUL32 src)
 7688 %{
 7689   match(Set dst src);
 7690 
 7691   ins_cost(60);
 7692   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7693   ins_encode %{
 7694     __ movl($dst$$Register, $src$$constant);
 7695   %}
 7696   ins_pipe(ialu_reg);
 7697 %}
 7698 
 7699 instruct loadConL32(rRegL dst, immL32 src)
 7700 %{
 7701   match(Set dst src);
 7702 
 7703   ins_cost(70);
 7704   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7705   ins_encode %{
 7706     __ movq($dst$$Register, $src$$constant);
 7707   %}
 7708   ins_pipe(ialu_reg);
 7709 %}
 7710 
 7711 instruct loadConP(rRegP dst, immP con) %{
 7712   match(Set dst con);
 7713 
 7714   format %{ "movq    $dst, $con\t# ptr" %}
 7715   ins_encode %{
 7716     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7717   %}
 7718   ins_pipe(ialu_reg_fat); // XXX
 7719 %}
 7720 
 7721 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7722 %{
 7723   match(Set dst src);
 7724   effect(KILL cr);
 7725 
 7726   ins_cost(50);
 7727   format %{ "xorl    $dst, $dst\t# ptr" %}
 7728   ins_encode %{
 7729     __ xorl($dst$$Register, $dst$$Register);
 7730   %}
 7731   ins_pipe(ialu_reg);
 7732 %}
 7733 
 7734 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7735 %{
 7736   match(Set dst src);
 7737   effect(KILL cr);
 7738 
 7739   ins_cost(60);
 7740   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7741   ins_encode %{
 7742     __ movl($dst$$Register, $src$$constant);
 7743   %}
 7744   ins_pipe(ialu_reg);
 7745 %}
 7746 
 7747 instruct loadConF(regF dst, immF con) %{
 7748   match(Set dst con);
 7749   ins_cost(125);
 7750   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7751   ins_encode %{
 7752     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7753   %}
 7754   ins_pipe(pipe_slow);
 7755 %}
 7756 
 7757 instruct loadConH(regF dst, immH con) %{
 7758   match(Set dst con);
 7759   ins_cost(125);
 7760   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7761   ins_encode %{
 7762     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7763   %}
 7764   ins_pipe(pipe_slow);
 7765 %}
 7766 
 7767 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7768   match(Set dst src);
 7769   effect(KILL cr);
 7770   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7771   ins_encode %{
 7772     __ xorq($dst$$Register, $dst$$Register);
 7773   %}
 7774   ins_pipe(ialu_reg);
 7775 %}
 7776 
 7777 instruct loadConN(rRegN dst, immN src) %{
 7778   match(Set dst src);
 7779 
 7780   ins_cost(125);
 7781   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7782   ins_encode %{
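    // The null narrow oop is matched by loadConN0, so con != nullptr here.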
 7783     address con = (address)$src$$constant;
 7784     if (con == nullptr) {
 7785       ShouldNotReachHere();
 7786     } else {
 7787       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7788     }
 7789   %}
 7790   ins_pipe(ialu_reg_fat); // XXX
 7791 %}
 7792 
 7793 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7794   match(Set dst src);
 7795 
 7796   ins_cost(125);
 7797   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7798   ins_encode %{
 7799     address con = (address)$src$$constant;
 7800     if (con == nullptr) {
 7801       ShouldNotReachHere();
 7802     } else {
 7803       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7804     }
 7805   %}
 7806   ins_pipe(ialu_reg_fat); // XXX
 7807 %}
 7808 
 7809 instruct loadConF0(regF dst, immF0 src)
 7810 %{
 7811   match(Set dst src);
 7812   ins_cost(100);
 7813 
 7814   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7815   ins_encode %{
 7816     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7817   %}
 7818   ins_pipe(pipe_slow);
 7819 %}
 7820 
// Use the same format since predicate() cannot be used here.
 7822 instruct loadConD(regD dst, immD con) %{
 7823   match(Set dst con);
 7824   ins_cost(125);
 7825   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7826   ins_encode %{
 7827     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7828   %}
 7829   ins_pipe(pipe_slow);
 7830 %}
 7831 
 7832 instruct loadConD0(regD dst, immD0 src)
 7833 %{
 7834   match(Set dst src);
 7835   ins_cost(100);
 7836 
 7837   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7838   ins_encode %{
 7839     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7840   %}
 7841   ins_pipe(pipe_slow);
 7842 %}
 7843 
 7844 instruct loadSSI(rRegI dst, stackSlotI src)
 7845 %{
 7846   match(Set dst src);
 7847 
 7848   ins_cost(125);
 7849   format %{ "movl    $dst, $src\t# int stk" %}
 7850   ins_encode %{
 7851     __ movl($dst$$Register, $src$$Address);
 7852   %}
 7853   ins_pipe(ialu_reg_mem);
 7854 %}
 7855 
 7856 instruct loadSSL(rRegL dst, stackSlotL src)
 7857 %{
 7858   match(Set dst src);
 7859 
 7860   ins_cost(125);
 7861   format %{ "movq    $dst, $src\t# long stk" %}
 7862   ins_encode %{
 7863     __ movq($dst$$Register, $src$$Address);
 7864   %}
 7865   ins_pipe(ialu_reg_mem);
 7866 %}
 7867 
 7868 instruct loadSSP(rRegP dst, stackSlotP src)
 7869 %{
 7870   match(Set dst src);
 7871 
 7872   ins_cost(125);
 7873   format %{ "movq    $dst, $src\t# ptr stk" %}
 7874   ins_encode %{
 7875     __ movq($dst$$Register, $src$$Address);
 7876   %}
 7877   ins_pipe(ialu_reg_mem);
 7878 %}
 7879 
 7880 instruct loadSSF(regF dst, stackSlotF src)
 7881 %{
 7882   match(Set dst src);
 7883 
 7884   ins_cost(125);
 7885   format %{ "movss   $dst, $src\t# float stk" %}
 7886   ins_encode %{
 7887     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7888   %}
 7889   ins_pipe(pipe_slow); // XXX
 7890 %}
 7891 
// Use the same format since predicate() cannot be used here.
 7893 instruct loadSSD(regD dst, stackSlotD src)
 7894 %{
 7895   match(Set dst src);
 7896 
 7897   ins_cost(125);
 7898   format %{ "movsd   $dst, $src\t# double stk" %}
 7899   ins_encode  %{
 7900     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7901   %}
 7902   ins_pipe(pipe_slow); // XXX
 7903 %}
 7904 
 7905 // Prefetch instructions for allocation.
 7906 // Must be safe to execute with invalid address (cannot fault).
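// AllocatePrefetchInstr selects the flavor: 0 -> prefetchnta, 1 -> prefetcht0,
// 2 -> prefetcht2, 3 -> prefetchw (see the predicates below).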
 7907 
 7908 instruct prefetchAlloc( memory mem ) %{
 7909   predicate(AllocatePrefetchInstr==3);
 7910   match(PrefetchAllocation mem);
 7911   ins_cost(125);
 7912 
 7913   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7914   ins_encode %{
 7915     __ prefetchw($mem$$Address);
 7916   %}
 7917   ins_pipe(ialu_mem);
 7918 %}
 7919 
 7920 instruct prefetchAllocNTA( memory mem ) %{
 7921   predicate(AllocatePrefetchInstr==0);
 7922   match(PrefetchAllocation mem);
 7923   ins_cost(125);
 7924 
 7925   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7926   ins_encode %{
 7927     __ prefetchnta($mem$$Address);
 7928   %}
 7929   ins_pipe(ialu_mem);
 7930 %}
 7931 
 7932 instruct prefetchAllocT0( memory mem ) %{
 7933   predicate(AllocatePrefetchInstr==1);
 7934   match(PrefetchAllocation mem);
 7935   ins_cost(125);
 7936 
 7937   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7938   ins_encode %{
 7939     __ prefetcht0($mem$$Address);
 7940   %}
 7941   ins_pipe(ialu_mem);
 7942 %}
 7943 
 7944 instruct prefetchAllocT2( memory mem ) %{
 7945   predicate(AllocatePrefetchInstr==2);
 7946   match(PrefetchAllocation mem);
 7947   ins_cost(125);
 7948 
 7949   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 7950   ins_encode %{
 7951     __ prefetcht2($mem$$Address);
 7952   %}
 7953   ins_pipe(ialu_mem);
 7954 %}
 7955 
 7956 //----------Store Instructions-------------------------------------------------
 7957 
 7958 // Store Byte
 7959 instruct storeB(memory mem, rRegI src)
 7960 %{
 7961   match(Set mem (StoreB mem src));
 7962 
 7963   ins_cost(125); // XXX
 7964   format %{ "movb    $mem, $src\t# byte" %}
 7965   ins_encode %{
 7966     __ movb($mem$$Address, $src$$Register);
 7967   %}
 7968   ins_pipe(ialu_mem_reg);
 7969 %}
 7970 
 7971 // Store Char/Short
 7972 instruct storeC(memory mem, rRegI src)
 7973 %{
 7974   match(Set mem (StoreC mem src));
 7975 
 7976   ins_cost(125); // XXX
 7977   format %{ "movw    $mem, $src\t# char/short" %}
 7978   ins_encode %{
 7979     __ movw($mem$$Address, $src$$Register);
 7980   %}
 7981   ins_pipe(ialu_mem_reg);
 7982 %}
 7983 
 7984 // Store Integer
 7985 instruct storeI(memory mem, rRegI src)
 7986 %{
 7987   match(Set mem (StoreI mem src));
 7988 
 7989   ins_cost(125); // XXX
 7990   format %{ "movl    $mem, $src\t# int" %}
 7991   ins_encode %{
 7992     __ movl($mem$$Address, $src$$Register);
 7993   %}
 7994   ins_pipe(ialu_mem_reg);
 7995 %}
 7996 
 7997 // Store Long
 7998 instruct storeL(memory mem, rRegL src)
 7999 %{
 8000   match(Set mem (StoreL mem src));
 8001 
 8002   ins_cost(125); // XXX
 8003   format %{ "movq    $mem, $src\t# long" %}
 8004   ins_encode %{
 8005     __ movq($mem$$Address, $src$$Register);
 8006   %}
 8007   ins_pipe(ialu_mem_reg); // XXX
 8008 %}
 8009 
 8010 // Store Pointer
 8011 instruct storeP(memory mem, any_RegP src)
 8012 %{
 8013   predicate(n->as_Store()->barrier_data() == 0);
 8014   match(Set mem (StoreP mem src));
 8015 
 8016   ins_cost(125); // XXX
 8017   format %{ "movq    $mem, $src\t# ptr" %}
 8018   ins_encode %{
 8019     __ movq($mem$$Address, $src$$Register);
 8020   %}
 8021   ins_pipe(ialu_mem_reg);
 8022 %}
 8023 
 8024 instruct storeImmP0(memory mem, immP0 zero)
 8025 %{
 8026   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8027   match(Set mem (StoreP mem zero));
 8028 
 8029   ins_cost(125); // XXX
 8030   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8031   ins_encode %{
 8032     __ movq($mem$$Address, r12);
 8033   %}
 8034   ins_pipe(ialu_mem_reg);
 8035 %}
 8036 
 8037 // Store Null Pointer, mark word, or other simple pointer constant.
 8038 instruct storeImmP(memory mem, immP31 src)
 8039 %{
 8040   predicate(n->as_Store()->barrier_data() == 0);
 8041   match(Set mem (StoreP mem src));
 8042 
 8043   ins_cost(150); // XXX
 8044   format %{ "movq    $mem, $src\t# ptr" %}
 8045   ins_encode %{
 8046     __ movq($mem$$Address, $src$$constant);
 8047   %}
 8048   ins_pipe(ialu_mem_imm);
 8049 %}
 8050 
 8051 // Store Compressed Pointer
 8052 instruct storeN(memory mem, rRegN src)
 8053 %{
 8054   predicate(n->as_Store()->barrier_data() == 0);
 8055   match(Set mem (StoreN mem src));
 8056 
 8057   ins_cost(125); // XXX
 8058   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8059   ins_encode %{
 8060     __ movl($mem$$Address, $src$$Register);
 8061   %}
 8062   ins_pipe(ialu_mem_reg);
 8063 %}
 8064 
 8065 instruct storeNKlass(memory mem, rRegN src)
 8066 %{
 8067   match(Set mem (StoreNKlass mem src));
 8068 
 8069   ins_cost(125); // XXX
 8070   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8071   ins_encode %{
 8072     __ movl($mem$$Address, $src$$Register);
 8073   %}
 8074   ins_pipe(ialu_mem_reg);
 8075 %}
 8076 
 8077 instruct storeImmN0(memory mem, immN0 zero)
 8078 %{
 8079   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8080   match(Set mem (StoreN mem zero));
 8081 
 8082   ins_cost(125); // XXX
 8083   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8084   ins_encode %{
 8085     __ movl($mem$$Address, r12);
 8086   %}
 8087   ins_pipe(ialu_mem_reg);
 8088 %}
 8089 
 8090 instruct storeImmN(memory mem, immN src)
 8091 %{
 8092   predicate(n->as_Store()->barrier_data() == 0);
 8093   match(Set mem (StoreN mem src));
 8094 
 8095   ins_cost(150); // XXX
 8096   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8097   ins_encode %{
 8098     address con = (address)$src$$constant;
 8099     if (con == nullptr) {
 8100       __ movl($mem$$Address, 0);
 8101     } else {
 8102       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8103     }
 8104   %}
 8105   ins_pipe(ialu_mem_imm);
 8106 %}
 8107 
 8108 instruct storeImmNKlass(memory mem, immNKlass src)
 8109 %{
 8110   match(Set mem (StoreNKlass mem src));
 8111 
 8112   ins_cost(150); // XXX
 8113   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8114   ins_encode %{
 8115     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8116   %}
 8117   ins_pipe(ialu_mem_imm);
 8118 %}
 8119 
 8120 // Store Integer Immediate
 8121 instruct storeImmI0(memory mem, immI_0 zero)
 8122 %{
 8123   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8124   match(Set mem (StoreI mem zero));
 8125 
 8126   ins_cost(125); // XXX
 8127   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8128   ins_encode %{
 8129     __ movl($mem$$Address, r12);
 8130   %}
 8131   ins_pipe(ialu_mem_reg);
 8132 %}
 8133 
 8134 instruct storeImmI(memory mem, immI src)
 8135 %{
 8136   match(Set mem (StoreI mem src));
 8137 
 8138   ins_cost(150);
 8139   format %{ "movl    $mem, $src\t# int" %}
 8140   ins_encode %{
 8141     __ movl($mem$$Address, $src$$constant);
 8142   %}
 8143   ins_pipe(ialu_mem_imm);
 8144 %}
 8145 
 8146 // Store Long Immediate
 8147 instruct storeImmL0(memory mem, immL0 zero)
 8148 %{
 8149   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8150   match(Set mem (StoreL mem zero));
 8151 
 8152   ins_cost(125); // XXX
 8153   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8154   ins_encode %{
 8155     __ movq($mem$$Address, r12);
 8156   %}
 8157   ins_pipe(ialu_mem_reg);
 8158 %}
 8159 
 8160 instruct storeImmL(memory mem, immL32 src)
 8161 %{
 8162   match(Set mem (StoreL mem src));
 8163 
 8164   ins_cost(150);
 8165   format %{ "movq    $mem, $src\t# long" %}
 8166   ins_encode %{
 8167     __ movq($mem$$Address, $src$$constant);
 8168   %}
 8169   ins_pipe(ialu_mem_imm);
 8170 %}
 8171 
 8172 // Store Short/Char Immediate
 8173 instruct storeImmC0(memory mem, immI_0 zero)
 8174 %{
 8175   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8176   match(Set mem (StoreC mem zero));
 8177 
 8178   ins_cost(125); // XXX
 8179   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8180   ins_encode %{
 8181     __ movw($mem$$Address, r12);
 8182   %}
 8183   ins_pipe(ialu_mem_reg);
 8184 %}
 8185 
 8186 instruct storeImmI16(memory mem, immI16 src)
 8187 %{
 8188   predicate(UseStoreImmI16);
 8189   match(Set mem (StoreC mem src));
 8190 
 8191   ins_cost(150);
 8192   format %{ "movw    $mem, $src\t# short/char" %}
 8193   ins_encode %{
 8194     __ movw($mem$$Address, $src$$constant);
 8195   %}
 8196   ins_pipe(ialu_mem_imm);
 8197 %}
 8198 
 8199 // Store Byte Immediate
 8200 instruct storeImmB0(memory mem, immI_0 zero)
 8201 %{
 8202   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8203   match(Set mem (StoreB mem zero));
 8204 
 8205   ins_cost(125); // XXX
 8206   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8207   ins_encode %{
 8208     __ movb($mem$$Address, r12);
 8209   %}
 8210   ins_pipe(ialu_mem_reg);
 8211 %}
 8212 
 8213 instruct storeImmB(memory mem, immI8 src)
 8214 %{
 8215   match(Set mem (StoreB mem src));
 8216 
 8217   ins_cost(150); // XXX
 8218   format %{ "movb    $mem, $src\t# byte" %}
 8219   ins_encode %{
 8220     __ movb($mem$$Address, $src$$constant);
 8221   %}
 8222   ins_pipe(ialu_mem_imm);
 8223 %}
 8224 
 8225 // Store Float
 8226 instruct storeF(memory mem, regF src)
 8227 %{
 8228   match(Set mem (StoreF mem src));
 8229 
 8230   ins_cost(95); // XXX
 8231   format %{ "movss   $mem, $src\t# float" %}
 8232   ins_encode %{
 8233     __ movflt($mem$$Address, $src$$XMMRegister);
 8234   %}
 8235   ins_pipe(pipe_slow); // XXX
 8236 %}
 8237 
// Store immediate float value (it is faster than a store from an XMM register)
 8239 instruct storeF0(memory mem, immF0 zero)
 8240 %{
 8241   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8242   match(Set mem (StoreF mem zero));
 8243 
 8244   ins_cost(25); // XXX
 8245   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8246   ins_encode %{
 8247     __ movl($mem$$Address, r12);
 8248   %}
 8249   ins_pipe(ialu_mem_reg);
 8250 %}
 8251 
 8252 instruct storeF_imm(memory mem, immF src)
 8253 %{
 8254   match(Set mem (StoreF mem src));
 8255 
 8256   ins_cost(50);
 8257   format %{ "movl    $mem, $src\t# float" %}
 8258   ins_encode %{
 8259     __ movl($mem$$Address, jint_cast($src$$constant));
 8260   %}
 8261   ins_pipe(ialu_mem_imm);
 8262 %}
 8263 
 8264 // Store Double
 8265 instruct storeD(memory mem, regD src)
 8266 %{
 8267   match(Set mem (StoreD mem src));
 8268 
 8269   ins_cost(95); // XXX
 8270   format %{ "movsd   $mem, $src\t# double" %}
 8271   ins_encode %{
 8272     __ movdbl($mem$$Address, $src$$XMMRegister);
 8273   %}
 8274   ins_pipe(pipe_slow); // XXX
 8275 %}
 8276 
// Store immediate double 0.0 (it is faster than a store from an XMM register)
 8278 instruct storeD0_imm(memory mem, immD0 src)
 8279 %{
 8280   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8281   match(Set mem (StoreD mem src));
 8282 
 8283   ins_cost(50);
 8284   format %{ "movq    $mem, $src\t# double 0." %}
 8285   ins_encode %{
 8286     __ movq($mem$$Address, $src$$constant);
 8287   %}
 8288   ins_pipe(ialu_mem_imm);
 8289 %}
 8290 
 8291 instruct storeD0(memory mem, immD0 zero)
 8292 %{
 8293   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8294   match(Set mem (StoreD mem zero));
 8295 
 8296   ins_cost(25); // XXX
 8297   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8298   ins_encode %{
 8299     __ movq($mem$$Address, r12);
 8300   %}
 8301   ins_pipe(ialu_mem_reg);
 8302 %}
 8303 
 8304 instruct storeSSI(stackSlotI dst, rRegI src)
 8305 %{
 8306   match(Set dst src);
 8307 
 8308   ins_cost(100);
 8309   format %{ "movl    $dst, $src\t# int stk" %}
 8310   ins_encode %{
 8311     __ movl($dst$$Address, $src$$Register);
 8312   %}
 8313   ins_pipe( ialu_mem_reg );
 8314 %}
 8315 
 8316 instruct storeSSL(stackSlotL dst, rRegL src)
 8317 %{
 8318   match(Set dst src);
 8319 
 8320   ins_cost(100);
 8321   format %{ "movq    $dst, $src\t# long stk" %}
 8322   ins_encode %{
 8323     __ movq($dst$$Address, $src$$Register);
 8324   %}
 8325   ins_pipe(ialu_mem_reg);
 8326 %}
 8327 
 8328 instruct storeSSP(stackSlotP dst, rRegP src)
 8329 %{
 8330   match(Set dst src);
 8331 
 8332   ins_cost(100);
 8333   format %{ "movq    $dst, $src\t# ptr stk" %}
 8334   ins_encode %{
 8335     __ movq($dst$$Address, $src$$Register);
 8336   %}
 8337   ins_pipe(ialu_mem_reg);
 8338 %}
 8339 
 8340 instruct storeSSF(stackSlotF dst, regF src)
 8341 %{
 8342   match(Set dst src);
 8343 
 8344   ins_cost(95); // XXX
 8345   format %{ "movss   $dst, $src\t# float stk" %}
 8346   ins_encode %{
 8347     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8348   %}
 8349   ins_pipe(pipe_slow); // XXX
 8350 %}
 8351 
 8352 instruct storeSSD(stackSlotD dst, regD src)
 8353 %{
 8354   match(Set dst src);
 8355 
 8356   ins_cost(95); // XXX
 8357   format %{ "movsd   $dst, $src\t# double stk" %}
 8358   ins_encode %{
 8359     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8360   %}
 8361   ins_pipe(pipe_slow); // XXX
 8362 %}
 8363 
 8364 instruct cacheWB(indirect addr)
 8365 %{
 8366   predicate(VM_Version::supports_data_cache_line_flush());
 8367   match(CacheWB addr);
 8368 
 8369   ins_cost(100);
 8370   format %{"cache wb $addr" %}
 8371   ins_encode %{
 8372     assert($addr->index_position() < 0, "should be");
 8373     assert($addr$$disp == 0, "should be");
 8374     __ cache_wb(Address($addr$$base$$Register, 0));
 8375   %}
 8376   ins_pipe(pipe_slow); // XXX
 8377 %}
 8378 
 8379 instruct cacheWBPreSync()
 8380 %{
 8381   predicate(VM_Version::supports_data_cache_line_flush());
 8382   match(CacheWBPreSync);
 8383 
 8384   ins_cost(100);
 8385   format %{"cache wb presync" %}
 8386   ins_encode %{
 8387     __ cache_wbsync(true);
 8388   %}
 8389   ins_pipe(pipe_slow); // XXX
 8390 %}
 8391 
 8392 instruct cacheWBPostSync()
 8393 %{
 8394   predicate(VM_Version::supports_data_cache_line_flush());
 8395   match(CacheWBPostSync);
 8396 
 8397   ins_cost(100);
 8398   format %{"cache wb postsync" %}
 8399   ins_encode %{
 8400     __ cache_wbsync(false);
 8401   %}
 8402   ins_pipe(pipe_slow); // XXX
 8403 %}
 8404 
 8405 //----------BSWAP Instructions-------------------------------------------------
 8406 instruct bytes_reverse_int(rRegI dst) %{
 8407   match(Set dst (ReverseBytesI dst));
 8408 
 8409   format %{ "bswapl  $dst" %}
 8410   ins_encode %{
 8411     __ bswapl($dst$$Register);
 8412   %}
 8413   ins_pipe( ialu_reg );
 8414 %}
 8415 
 8416 instruct bytes_reverse_long(rRegL dst) %{
 8417   match(Set dst (ReverseBytesL dst));
 8418 
 8419   format %{ "bswapq  $dst" %}
 8420   ins_encode %{
 8421     __ bswapq($dst$$Register);
 8422   %}
 8423   ins_pipe( ialu_reg);
 8424 %}
 8425 
 8426 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8427   match(Set dst (ReverseBytesUS dst));
 8428   effect(KILL cr);
 8429 
 8430   format %{ "bswapl  $dst\n\t"
 8431             "shrl    $dst,16\n\t" %}
 8432   ins_encode %{
 8433     __ bswapl($dst$$Register);
 8434     __ shrl($dst$$Register, 16);
 8435   %}
 8436   ins_pipe( ialu_reg );
 8437 %}
 8438 
 8439 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8440   match(Set dst (ReverseBytesS dst));
 8441   effect(KILL cr);
 8442 
 8443   format %{ "bswapl  $dst\n\t"
 8444             "sar     $dst,16\n\t" %}
 8445   ins_encode %{
 8446     __ bswapl($dst$$Register);
 8447     __ sarl($dst$$Register, 16);
 8448   %}
 8449   ins_pipe( ialu_reg );
 8450 %}
 8451 
 8452 //---------- Zeros Count Instructions ------------------------------------------
 8453 
 8454 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8455   predicate(UseCountLeadingZerosInstruction);
 8456   match(Set dst (CountLeadingZerosI src));
 8457   effect(KILL cr);
 8458 
 8459   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8460   ins_encode %{
 8461     __ lzcntl($dst$$Register, $src$$Register);
 8462   %}
 8463   ins_pipe(ialu_reg);
 8464 %}
 8465 
 8466 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8467   predicate(UseCountLeadingZerosInstruction);
 8468   match(Set dst (CountLeadingZerosI (LoadI src)));
 8469   effect(KILL cr);
 8470   ins_cost(175);
 8471   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8472   ins_encode %{
 8473     __ lzcntl($dst$$Register, $src$$Address);
 8474   %}
 8475   ins_pipe(ialu_reg_mem);
 8476 %}
 8477 
 8478 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8479   predicate(!UseCountLeadingZerosInstruction);
 8480   match(Set dst (CountLeadingZerosI src));
 8481   effect(KILL cr);
 8482 
 8483   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8484             "jnz     skip\n\t"
 8485             "movl    $dst, -1\n"
 8486       "skip:\n\t"
 8487             "negl    $dst\n\t"
 8488             "addl    $dst, 31" %}
 8489   ins_encode %{
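    // For x != 0, clz(x) == 31 - bsr(x), computed here as neg(bsr(x)) + 31.
    // BSR sets ZF and leaves its destination undefined for a zero input, so
    // load -1 first; the final result then becomes 31 - (-1) == 32. The long
    // variant below is analogous with 63/64.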
 8490     Register Rdst = $dst$$Register;
 8491     Register Rsrc = $src$$Register;
 8492     Label skip;
 8493     __ bsrl(Rdst, Rsrc);
 8494     __ jccb(Assembler::notZero, skip);
 8495     __ movl(Rdst, -1);
 8496     __ bind(skip);
 8497     __ negl(Rdst);
 8498     __ addl(Rdst, BitsPerInt - 1);
 8499   %}
 8500   ins_pipe(ialu_reg);
 8501 %}
 8502 
 8503 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8504   predicate(UseCountLeadingZerosInstruction);
 8505   match(Set dst (CountLeadingZerosL src));
 8506   effect(KILL cr);
 8507 
 8508   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8509   ins_encode %{
 8510     __ lzcntq($dst$$Register, $src$$Register);
 8511   %}
 8512   ins_pipe(ialu_reg);
 8513 %}
 8514 
 8515 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8516   predicate(UseCountLeadingZerosInstruction);
 8517   match(Set dst (CountLeadingZerosL (LoadL src)));
 8518   effect(KILL cr);
 8519   ins_cost(175);
 8520   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8521   ins_encode %{
 8522     __ lzcntq($dst$$Register, $src$$Address);
 8523   %}
 8524   ins_pipe(ialu_reg_mem);
 8525 %}
 8526 
 8527 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8528   predicate(!UseCountLeadingZerosInstruction);
 8529   match(Set dst (CountLeadingZerosL src));
 8530   effect(KILL cr);
 8531 
 8532   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8533             "jnz     skip\n\t"
 8534             "movl    $dst, -1\n"
 8535       "skip:\n\t"
 8536             "negl    $dst\n\t"
 8537             "addl    $dst, 63" %}
 8538   ins_encode %{
 8539     Register Rdst = $dst$$Register;
 8540     Register Rsrc = $src$$Register;
 8541     Label skip;
 8542     __ bsrq(Rdst, Rsrc);
 8543     __ jccb(Assembler::notZero, skip);
 8544     __ movl(Rdst, -1);
 8545     __ bind(skip);
 8546     __ negl(Rdst);
 8547     __ addl(Rdst, BitsPerLong - 1);
 8548   %}
 8549   ins_pipe(ialu_reg);
 8550 %}
 8551 
 8552 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8553   predicate(UseCountTrailingZerosInstruction);
 8554   match(Set dst (CountTrailingZerosI src));
 8555   effect(KILL cr);
 8556 
 8557   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8558   ins_encode %{
 8559     __ tzcntl($dst$$Register, $src$$Register);
 8560   %}
 8561   ins_pipe(ialu_reg);
 8562 %}
 8563 
 8564 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8565   predicate(UseCountTrailingZerosInstruction);
 8566   match(Set dst (CountTrailingZerosI (LoadI src)));
 8567   effect(KILL cr);
 8568   ins_cost(175);
 8569   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8570   ins_encode %{
 8571     __ tzcntl($dst$$Register, $src$$Address);
 8572   %}
 8573   ins_pipe(ialu_reg_mem);
 8574 %}
 8575 
 8576 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8577   predicate(!UseCountTrailingZerosInstruction);
 8578   match(Set dst (CountTrailingZerosI src));
 8579   effect(KILL cr);
 8580 
 8581   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8582             "jnz     done\n\t"
 8583             "movl    $dst, 32\n"
 8584       "done:" %}
 8585   ins_encode %{
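    // For x != 0, tzcnt(x) == bsf(x). BSF sets ZF on a zero input (leaving
    // the destination undefined), so the zero case loads BitsPerInt instead.
    // The long variant below is analogous with BitsPerLong.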
 8586     Register Rdst = $dst$$Register;
 8587     Label done;
 8588     __ bsfl(Rdst, $src$$Register);
 8589     __ jccb(Assembler::notZero, done);
 8590     __ movl(Rdst, BitsPerInt);
 8591     __ bind(done);
 8592   %}
 8593   ins_pipe(ialu_reg);
 8594 %}
 8595 
 8596 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8597   predicate(UseCountTrailingZerosInstruction);
 8598   match(Set dst (CountTrailingZerosL src));
 8599   effect(KILL cr);
 8600 
 8601   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8602   ins_encode %{
 8603     __ tzcntq($dst$$Register, $src$$Register);
 8604   %}
 8605   ins_pipe(ialu_reg);
 8606 %}
 8607 
 8608 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8609   predicate(UseCountTrailingZerosInstruction);
 8610   match(Set dst (CountTrailingZerosL (LoadL src)));
 8611   effect(KILL cr);
 8612   ins_cost(175);
 8613   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8614   ins_encode %{
 8615     __ tzcntq($dst$$Register, $src$$Address);
 8616   %}
 8617   ins_pipe(ialu_reg_mem);
 8618 %}
 8619 
 8620 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8621   predicate(!UseCountTrailingZerosInstruction);
 8622   match(Set dst (CountTrailingZerosL src));
 8623   effect(KILL cr);
 8624 
 8625   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8626             "jnz     done\n\t"
 8627             "movl    $dst, 64\n"
 8628       "done:" %}
 8629   ins_encode %{
 8630     Register Rdst = $dst$$Register;
 8631     Label done;
 8632     __ bsfq(Rdst, $src$$Register);
 8633     __ jccb(Assembler::notZero, done);
 8634     __ movl(Rdst, BitsPerLong);
 8635     __ bind(done);
 8636   %}
 8637   ins_pipe(ialu_reg);
 8638 %}
 8639 
 8640 //--------------- Reverse Operation Instructions ----------------
 8641 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8642   predicate(!VM_Version::supports_gfni());
 8643   match(Set dst (ReverseI src));
 8644   effect(TEMP dst, TEMP rtmp, KILL cr);
 8645   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8646   ins_encode %{
 8647     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8648   %}
 8649   ins_pipe( ialu_reg );
 8650 %}
 8651 
 8652 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8653   predicate(VM_Version::supports_gfni());
 8654   match(Set dst (ReverseI src));
 8655   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8656   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8657   ins_encode %{
 8658     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8659   %}
 8660   ins_pipe( ialu_reg );
 8661 %}
 8662 
 8663 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8664   predicate(!VM_Version::supports_gfni());
 8665   match(Set dst (ReverseL src));
 8666   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8667   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8668   ins_encode %{
 8669     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8670   %}
 8671   ins_pipe( ialu_reg );
 8672 %}
 8673 
 8674 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8675   predicate(VM_Version::supports_gfni());
 8676   match(Set dst (ReverseL src));
 8677   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8678   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8679   ins_encode %{
 8680     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8681   %}
 8682   ins_pipe( ialu_reg );
 8683 %}
 8684 
 8685 //---------- Population Count Instructions -------------------------------------
 8686 
 8687 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8688   predicate(UsePopCountInstruction);
 8689   match(Set dst (PopCountI src));
 8690   effect(KILL cr);
 8691 
 8692   format %{ "popcnt  $dst, $src" %}
 8693   ins_encode %{
 8694     __ popcntl($dst$$Register, $src$$Register);
 8695   %}
 8696   ins_pipe(ialu_reg);
 8697 %}
 8698 
 8699 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8700   predicate(UsePopCountInstruction);
 8701   match(Set dst (PopCountI (LoadI mem)));
 8702   effect(KILL cr);
 8703 
 8704   format %{ "popcnt  $dst, $mem" %}
 8705   ins_encode %{
 8706     __ popcntl($dst$$Register, $mem$$Address);
 8707   %}
 8708   ins_pipe(ialu_reg);
 8709 %}
 8710 
 8711 // Note: Long.bitCount(long) returns an int.
 8712 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8713   predicate(UsePopCountInstruction);
 8714   match(Set dst (PopCountL src));
 8715   effect(KILL cr);
 8716 
 8717   format %{ "popcnt  $dst, $src" %}
 8718   ins_encode %{
 8719     __ popcntq($dst$$Register, $src$$Register);
 8720   %}
 8721   ins_pipe(ialu_reg);
 8722 %}
 8723 
 8724 // Note: Long.bitCount(long) returns an int.
 8725 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8726   predicate(UsePopCountInstruction);
 8727   match(Set dst (PopCountL (LoadL mem)));
 8728   effect(KILL cr);
 8729 
 8730   format %{ "popcnt  $dst, $mem" %}
 8731   ins_encode %{
 8732     __ popcntq($dst$$Register, $mem$$Address);
 8733   %}
 8734   ins_pipe(ialu_reg);
 8735 %}
 8736 
 8737 
 8738 //----------MemBar Instructions-----------------------------------------------
 8739 // Memory barrier flavors
 8740 
 8741 instruct membar_acquire()
 8742 %{
 8743   match(MemBarAcquire);
 8744   match(LoadFence);
 8745   ins_cost(0);
 8746 
 8747   size(0);
 8748   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8749   ins_encode();
 8750   ins_pipe(empty);
 8751 %}
 8752 
 8753 instruct membar_acquire_lock()
 8754 %{
 8755   match(MemBarAcquireLock);
 8756   ins_cost(0);
 8757 
 8758   size(0);
 8759   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8760   ins_encode();
 8761   ins_pipe(empty);
 8762 %}
 8763 
 8764 instruct membar_release()
 8765 %{
 8766   match(MemBarRelease);
 8767   match(StoreFence);
 8768   ins_cost(0);
 8769 
 8770   size(0);
 8771   format %{ "MEMBAR-release ! (empty encoding)" %}
 8772   ins_encode();
 8773   ins_pipe(empty);
 8774 %}
 8775 
 8776 instruct membar_release_lock()
 8777 %{
 8778   match(MemBarReleaseLock);
 8779   ins_cost(0);
 8780 
 8781   size(0);
 8782   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8783   ins_encode();
 8784   ins_pipe(empty);
 8785 %}
 8786 
 8787 instruct membar_volatile(rFlagsReg cr) %{
 8788   match(MemBarVolatile);
 8789   effect(KILL cr);
 8790   ins_cost(400);
 8791 
 8792   format %{
 8793     $$template
 8794     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8795   %}
 8796   ins_encode %{
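    // A locked read-modify-write of the stack slot at [rsp] provides the
    // required StoreLoad ordering and is typically cheaper than mfence.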
 8797     __ membar(Assembler::StoreLoad);
 8798   %}
 8799   ins_pipe(pipe_slow);
 8800 %}
 8801 
 8802 instruct unnecessary_membar_volatile()
 8803 %{
 8804   match(MemBarVolatile);
 8805   predicate(Matcher::post_store_load_barrier(n));
 8806   ins_cost(0);
 8807 
 8808   size(0);
 8809   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8810   ins_encode();
 8811   ins_pipe(empty);
 8812 %}
 8813 
 8814 instruct membar_storestore() %{
 8815   match(MemBarStoreStore);
 8816   match(StoreStoreFence);
 8817   ins_cost(0);
 8818 
 8819   size(0);
 8820   format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
 8822   ins_pipe(empty);
 8823 %}
 8824 
 8825 //----------Move Instructions--------------------------------------------------
 8826 
 8827 instruct castX2P(rRegP dst, rRegL src)
 8828 %{
 8829   match(Set dst (CastX2P src));
 8830 
 8831   format %{ "movq    $dst, $src\t# long->ptr" %}
 8832   ins_encode %{
 8833     if ($dst$$reg != $src$$reg) {
 8834       __ movptr($dst$$Register, $src$$Register);
 8835     }
 8836   %}
 8837   ins_pipe(ialu_reg_reg); // XXX
 8838 %}
 8839 
 8840 instruct castP2X(rRegL dst, rRegP src)
 8841 %{
 8842   match(Set dst (CastP2X src));
 8843 
 8844   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8845   ins_encode %{
 8846     if ($dst$$reg != $src$$reg) {
 8847       __ movptr($dst$$Register, $src$$Register);
 8848     }
 8849   %}
 8850   ins_pipe(ialu_reg_reg); // XXX
 8851 %}
 8852 
// Convert oop into int for vector alignment masking
 8854 instruct convP2I(rRegI dst, rRegP src)
 8855 %{
 8856   match(Set dst (ConvL2I (CastP2X src)));
 8857 
 8858   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8859   ins_encode %{
 8860     __ movl($dst$$Register, $src$$Register);
 8861   %}
 8862   ins_pipe(ialu_reg_reg); // XXX
 8863 %}
 8864 
// Convert compressed oop into int for vector alignment masking
// in the case of 32-bit oops (heap < 4GB).
 8867 instruct convN2I(rRegI dst, rRegN src)
 8868 %{
 8869   predicate(CompressedOops::shift() == 0);
 8870   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8871 
 8872   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8873   ins_encode %{
 8874     __ movl($dst$$Register, $src$$Register);
 8875   %}
 8876   ins_pipe(ialu_reg_reg); // XXX
 8877 %}
 8878 
 8879 // Convert oop pointer into compressed form
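// With compressed oops, narrow = (oop - CompressedOops::base()) >> shift,
// where the shift is typically 3 (8-byte object alignment); decoding is the
// inverse. The _not_null variants skip the null check that the general
// forms must preserve.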
 8880 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8881   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8882   match(Set dst (EncodeP src));
 8883   effect(KILL cr);
 8884   format %{ "encode_heap_oop $dst,$src" %}
 8885   ins_encode %{
 8886     Register s = $src$$Register;
 8887     Register d = $dst$$Register;
 8888     if (s != d) {
 8889       __ movq(d, s);
 8890     }
 8891     __ encode_heap_oop(d);
 8892   %}
 8893   ins_pipe(ialu_reg_long);
 8894 %}
 8895 
 8896 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8897   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8898   match(Set dst (EncodeP src));
 8899   effect(KILL cr);
 8900   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8901   ins_encode %{
 8902     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8903   %}
 8904   ins_pipe(ialu_reg_long);
 8905 %}
 8906 
 8907 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8908   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8909             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8910   match(Set dst (DecodeN src));
 8911   effect(KILL cr);
 8912   format %{ "decode_heap_oop $dst,$src" %}
 8913   ins_encode %{
 8914     Register s = $src$$Register;
 8915     Register d = $dst$$Register;
 8916     if (s != d) {
 8917       __ movq(d, s);
 8918     }
 8919     __ decode_heap_oop(d);
 8920   %}
 8921   ins_pipe(ialu_reg_long);
 8922 %}
 8923 
 8924 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8925   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 8926             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 8927   match(Set dst (DecodeN src));
 8928   effect(KILL cr);
 8929   format %{ "decode_heap_oop_not_null $dst,$src" %}
 8930   ins_encode %{
 8931     Register s = $src$$Register;
 8932     Register d = $dst$$Register;
 8933     if (s != d) {
 8934       __ decode_heap_oop_not_null(d, s);
 8935     } else {
 8936       __ decode_heap_oop_not_null(d);
 8937     }
 8938   %}
 8939   ins_pipe(ialu_reg_long);
 8940 %}
 8941 
 8942 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8943   match(Set dst (EncodePKlass src));
 8944   effect(TEMP dst, KILL cr);
 8945   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 8946   ins_encode %{
 8947     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8948   %}
 8949   ins_pipe(ialu_reg_long);
 8950 %}
 8951 
 8952 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8953   match(Set dst (DecodeNKlass src));
 8954   effect(TEMP dst, KILL cr);
 8955   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 8956   ins_encode %{
 8957     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8958   %}
 8959   ins_pipe(ialu_reg_long);
 8960 %}
 8961 
 8962 //----------Conditional Move---------------------------------------------------
// Jump
// Dummy instruction for generating temp registers; never matched directly
// (predicate(false)).
 8965 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 8966   match(Jump (LShiftL switch_val shift));
 8967   ins_cost(350);
 8968   predicate(false);
 8969   effect(TEMP dest);
 8970 
 8971   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8972             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 8973   ins_encode %{
    // We could use jump(ArrayAddress), but the macro assembler needs r10 to
    // do that, and here the compiler hands r10 to the register allocator as
    // an allocatable register. So we build the dispatch address by hand.
 8977     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 8978     // ArrayAddress dispatch(table, index);
 8979     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 8980     __ lea($dest$$Register, $constantaddress);
 8981     __ jmp(dispatch);
 8982   %}
 8983   ins_pipe(pipe_jmp);
 8984 %}
 8985 
 8986 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 8987   match(Jump (AddL (LShiftL switch_val shift) offset));
 8988   ins_cost(350);
 8989   effect(TEMP dest);
 8990 
 8991   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8992             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 8993   ins_encode %{
    // We could use jump(ArrayAddress), but the macro assembler needs r10 to
    // do that, and here the compiler hands r10 to the register allocator as
    // an allocatable register. So we build the dispatch address by hand.
 8997     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 8998     // ArrayAddress dispatch(table, index);
 8999     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9000     __ lea($dest$$Register, $constantaddress);
 9001     __ jmp(dispatch);
 9002   %}
 9003   ins_pipe(pipe_jmp);
 9004 %}
 9005 
 9006 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9007   match(Jump switch_val);
 9008   ins_cost(350);
 9009   effect(TEMP dest);
 9010 
 9011   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9012             "jmp     [$dest + $switch_val]\n\t" %}
 9013   ins_encode %{
    // We could use jump(ArrayAddress), but the macro assembler needs r10 to
    // do that, and here the compiler hands r10 to the register allocator as
    // an allocatable register. So we build the dispatch address by hand.
 9017     // Address index(noreg, switch_reg, Address::times_1);
 9018     // ArrayAddress dispatch(table, index);
 9019     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9020     __ lea($dest$$Register, $constantaddress);
 9021     __ jmp(dispatch);
 9022   %}
 9023   ins_pipe(pipe_jmp);
 9024 %}
 9025 
 9026 // Conditional move
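// When the CMove selects between the constants 1 and 0 (the predicate
// checks that the true arm is the constant 0, so the expression is really
// !cond), a single setcc on the negated condition replaces a cmov and two
// constant loads.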
 9027 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9028 %{
 9029   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9030   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9031 
 9032   ins_cost(100); // XXX
 9033   format %{ "setbn$cop $dst\t# signed, int" %}
 9034   ins_encode %{
 9035     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9036     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9037   %}
 9038   ins_pipe(ialu_reg);
 9039 %}
 9040 
 9041 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9042 %{
 9043   predicate(!UseAPX);
 9044   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9045 
 9046   ins_cost(200); // XXX
 9047   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9048   ins_encode %{
 9049     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9050   %}
 9051   ins_pipe(pipe_cmov_reg);
 9052 %}
 9053 
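// The _ndd variants use Intel APX "new data destination" encodings: the
// result may go to a third register, so dst no longer has to alias an
// input and the register allocator is spared a copy.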
 9054 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9055 %{
 9056   predicate(UseAPX);
 9057   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9058 
 9059   ins_cost(200);
 9060   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9061   ins_encode %{
 9062     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9063   %}
 9064   ins_pipe(pipe_cmov_reg);
 9065 %}
 9066 
 9067 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9068 %{
 9069   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9070   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9071 
 9072   ins_cost(100); // XXX
 9073   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9074   ins_encode %{
 9075     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9076     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9077   %}
 9078   ins_pipe(ialu_reg);
 9079 %}
 9080 
 9081 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9082   predicate(!UseAPX);
 9083   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9084 
 9085   ins_cost(200); // XXX
 9086   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9087   ins_encode %{
 9088     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9089   %}
 9090   ins_pipe(pipe_cmov_reg);
 9091 %}
 9092 
 9093 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9094   predicate(UseAPX);
 9095   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9096 
 9097   ins_cost(200);
 9098   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9099   ins_encode %{
 9100     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9101   %}
 9102   ins_pipe(pipe_cmov_reg);
 9103 %}
 9104 
 9105 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9106 %{
 9107   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9108   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9109 
 9110   ins_cost(100); // XXX
 9111   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9112   ins_encode %{
 9113     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9114     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9115   %}
 9116   ins_pipe(ialu_reg);
 9117 %}
 9118 
 9119 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9120   predicate(!UseAPX);
 9121   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9122   ins_cost(200);
 9123   expand %{
 9124     cmovI_regU(cop, cr, dst, src);
 9125   %}
 9126 %}
 9127 
 9128 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
 9129   predicate(UseAPX);
 9130   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9131   ins_cost(200);
 9132   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9133   ins_encode %{
 9134     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9135   %}
 9136   ins_pipe(pipe_cmov_reg);
 9137 %}
 9138 
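// After ucomiss/ucomisd, both "equal" and "unordered" set ZF; only PF
// tells them apart:
//   equal:     ZF=1, PF=0, CF=0
//   unordered: ZF=1, PF=1, CF=1   (at least one operand is NaN)
// Since a NaN must compare not-equal, the UCF2 ne forms move on parity as
// well as on not-equal, hence the cmovp/cmovne pair.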
 9139 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9140   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9141   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9142 
 9143   ins_cost(200); // XXX
 9144   format %{ "cmovpl  $dst, $src\n\t"
 9145             "cmovnel $dst, $src" %}
 9146   ins_encode %{
 9147     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9148     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9149   %}
 9150   ins_pipe(pipe_cmov_reg);
 9151 %}
 9152 
 9153 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9154   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9155   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9156   effect(TEMP dst);
 9157 
 9158   ins_cost(200);
 9159   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9160             "cmovnel  $dst, $src2" %}
 9161   ins_encode %{
 9162     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9163     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9164   %}
 9165   ins_pipe(pipe_cmov_reg);
 9166 %}
 9167 
 9168 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9169 // inputs of the CMove
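// e.g. dst = (x == y) ? a : b is rewritten as dst = (x != y) ? b : a, so
// the cmovp/cmovne pair from the ne case works unchanged on the swapped
// operands.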
 9170 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9171   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9172   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9173   effect(TEMP dst);
 9174 
 9175   ins_cost(200); // XXX
 9176   format %{ "cmovpl  $dst, $src\n\t"
 9177             "cmovnel $dst, $src" %}
 9178   ins_encode %{
 9179     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9180     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9181   %}
 9182   ins_pipe(pipe_cmov_reg);
 9183 %}
 9184 
// This special handling is needed only for eq/ne comparisons: NaN == NaN is
// false, and the parity flag is set when either operand is a NaN.
 9187 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9188   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9189   match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
 9190   effect(TEMP dst);
 9191 
 9192   ins_cost(200);
 9193   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9194             "cmovnel  $dst, $src2" %}
 9195   ins_encode %{
 9196     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9197     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9198   %}
 9199   ins_pipe(pipe_cmov_reg);
 9200 %}
 9201 
 9202 // Conditional move
 9203 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9204   predicate(!UseAPX);
 9205   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9206 
 9207   ins_cost(250); // XXX
 9208   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9209   ins_encode %{
 9210     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9211   %}
 9212   ins_pipe(pipe_cmov_mem);
 9213 %}
 9214 
 9215 // Conditional move
 9216 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9217 %{
 9218   predicate(UseAPX);
 9219   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9220 
 9221   ins_cost(250);
 9222   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9223   ins_encode %{
 9224     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9225   %}
 9226   ins_pipe(pipe_cmov_mem);
 9227 %}
 9228 
 9229 // Conditional move
 9230 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9231 %{
 9232   predicate(!UseAPX);
 9233   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9234 
 9235   ins_cost(250); // XXX
 9236   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9237   ins_encode %{
 9238     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9239   %}
 9240   ins_pipe(pipe_cmov_mem);
 9241 %}
 9242 
 9243 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9244   predicate(!UseAPX);
 9245   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9246   ins_cost(250);
 9247   expand %{
 9248     cmovI_memU(cop, cr, dst, src);
 9249   %}
 9250 %}
 9251 
 9252 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9253 %{
 9254   predicate(UseAPX);
 9255   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9256 
 9257   ins_cost(250);
 9258   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9259   ins_encode %{
 9260     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9261   %}
 9262   ins_pipe(pipe_cmov_mem);
 9263 %}
 9264 
 9265 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
 9266 %{
 9267   predicate(UseAPX);
 9268   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9269   ins_cost(250);
 9270   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9271   ins_encode %{
 9272     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9273   %}
 9274   ins_pipe(pipe_cmov_mem);
 9275 %}
 9276 
 9277 // Conditional move
 9278 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9279 %{
 9280   predicate(!UseAPX);
 9281   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9282 
 9283   ins_cost(200); // XXX
 9284   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9285   ins_encode %{
 9286     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9287   %}
 9288   ins_pipe(pipe_cmov_reg);
 9289 %}
 9290 
 9291 // Conditional move ndd
 9292 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9293 %{
 9294   predicate(UseAPX);
 9295   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9296 
 9297   ins_cost(200);
 9298   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9299   ins_encode %{
 9300     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9301   %}
 9302   ins_pipe(pipe_cmov_reg);
 9303 %}
 9304 
 9305 // Conditional move
 9306 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9307 %{
 9308   predicate(!UseAPX);
 9309   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9310 
 9311   ins_cost(200); // XXX
 9312   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9313   ins_encode %{
 9314     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9315   %}
 9316   ins_pipe(pipe_cmov_reg);
 9317 %}
 9318 
 9319 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9320   predicate(!UseAPX);
 9321   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9322   ins_cost(200);
 9323   expand %{
 9324     cmovN_regU(cop, cr, dst, src);
 9325   %}
 9326 %}
 9327 
 9328 // Conditional move ndd
 9329 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9330 %{
 9331   predicate(UseAPX);
 9332   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9333 
 9334   ins_cost(200);
 9335   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9336   ins_encode %{
 9337     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9338   %}
 9339   ins_pipe(pipe_cmov_reg);
 9340 %}
 9341 
 9342 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
 9343   predicate(UseAPX);
 9344   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9345   ins_cost(200);
 9346   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9347   ins_encode %{
 9348     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9349   %}
 9350   ins_pipe(pipe_cmov_reg);
 9351 %}
 9352 
 9353 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9354   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9355   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9356 
 9357   ins_cost(200); // XXX
 9358   format %{ "cmovpl  $dst, $src\n\t"
 9359             "cmovnel $dst, $src" %}
 9360   ins_encode %{
 9361     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9362     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9363   %}
 9364   ins_pipe(pipe_cmov_reg);
 9365 %}
 9366 
 9367 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9368 // inputs of the CMove
 9369 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9370   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9371   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9372 
 9373   ins_cost(200); // XXX
 9374   format %{ "cmovpl  $dst, $src\n\t"
 9375             "cmovnel $dst, $src" %}
 9376   ins_encode %{
 9377     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9378     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9379   %}
 9380   ins_pipe(pipe_cmov_reg);
 9381 %}
 9382 
 9383 // Conditional move
 9384 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9385 %{
 9386   predicate(!UseAPX);
 9387   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9388 
 9389   ins_cost(200); // XXX
 9390   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9391   ins_encode %{
 9392     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9393   %}
 9394   ins_pipe(pipe_cmov_reg);  // XXX
 9395 %}
 9396 
 9397 // Conditional move ndd
 9398 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9399 %{
 9400   predicate(UseAPX);
 9401   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9402 
 9403   ins_cost(200);
 9404   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9405   ins_encode %{
 9406     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9407   %}
 9408   ins_pipe(pipe_cmov_reg);
 9409 %}
 9410 
 9411 // Conditional move
 9412 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9413 %{
 9414   predicate(!UseAPX);
 9415   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9416 
 9417   ins_cost(200); // XXX
 9418   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9419   ins_encode %{
 9420     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9421   %}
 9422   ins_pipe(pipe_cmov_reg); // XXX
 9423 %}
 9424 
 9425 // Conditional move ndd
 9426 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9427 %{
 9428   predicate(UseAPX);
 9429   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9430 
 9431   ins_cost(200);
 9432   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9433   ins_encode %{
 9434     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9435   %}
 9436   ins_pipe(pipe_cmov_reg);
 9437 %}
 9438 
 9439 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9440   predicate(!UseAPX);
 9441   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9442   ins_cost(200);
 9443   expand %{
 9444     cmovP_regU(cop, cr, dst, src);
 9445   %}
 9446 %}
 9447 
 9448 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
 9449   predicate(UseAPX);
 9450   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9451   ins_cost(200);
 9452   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9453   ins_encode %{
 9454     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9455   %}
 9456   ins_pipe(pipe_cmov_reg);
 9457 %}
 9458 
 9459 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9460   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9461   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9462 
 9463   ins_cost(200); // XXX
 9464   format %{ "cmovpq  $dst, $src\n\t"
 9465             "cmovneq $dst, $src" %}
 9466   ins_encode %{
 9467     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9468     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9469   %}
 9470   ins_pipe(pipe_cmov_reg);
 9471 %}
 9472 
 9473 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9474   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9475   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9476   effect(TEMP dst);
 9477 
 9478   ins_cost(200);
 9479   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9480             "cmovneq  $dst, $src2" %}
 9481   ins_encode %{
 9482     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9483     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9484   %}
 9485   ins_pipe(pipe_cmov_reg);
 9486 %}
 9487 
 9488 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9489 // inputs of the CMove
 9490 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9491   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9492   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9493 
 9494   ins_cost(200); // XXX
 9495   format %{ "cmovpq  $dst, $src\n\t"
 9496             "cmovneq $dst, $src" %}
 9497   ins_encode %{
 9498     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9499     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9500   %}
 9501   ins_pipe(pipe_cmov_reg);
 9502 %}
 9503 
 9504 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9505   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9506   match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
 9507   effect(TEMP dst);
 9508 
 9509   ins_cost(200);
 9510   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9511             "cmovneq  $dst, $src2" %}
 9512   ins_encode %{
 9513     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9514     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9515   %}
 9516   ins_pipe(pipe_cmov_reg);
 9517 %}
 9518 
 9519 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9520 %{
 9521   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9522   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9523 
 9524   ins_cost(100); // XXX
 9525   format %{ "setbn$cop $dst\t# signed, long" %}
 9526   ins_encode %{
 9527     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9528     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9529   %}
 9530   ins_pipe(ialu_reg);
 9531 %}
 9532 
 9533 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9534 %{
 9535   predicate(!UseAPX);
 9536   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9537 
 9538   ins_cost(200); // XXX
 9539   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9540   ins_encode %{
 9541     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9542   %}
 9543   ins_pipe(pipe_cmov_reg);  // XXX
 9544 %}
 9545 
 9546 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9547 %{
 9548   predicate(UseAPX);
 9549   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9550 
 9551   ins_cost(200);
 9552   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9553   ins_encode %{
 9554     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9555   %}
 9556   ins_pipe(pipe_cmov_reg);
 9557 %}
 9558 
 9559 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9560 %{
 9561   predicate(!UseAPX);
 9562   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9563 
 9564   ins_cost(200); // XXX
 9565   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9566   ins_encode %{
 9567     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9568   %}
 9569   ins_pipe(pipe_cmov_mem);  // XXX
 9570 %}
 9571 
 9572 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9573 %{
 9574   predicate(UseAPX);
 9575   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9576 
 9577   ins_cost(200);
 9578   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9579   ins_encode %{
 9580     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9581   %}
 9582   ins_pipe(pipe_cmov_mem);
 9583 %}
 9584 
 9585 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9586 %{
 9587   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9588   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9589 
 9590   ins_cost(100); // XXX
 9591   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9592   ins_encode %{
 9593     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9594     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9595   %}
 9596   ins_pipe(ialu_reg);
 9597 %}
 9598 
 9599 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9600 %{
 9601   predicate(!UseAPX);
 9602   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9603 
 9604   ins_cost(200); // XXX
 9605   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9606   ins_encode %{
 9607     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9608   %}
 9609   ins_pipe(pipe_cmov_reg); // XXX
 9610 %}
 9611 
 9612 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9613 %{
 9614   predicate(UseAPX);
 9615   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9616 
 9617   ins_cost(200);
 9618   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9619   ins_encode %{
 9620     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9621   %}
 9622   ins_pipe(pipe_cmov_reg);
 9623 %}
 9624 
 9625 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9626 %{
 9627   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9628   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9629 
 9630   ins_cost(100); // XXX
 9631   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9632   ins_encode %{
 9633     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9634     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9635   %}
 9636   ins_pipe(ialu_reg);
 9637 %}
 9638 
 9639 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9640   predicate(!UseAPX);
 9641   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9642   ins_cost(200);
 9643   expand %{
 9644     cmovL_regU(cop, cr, dst, src);
 9645   %}
 9646 %}
 9647 
 9648 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
 9649 %{
 9650   predicate(UseAPX);
 9651   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9652   ins_cost(200);
 9653   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9654   ins_encode %{
 9655     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9656   %}
 9657   ins_pipe(pipe_cmov_reg);
 9658 %}
 9659 
 9660 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9661   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9662   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9663 
 9664   ins_cost(200); // XXX
 9665   format %{ "cmovpq  $dst, $src\n\t"
 9666             "cmovneq $dst, $src" %}
 9667   ins_encode %{
 9668     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9669     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9670   %}
 9671   ins_pipe(pipe_cmov_reg);
 9672 %}
 9673 
 9674 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9675   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9676   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9677   effect(TEMP dst);
 9678 
 9679   ins_cost(200);
 9680   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9681             "cmovneq  $dst, $src2" %}
 9682   ins_encode %{
 9683     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9684     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9685   %}
 9686   ins_pipe(pipe_cmov_reg);
 9687 %}
 9688 
 9689 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9690 // inputs of the CMove
 9691 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9692   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9693   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9694 
 9695   ins_cost(200); // XXX
 9696   format %{ "cmovpq  $dst, $src\n\t"
 9697             "cmovneq $dst, $src" %}
 9698   ins_encode %{
 9699     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9700     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9701   %}
 9702   ins_pipe(pipe_cmov_reg);
 9703 %}
 9704 
 9705 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9706   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9707   match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
 9708   effect(TEMP dst);
 9709 
 9710   ins_cost(200);
 9711   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
            "cmovneq  $dst, $src2" %}
 9713   ins_encode %{
 9714     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9715     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9716   %}
 9717   ins_pipe(pipe_cmov_reg);
 9718 %}
 9719 
 9720 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9721 %{
 9722   predicate(!UseAPX);
 9723   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9724 
 9725   ins_cost(200); // XXX
 9726   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9727   ins_encode %{
 9728     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9729   %}
 9730   ins_pipe(pipe_cmov_mem); // XXX
 9731 %}
 9732 
 9733 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9734   predicate(!UseAPX);
 9735   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9736   ins_cost(200);
 9737   expand %{
 9738     cmovL_memU(cop, cr, dst, src);
 9739   %}
 9740 %}
 9741 
 9742 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9743 %{
 9744   predicate(UseAPX);
 9745   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9746 
 9747   ins_cost(200);
 9748   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9749   ins_encode %{
 9750     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9751   %}
 9752   ins_pipe(pipe_cmov_mem);
 9753 %}
 9754 
 9755 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
 9756 %{
 9757   predicate(UseAPX);
 9758   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9759   ins_cost(200);
 9760   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9761   ins_encode %{
 9762     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9763   %}
 9764   ins_pipe(pipe_cmov_mem);
 9765 %}
 9766 
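// There is no cmov for XMM registers, so floating-point CMoves are lowered
// to a short forward branch around a register-to-register move.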
 9767 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9768 %{
 9769   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9770 
 9771   ins_cost(200); // XXX
 9772   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9773             "movss     $dst, $src\n"
 9774     "skip:" %}
 9775   ins_encode %{
 9776     Label Lskip;
 9777     // Invert sense of branch from sense of CMOV
 9778     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9779     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9780     __ bind(Lskip);
 9781   %}
 9782   ins_pipe(pipe_slow);
 9783 %}
 9784 
 9785 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9786 %{
 9787   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9788 
 9789   ins_cost(200); // XXX
 9790   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9791             "movss     $dst, $src\n"
 9792     "skip:" %}
 9793   ins_encode %{
 9794     Label Lskip;
 9795     // Invert sense of branch from sense of CMOV
 9796     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9797     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9798     __ bind(Lskip);
 9799   %}
 9800   ins_pipe(pipe_slow);
 9801 %}
 9802 
 9803 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9804   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9805   ins_cost(200);
 9806   expand %{
 9807     cmovF_regU(cop, cr, dst, src);
 9808   %}
 9809 %}
 9810 
 9811 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9812 %{
 9813   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9814 
 9815   ins_cost(200); // XXX
 9816   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9817             "movsd     $dst, $src\n"
 9818     "skip:" %}
 9819   ins_encode %{
 9820     Label Lskip;
 9821     // Invert sense of branch from sense of CMOV
 9822     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9823     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9824     __ bind(Lskip);
 9825   %}
 9826   ins_pipe(pipe_slow);
 9827 %}
 9828 
 9829 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9830 %{
 9831   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9832 
 9833   ins_cost(200); // XXX
 9834   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9835             "movsd     $dst, $src\n"
 9836     "skip:" %}
 9837   ins_encode %{
 9838     Label Lskip;
 9839     // Invert sense of branch from sense of CMOV
 9840     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9841     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9842     __ bind(Lskip);
 9843   %}
 9844   ins_pipe(pipe_slow);
 9845 %}
 9846 
 9847 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9848   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9849   ins_cost(200);
 9850   expand %{
 9851     cmovD_regU(cop, cr, dst, src);
 9852   %}
 9853 %}
 9854 
 9855 //----------Arithmetic Instructions--------------------------------------------
 9856 //----------Addition Instructions----------------------------------------------
 9857 
 9858 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9859 %{
 9860   predicate(!UseAPX);
 9861   match(Set dst (AddI dst src));
 9862   effect(KILL cr);
 9863   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9864   format %{ "addl    $dst, $src\t# int" %}
 9865   ins_encode %{
 9866     __ addl($dst$$Register, $src$$Register);
 9867   %}
 9868   ins_pipe(ialu_reg_reg);
 9869 %}
 9870 
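// Flag_ndd_demotable_opr1/opr2 mark NDD forms that the assembler may demote
// to the shorter legacy two-operand encoding when dst happens to coincide
// with that source operand.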
 9871 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9872 %{
 9873   predicate(UseAPX);
 9874   match(Set dst (AddI src1 src2));
 9875   effect(KILL cr);
 9876   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9877 
 9878   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9879   ins_encode %{
 9880     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9881   %}
 9882   ins_pipe(ialu_reg_reg);
 9883 %}
 9884 
 9885 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9886 %{
 9887   predicate(!UseAPX);
 9888   match(Set dst (AddI dst src));
 9889   effect(KILL cr);
 9890   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9891 
 9892   format %{ "addl    $dst, $src\t# int" %}
 9893   ins_encode %{
 9894     __ addl($dst$$Register, $src$$constant);
 9895   %}
  ins_pipe(ialu_reg);
 9897 %}
 9898 
 9899 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9900 %{
 9901   predicate(UseAPX);
 9902   match(Set dst (AddI src1 src2));
 9903   effect(KILL cr);
 9904   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9905 
 9906   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9907   ins_encode %{
 9908     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9909   %}
  ins_pipe(ialu_reg);
 9911 %}
 9912 
 9913 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9914 %{
 9915   predicate(UseAPX);
 9916   match(Set dst (AddI (LoadI src1) src2));
 9917   effect(KILL cr);
 9918   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9919 
 9920   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9921   ins_encode %{
 9922     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9923   %}
  ins_pipe(ialu_reg);
 9925 %}
 9926 
 9927 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9928 %{
 9929   predicate(!UseAPX);
 9930   match(Set dst (AddI dst (LoadI src)));
 9931   effect(KILL cr);
 9932   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9933 
 9934   ins_cost(150); // XXX
 9935   format %{ "addl    $dst, $src\t# int" %}
 9936   ins_encode %{
 9937     __ addl($dst$$Register, $src$$Address);
 9938   %}
 9939   ins_pipe(ialu_reg_mem);
 9940 %}
 9941 
 9942 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
 9943 %{
 9944   predicate(UseAPX);
 9945   match(Set dst (AddI src1 (LoadI src2)));
 9946   effect(KILL cr);
 9947   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9948 
 9949   ins_cost(150);
 9950   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9951   ins_encode %{
 9952     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
 9953   %}
 9954   ins_pipe(ialu_reg_mem);
 9955 %}
 9956 
 9957 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9958 %{
 9959   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9960   effect(KILL cr);
 9961   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9962 
 9963   ins_cost(150); // XXX
 9964   format %{ "addl    $dst, $src\t# int" %}
 9965   ins_encode %{
 9966     __ addl($dst$$Address, $src$$Register);
 9967   %}
 9968   ins_pipe(ialu_mem_reg);
 9969 %}
 9970 
 9971 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9972 %{
 9973   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9974   effect(KILL cr);
 9975   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9978   ins_cost(125); // XXX
 9979   format %{ "addl    $dst, $src\t# int" %}
 9980   ins_encode %{
 9981     __ addl($dst$$Address, $src$$constant);
 9982   %}
 9983   ins_pipe(ialu_mem_imm);
 9984 %}
 9985 
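// The inc/dec forms are guarded by UseIncDec: incl/decl are shorter than
// the equivalent addl/subl with an immediate, but they leave CF untouched,
// a partial flags update that can cost a merge stall on some cores.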
 9986 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 9987 %{
 9988   predicate(!UseAPX && UseIncDec);
 9989   match(Set dst (AddI dst src));
 9990   effect(KILL cr);
 9991 
 9992   format %{ "incl    $dst\t# int" %}
 9993   ins_encode %{
 9994     __ incrementl($dst$$Register);
 9995   %}
 9996   ins_pipe(ialu_reg);
 9997 %}
 9998 
 9999 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10000 %{
10001   predicate(UseAPX && UseIncDec);
10002   match(Set dst (AddI src val));
10003   effect(KILL cr);
10004   flag(PD::Flag_ndd_demotable_opr1);
10005 
10006   format %{ "eincl    $dst, $src\t# int ndd" %}
10007   ins_encode %{
10008     __ eincl($dst$$Register, $src$$Register, false);
10009   %}
10010   ins_pipe(ialu_reg);
10011 %}
10012 
10013 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10014 %{
10015   predicate(UseAPX && UseIncDec);
10016   match(Set dst (AddI (LoadI src) val));
10017   effect(KILL cr);
10018 
10019   format %{ "eincl    $dst, $src\t# int ndd" %}
10020   ins_encode %{
10021     __ eincl($dst$$Register, $src$$Address, false);
10022   %}
10023   ins_pipe(ialu_reg);
10024 %}
10025 
10026 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10027 %{
10028   predicate(UseIncDec);
10029   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10030   effect(KILL cr);
10031 
10032   ins_cost(125); // XXX
10033   format %{ "incl    $dst\t# int" %}
10034   ins_encode %{
10035     __ incrementl($dst$$Address);
10036   %}
10037   ins_pipe(ialu_mem_imm);
10038 %}
10039 
10040 // XXX why does that use AddI
10041 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10042 %{
10043   predicate(!UseAPX && UseIncDec);
10044   match(Set dst (AddI dst src));
10045   effect(KILL cr);
10046 
10047   format %{ "decl    $dst\t# int" %}
10048   ins_encode %{
10049     __ decrementl($dst$$Register);
10050   %}
10051   ins_pipe(ialu_reg);
10052 %}
10053 
10054 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10055 %{
10056   predicate(UseAPX && UseIncDec);
10057   match(Set dst (AddI src val));
10058   effect(KILL cr);
10059   flag(PD::Flag_ndd_demotable_opr1);
10060 
10061   format %{ "edecl    $dst, $src\t# int ndd" %}
10062   ins_encode %{
10063     __ edecl($dst$$Register, $src$$Register, false);
10064   %}
10065   ins_pipe(ialu_reg);
10066 %}
10067 
10068 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10069 %{
10070   predicate(UseAPX && UseIncDec);
10071   match(Set dst (AddI (LoadI src) val));
10072   effect(KILL cr);
10073 
10074   format %{ "edecl    $dst, $src\t# int ndd" %}
10075   ins_encode %{
10076     __ edecl($dst$$Register, $src$$Address, false);
10077   %}
10078   ins_pipe(ialu_reg);
10079 %}
10080 
10081 // XXX why does that use AddI
10082 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10083 %{
10084   predicate(UseIncDec);
10085   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10086   effect(KILL cr);
10087 
10088   ins_cost(125); // XXX
10089   format %{ "decl    $dst\t# int" %}
10090   ins_encode %{
10091     __ decrementl($dst$$Address);
10092   %}
10093   ins_pipe(ialu_mem_imm);
10094 %}
10095 
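// lea-based additions are guarded by supports_fast_2op_lea() /
// supports_fast_3op_lea(), since a three-component LEA (base + index +
// disp) runs with higher latency on some cores.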
10096 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10097 %{
10098   predicate(VM_Version::supports_fast_2op_lea());
10099   match(Set dst (AddI (LShiftI index scale) disp));
10100 
10101   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10102   ins_encode %{
10103     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10104     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10105   %}
10106   ins_pipe(ialu_reg_reg);
10107 %}
10108 
10109 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10110 %{
10111   predicate(VM_Version::supports_fast_3op_lea());
10112   match(Set dst (AddI (AddI base index) disp));
10113 
10114   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10115   ins_encode %{
10116     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10117   %}
10118   ins_pipe(ialu_reg_reg);
10119 %}
10120 
10121 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10122 %{
10123   predicate(VM_Version::supports_fast_2op_lea());
10124   match(Set dst (AddI base (LShiftI index scale)));
10125 
10126   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10127   ins_encode %{
10128     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10129     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10130   %}
10131   ins_pipe(ialu_reg_reg);
10132 %}
10133 
10134 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10135 %{
10136   predicate(VM_Version::supports_fast_3op_lea());
10137   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10138 
10139   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10140   ins_encode %{
10141     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10142     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10143   %}
10144   ins_pipe(ialu_reg_reg);
10145 %}
10146 
10147 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10148 %{
10149   predicate(!UseAPX);
10150   match(Set dst (AddL dst src));
10151   effect(KILL cr);
10152   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10153 
10154   format %{ "addq    $dst, $src\t# long" %}
10155   ins_encode %{
10156     __ addq($dst$$Register, $src$$Register);
10157   %}
10158   ins_pipe(ialu_reg_reg);
10159 %}
10160 
10161 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10162 %{
10163   predicate(UseAPX);
10164   match(Set dst (AddL src1 src2));
10165   effect(KILL cr);
10166   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10167 
10168   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10169   ins_encode %{
10170     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10171   %}
10172   ins_pipe(ialu_reg_reg);
10173 %}
10174 
10175 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10176 %{
10177   predicate(!UseAPX);
10178   match(Set dst (AddL dst src));
10179   effect(KILL cr);
10180   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10181 
10182   format %{ "addq    $dst, $src\t# long" %}
10183   ins_encode %{
10184     __ addq($dst$$Register, $src$$constant);
10185   %}
  ins_pipe(ialu_reg);
10187 %}
10188 
10189 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10190 %{
10191   predicate(UseAPX);
10192   match(Set dst (AddL src1 src2));
10193   effect(KILL cr);
10194   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10195 
10196   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10197   ins_encode %{
10198     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10199   %}
  ins_pipe(ialu_reg);
10201 %}
10202 
10203 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10204 %{
10205   predicate(UseAPX);
10206   match(Set dst (AddL (LoadL src1) src2));
10207   effect(KILL cr);
10208   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10209 
10210   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10211   ins_encode %{
10212     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10213   %}
  ins_pipe(ialu_reg);
10215 %}
10216 
10217 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10218 %{
10219   predicate(!UseAPX);
10220   match(Set dst (AddL dst (LoadL src)));
10221   effect(KILL cr);
10222   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10223 
10224   ins_cost(150); // XXX
10225   format %{ "addq    $dst, $src\t# long" %}
10226   ins_encode %{
10227     __ addq($dst$$Register, $src$$Address);
10228   %}
10229   ins_pipe(ialu_reg_mem);
10230 %}
10231 
10232 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10233 %{
10234   predicate(UseAPX);
10235   match(Set dst (AddL src1 (LoadL src2)));
10236   effect(KILL cr);
10237   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10238 
10239   ins_cost(150);
10240   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10241   ins_encode %{
10242     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10243   %}
10244   ins_pipe(ialu_reg_mem);
10245 %}
10246 
10247 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10248 %{
10249   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10250   effect(KILL cr);
10251   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10252 
10253   ins_cost(150); // XXX
10254   format %{ "addq    $dst, $src\t# long" %}
10255   ins_encode %{
10256     __ addq($dst$$Address, $src$$Register);
10257   %}
10258   ins_pipe(ialu_mem_reg);
10259 %}
10260 
10261 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10262 %{
10263   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10264   effect(KILL cr);
10265   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10266 
10267   ins_cost(125); // XXX
10268   format %{ "addq    $dst, $src\t# long" %}
10269   ins_encode %{
10270     __ addq($dst$$Address, $src$$constant);
10271   %}
10272   ins_pipe(ialu_mem_imm);
10273 %}
10274 
10275 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10276 %{
10277   predicate(!UseAPX && UseIncDec);
10278   match(Set dst (AddL dst src));
10279   effect(KILL cr);
10280 
10281   format %{ "incq    $dst\t# long" %}
10282   ins_encode %{
10283     __ incrementq($dst$$Register);
10284   %}
10285   ins_pipe(ialu_reg);
10286 %}
10287 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10289 %{
10290   predicate(UseAPX && UseIncDec);
10291   match(Set dst (AddL src val));
10292   effect(KILL cr);
10293   flag(PD::Flag_ndd_demotable_opr1);
10294 
10295   format %{ "eincq    $dst, $src\t# long ndd" %}
10296   ins_encode %{
10297     __ eincq($dst$$Register, $src$$Register, false);
10298   %}
10299   ins_pipe(ialu_reg);
10300 %}
10301 
10302 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10303 %{
10304   predicate(UseAPX && UseIncDec);
10305   match(Set dst (AddL (LoadL src) val));
10306   effect(KILL cr);
10307 
10308   format %{ "eincq    $dst, $src\t# long ndd" %}
10309   ins_encode %{
10310     __ eincq($dst$$Register, $src$$Address, false);
10311   %}
10312   ins_pipe(ialu_reg);
10313 %}
10314 
10315 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10316 %{
10317   predicate(UseIncDec);
10318   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10319   effect(KILL cr);
10320 
10321   ins_cost(125); // XXX
10322   format %{ "incq    $dst\t# long" %}
10323   ins_encode %{
10324     __ incrementq($dst$$Address);
10325   %}
10326   ins_pipe(ialu_mem_imm);
10327 %}
10328 
10329 // XXX why does that use AddL
10330 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10331 %{
10332   predicate(!UseAPX && UseIncDec);
10333   match(Set dst (AddL dst src));
10334   effect(KILL cr);
10335 
10336   format %{ "decq    $dst\t# long" %}
10337   ins_encode %{
10338     __ decrementq($dst$$Register);
10339   %}
10340   ins_pipe(ialu_reg);
10341 %}
10342 
10343 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10344 %{
10345   predicate(UseAPX && UseIncDec);
10346   match(Set dst (AddL src val));
10347   effect(KILL cr);
10348   flag(PD::Flag_ndd_demotable_opr1);
10349 
10350   format %{ "edecq    $dst, $src\t# long ndd" %}
10351   ins_encode %{
10352     __ edecq($dst$$Register, $src$$Register, false);
10353   %}
10354   ins_pipe(ialu_reg);
10355 %}
10356 
10357 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10358 %{
10359   predicate(UseAPX && UseIncDec);
10360   match(Set dst (AddL (LoadL src) val));
10361   effect(KILL cr);
10362 
10363   format %{ "edecq    $dst, $src\t# long ndd" %}
10364   ins_encode %{
10365     __ edecq($dst$$Register, $src$$Address, false);
10366   %}
10367   ins_pipe(ialu_reg);
10368 %}
10369 
10370 // XXX why does that use AddL
10371 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10372 %{
10373   predicate(UseIncDec);
10374   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10375   effect(KILL cr);
10376 
10377   ins_cost(125); // XXX
10378   format %{ "decq    $dst\t# long" %}
10379   ins_encode %{
10380     __ decrementq($dst$$Address);
10381   %}
10382   ins_pipe(ialu_mem_imm);
10383 %}
10384 
10385 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10386 %{
10387   predicate(VM_Version::supports_fast_2op_lea());
10388   match(Set dst (AddL (LShiftL index scale) disp));
10389 
10390   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10391   ins_encode %{
10392     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10393     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10394   %}
10395   ins_pipe(ialu_reg_reg);
10396 %}
10397 
10398 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10399 %{
10400   predicate(VM_Version::supports_fast_3op_lea());
10401   match(Set dst (AddL (AddL base index) disp));
10402 
10403   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10404   ins_encode %{
10405     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10406   %}
10407   ins_pipe(ialu_reg_reg);
10408 %}
10409 
10410 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10411 %{
10412   predicate(VM_Version::supports_fast_2op_lea());
10413   match(Set dst (AddL base (LShiftL index scale)));
10414 
10415   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10416   ins_encode %{
10417     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10418     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10419   %}
10420   ins_pipe(ialu_reg_reg);
10421 %}
10422 
10423 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10424 %{
10425   predicate(VM_Version::supports_fast_3op_lea());
10426   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10427 
10428   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10429   ins_encode %{
10430     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10431     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10432   %}
10433   ins_pipe(ialu_reg_reg);
10434 %}
10435 
10436 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10437 %{
10438   match(Set dst (AddP dst src));
10439   effect(KILL cr);
10440   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10441 
10442   format %{ "addq    $dst, $src\t# ptr" %}
10443   ins_encode %{
10444     __ addq($dst$$Register, $src$$Register);
10445   %}
10446   ins_pipe(ialu_reg_reg);
10447 %}
10448 
10449 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10450 %{
10451   match(Set dst (AddP dst src));
10452   effect(KILL cr);
10453   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10454 
10455   format %{ "addq    $dst, $src\t# ptr" %}
10456   ins_encode %{
10457     __ addq($dst$$Register, $src$$constant);
10458   %}
  ins_pipe(ialu_reg);
10460 %}
10461 
10462 // XXX addP mem ops ????
10463 
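// The cast nodes below only carry sharpened type information for the
// optimizer; they emit no code (size(0), empty encoding) unless
// VerifyConstraintCasts requests a runtime range check.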
10464 instruct checkCastPP(rRegP dst)
10465 %{
10466   match(Set dst (CheckCastPP dst));
10467 
10468   size(0);
10469   format %{ "# checkcastPP of $dst" %}
10470   ins_encode(/* empty encoding */);
10471   ins_pipe(empty);
10472 %}
10473 
10474 instruct castPP(rRegP dst)
10475 %{
10476   match(Set dst (CastPP dst));
10477 
10478   size(0);
10479   format %{ "# castPP of $dst" %}
10480   ins_encode(/* empty encoding */);
10481   ins_pipe(empty);
10482 %}
10483 
10484 instruct castII(rRegI dst)
10485 %{
10486   predicate(VerifyConstraintCasts == 0);
10487   match(Set dst (CastII dst));
10488 
10489   size(0);
10490   format %{ "# castII of $dst" %}
10491   ins_encode(/* empty encoding */);
10492   ins_cost(0);
10493   ins_pipe(empty);
10494 %}
10495 
10496 instruct castII_checked(rRegI dst, rFlagsReg cr)
10497 %{
10498   predicate(VerifyConstraintCasts > 0);
10499   match(Set dst (CastII dst));
10500 
10501   effect(KILL cr);
10502   format %{ "# cast_checked_II $dst" %}
10503   ins_encode %{
10504     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10505   %}
10506   ins_pipe(pipe_slow);
10507 %}
10508 
10509 instruct castLL(rRegL dst)
10510 %{
10511   predicate(VerifyConstraintCasts == 0);
10512   match(Set dst (CastLL dst));
10513 
10514   size(0);
10515   format %{ "# castLL of $dst" %}
10516   ins_encode(/* empty encoding */);
10517   ins_cost(0);
10518   ins_pipe(empty);
10519 %}
10520 
10521 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10522 %{
10523   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10524   match(Set dst (CastLL dst));
10525 
10526   effect(KILL cr);
10527   format %{ "# cast_checked_LL $dst" %}
10528   ins_encode %{
10529     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10530   %}
10531   ins_pipe(pipe_slow);
10532 %}
10533 
10534 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10535 %{
10536   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10537   match(Set dst (CastLL dst));
10538 
10539   effect(KILL cr, TEMP tmp);
10540   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10541   ins_encode %{
10542     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10543   %}
10544   ins_pipe(pipe_slow);
10545 %}
10546 
10547 instruct castFF(regF dst)
10548 %{
10549   match(Set dst (CastFF dst));
10550 
10551   size(0);
10552   format %{ "# castFF of $dst" %}
10553   ins_encode(/* empty encoding */);
10554   ins_cost(0);
10555   ins_pipe(empty);
10556 %}
10557 
10558 instruct castHH(regF dst)
10559 %{
10560   match(Set dst (CastHH dst));
10561 
10562   size(0);
10563   format %{ "# castHH of $dst" %}
10564   ins_encode(/* empty encoding */);
10565   ins_cost(0);
10566   ins_pipe(empty);
10567 %}
10568 
10569 instruct castDD(regD dst)
10570 %{
10571   match(Set dst (CastDD dst));
10572 
10573   size(0);
10574   format %{ "# castDD of $dst" %}
10575   ins_encode(/* empty encoding */);
10576   ins_cost(0);
10577   ins_pipe(empty);
10578 %}
10579 
10580 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
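// All of the CAS rules below lean on the hardware semantics of LOCK CMPXCHG:
// the expected value is implicitly taken from rax (hence the rax_Reg*
// operand for oldval, which is killed), and ZF reports success.
// Illustratively (pseudocode, not the emitted code):
//
//   if (*mem_ptr == rax) { *mem_ptr = newval; ZF = 1; }
//   else                 { rax = *mem_ptr;    ZF = 0; }
//
// so materializing the boolean result costs only the trailing setcc.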
10581 instruct compareAndSwapP(rRegI res,
10582                          memory mem_ptr,
10583                          rax_RegP oldval, rRegP newval,
10584                          rFlagsReg cr)
10585 %{
10586   predicate(n->as_LoadStore()->barrier_data() == 0);
10587   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10588   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10589   effect(KILL cr, KILL oldval);
10590 
10591   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10592             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10593             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10594   ins_encode %{
10595     __ lock();
10596     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10597     __ setcc(Assembler::equal, $res$$Register);
10598   %}
10599   ins_pipe( pipe_cmpxchg );
10600 %}
10601 
10602 instruct compareAndSwapL(rRegI res,
10603                          memory mem_ptr,
10604                          rax_RegL oldval, rRegL newval,
10605                          rFlagsReg cr)
10606 %{
10607   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10608   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10609   effect(KILL cr, KILL oldval);
10610 
10611   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10612             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10613             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10614   ins_encode %{
10615     __ lock();
10616     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10617     __ setcc(Assembler::equal, $res$$Register);
10618   %}
10619   ins_pipe( pipe_cmpxchg );
10620 %}
10621 
10622 instruct compareAndSwapI(rRegI res,
10623                          memory mem_ptr,
10624                          rax_RegI oldval, rRegI newval,
10625                          rFlagsReg cr)
10626 %{
10627   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10628   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10629   effect(KILL cr, KILL oldval);
10630 
10631   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10632             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10633             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10634   ins_encode %{
10635     __ lock();
10636     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10637     __ setcc(Assembler::equal, $res$$Register);
10638   %}
10639   ins_pipe( pipe_cmpxchg );
10640 %}
10641 
10642 instruct compareAndSwapB(rRegI res,
10643                          memory mem_ptr,
10644                          rax_RegI oldval, rRegI newval,
10645                          rFlagsReg cr)
10646 %{
10647   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10648   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10649   effect(KILL cr, KILL oldval);
10650 
10651   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10652             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10653             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10654   ins_encode %{
10655     __ lock();
10656     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10657     __ setcc(Assembler::equal, $res$$Register);
10658   %}
10659   ins_pipe( pipe_cmpxchg );
10660 %}
10661 
10662 instruct compareAndSwapS(rRegI res,
10663                          memory mem_ptr,
10664                          rax_RegI oldval, rRegI newval,
10665                          rFlagsReg cr)
10666 %{
10667   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10668   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10669   effect(KILL cr, KILL oldval);
10670 
10671   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10672             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10673             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10674   ins_encode %{
10675     __ lock();
10676     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10677     __ setcc(Assembler::equal, $res$$Register);
10678   %}
10679   ins_pipe( pipe_cmpxchg );
10680 %}
10681 
10682 instruct compareAndSwapN(rRegI res,
10683                           memory mem_ptr,
10684                           rax_RegN oldval, rRegN newval,
10685                           rFlagsReg cr) %{
10686   predicate(n->as_LoadStore()->barrier_data() == 0);
10687   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10688   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10689   effect(KILL cr, KILL oldval);
10690 
10691   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10692             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10693             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10694   ins_encode %{
10695     __ lock();
10696     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10697     __ setcc(Assembler::equal, $res$$Register);
10698   %}
10699   ins_pipe( pipe_cmpxchg );
10700 %}
10701 
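// The compareAndExchange rules below differ from compareAndSwap only in what
// they return: instead of deriving a boolean via setcc, they yield the value
// witnessed in memory, which LOCK CMPXCHG leaves in rax on failure (and
// which equals oldval on success).  The rules therefore set oldval itself
// and need no flag-to-register sequence.
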
10702 instruct compareAndExchangeB(
10703                          memory mem_ptr,
10704                          rax_RegI oldval, rRegI newval,
10705                          rFlagsReg cr)
10706 %{
10707   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10708   effect(KILL cr);
10709 
10710   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10711             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10712   ins_encode %{
10713     __ lock();
10714     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10715   %}
10716   ins_pipe( pipe_cmpxchg );
10717 %}
10718 
10719 instruct compareAndExchangeS(
10720                          memory mem_ptr,
10721                          rax_RegI oldval, rRegI newval,
10722                          rFlagsReg cr)
10723 %{
10724   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10725   effect(KILL cr);
10726 
10727   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10728             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10729   ins_encode %{
10730     __ lock();
10731     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10732   %}
10733   ins_pipe( pipe_cmpxchg );
10734 %}
10735 
10736 instruct compareAndExchangeI(
10737                          memory mem_ptr,
10738                          rax_RegI oldval, rRegI newval,
10739                          rFlagsReg cr)
10740 %{
10741   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10742   effect(KILL cr);
10743 
10744   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10745             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10746   ins_encode %{
10747     __ lock();
10748     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10749   %}
10750   ins_pipe( pipe_cmpxchg );
10751 %}
10752 
10753 instruct compareAndExchangeL(
10754                          memory mem_ptr,
10755                          rax_RegL oldval, rRegL newval,
10756                          rFlagsReg cr)
10757 %{
10758   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10759   effect(KILL cr);
10760 
10761   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10762             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10763   ins_encode %{
10764     __ lock();
10765     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10766   %}
10767   ins_pipe( pipe_cmpxchg );
10768 %}
10769 
10770 instruct compareAndExchangeN(
10771                           memory mem_ptr,
10772                           rax_RegN oldval, rRegN newval,
10773                           rFlagsReg cr) %{
10774   predicate(n->as_LoadStore()->barrier_data() == 0);
10775   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10776   effect(KILL cr);
10777 
10778   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10779             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10780   ins_encode %{
10781     __ lock();
10782     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10783   %}
10784   ins_pipe( pipe_cmpxchg );
10785 %}
10786 
10787 instruct compareAndExchangeP(
10788                          memory mem_ptr,
10789                          rax_RegP oldval, rRegP newval,
10790                          rFlagsReg cr)
10791 %{
10792   predicate(n->as_LoadStore()->barrier_data() == 0);
10793   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10794   effect(KILL cr);
10795 
10796   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10797             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10798   ins_encode %{
10799     __ lock();
10800     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10801   %}
10802   ins_pipe( pipe_cmpxchg );
10803 %}
10804 
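// GetAndAdd: when the fetched value is unused (the result_not_used
// predicate), a plain "lock add" is enough and ties up no result register;
// otherwise "lock xadd" atomically adds and returns the previous memory
// value in the source register.  Conceptually getAndAdd(mem, v) is the
// atomic version of { old = *mem; *mem = old + v; return old; }.
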
10805 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10806   predicate(n->as_LoadStore()->result_not_used());
10807   match(Set dummy (GetAndAddB mem add));
10808   effect(KILL cr);
10809   format %{ "addb_lock   $mem, $add" %}
10810   ins_encode %{
10811     __ lock();
10812     __ addb($mem$$Address, $add$$Register);
10813   %}
10814   ins_pipe(pipe_cmpxchg);
10815 %}
10816 
10817 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10818   predicate(n->as_LoadStore()->result_not_used());
10819   match(Set dummy (GetAndAddB mem add));
10820   effect(KILL cr);
10821   format %{ "addb_lock   $mem, $add" %}
10822   ins_encode %{
10823     __ lock();
10824     __ addb($mem$$Address, $add$$constant);
10825   %}
10826   ins_pipe(pipe_cmpxchg);
10827 %}
10828 
10829 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10830   predicate(!n->as_LoadStore()->result_not_used());
10831   match(Set newval (GetAndAddB mem newval));
10832   effect(KILL cr);
10833   format %{ "xaddb_lock  $mem, $newval" %}
10834   ins_encode %{
10835     __ lock();
10836     __ xaddb($mem$$Address, $newval$$Register);
10837   %}
10838   ins_pipe(pipe_cmpxchg);
10839 %}
10840 
10841 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10842   predicate(n->as_LoadStore()->result_not_used());
10843   match(Set dummy (GetAndAddS mem add));
10844   effect(KILL cr);
10845   format %{ "addw_lock   $mem, $add" %}
10846   ins_encode %{
10847     __ lock();
10848     __ addw($mem$$Address, $add$$Register);
10849   %}
10850   ins_pipe(pipe_cmpxchg);
10851 %}
10852 
10853 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10854   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10855   match(Set dummy (GetAndAddS mem add));
10856   effect(KILL cr);
10857   format %{ "addw_lock   $mem, $add" %}
10858   ins_encode %{
10859     __ lock();
10860     __ addw($mem$$Address, $add$$constant);
10861   %}
10862   ins_pipe(pipe_cmpxchg);
10863 %}
10864 
10865 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10866   predicate(!n->as_LoadStore()->result_not_used());
10867   match(Set newval (GetAndAddS mem newval));
10868   effect(KILL cr);
10869   format %{ "xaddw_lock  $mem, $newval" %}
10870   ins_encode %{
10871     __ lock();
10872     __ xaddw($mem$$Address, $newval$$Register);
10873   %}
10874   ins_pipe(pipe_cmpxchg);
10875 %}
10876 
10877 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10878   predicate(n->as_LoadStore()->result_not_used());
10879   match(Set dummy (GetAndAddI mem add));
10880   effect(KILL cr);
10881   format %{ "addl_lock   $mem, $add" %}
10882   ins_encode %{
10883     __ lock();
10884     __ addl($mem$$Address, $add$$Register);
10885   %}
10886   ins_pipe(pipe_cmpxchg);
10887 %}
10888 
10889 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10890   predicate(n->as_LoadStore()->result_not_used());
10891   match(Set dummy (GetAndAddI mem add));
10892   effect(KILL cr);
10893   format %{ "addl_lock   $mem, $add" %}
10894   ins_encode %{
10895     __ lock();
10896     __ addl($mem$$Address, $add$$constant);
10897   %}
10898   ins_pipe(pipe_cmpxchg);
10899 %}
10900 
10901 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10902   predicate(!n->as_LoadStore()->result_not_used());
10903   match(Set newval (GetAndAddI mem newval));
10904   effect(KILL cr);
10905   format %{ "xaddl_lock  $mem, $newval" %}
10906   ins_encode %{
10907     __ lock();
10908     __ xaddl($mem$$Address, $newval$$Register);
10909   %}
10910   ins_pipe(pipe_cmpxchg);
10911 %}
10912 
10913 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10914   predicate(n->as_LoadStore()->result_not_used());
10915   match(Set dummy (GetAndAddL mem add));
10916   effect(KILL cr);
10917   format %{ "addq_lock   $mem, $add" %}
10918   ins_encode %{
10919     __ lock();
10920     __ addq($mem$$Address, $add$$Register);
10921   %}
10922   ins_pipe(pipe_cmpxchg);
10923 %}
10924 
10925 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10926   predicate(n->as_LoadStore()->result_not_used());
10927   match(Set dummy (GetAndAddL mem add));
10928   effect(KILL cr);
10929   format %{ "addq_lock   $mem, $add" %}
10930   ins_encode %{
10931     __ lock();
10932     __ addq($mem$$Address, $add$$constant);
10933   %}
10934   ins_pipe(pipe_cmpxchg);
10935 %}
10936 
10937 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10938   predicate(!n->as_LoadStore()->result_not_used());
10939   match(Set newval (GetAndAddL mem newval));
10940   effect(KILL cr);
10941   format %{ "xaddq_lock  $mem, $newval" %}
10942   ins_encode %{
10943     __ lock();
10944     __ xaddq($mem$$Address, $newval$$Register);
10945   %}
10946   ins_pipe(pipe_cmpxchg);
10947 %}
10948 
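// GetAndSet: there is no explicit __ lock() in these encodings because XCHG
// with a memory operand asserts the LOCK signal implicitly on x86.
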
10949 instruct xchgB( memory mem, rRegI newval) %{
10950   match(Set newval (GetAndSetB mem newval));
10951   format %{ "XCHGB  $newval,[$mem]" %}
10952   ins_encode %{
10953     __ xchgb($newval$$Register, $mem$$Address);
10954   %}
10955   ins_pipe( pipe_cmpxchg );
10956 %}
10957 
10958 instruct xchgS( memory mem, rRegI newval) %{
10959   match(Set newval (GetAndSetS mem newval));
10960   format %{ "XCHGW  $newval,[$mem]" %}
10961   ins_encode %{
10962     __ xchgw($newval$$Register, $mem$$Address);
10963   %}
10964   ins_pipe( pipe_cmpxchg );
10965 %}
10966 
10967 instruct xchgI( memory mem, rRegI newval) %{
10968   match(Set newval (GetAndSetI mem newval));
10969   format %{ "XCHGL  $newval,[$mem]" %}
10970   ins_encode %{
10971     __ xchgl($newval$$Register, $mem$$Address);
10972   %}
10973   ins_pipe( pipe_cmpxchg );
10974 %}
10975 
10976 instruct xchgL( memory mem, rRegL newval) %{
10977   match(Set newval (GetAndSetL mem newval));
10978   format %{ "XCHGL  $newval,[$mem]" %}
10979   ins_encode %{
10980     __ xchgq($newval$$Register, $mem$$Address);
10981   %}
10982   ins_pipe( pipe_cmpxchg );
10983 %}
10984 
10985 instruct xchgP( memory mem, rRegP newval) %{
10986   match(Set newval (GetAndSetP mem newval));
10987   predicate(n->as_LoadStore()->barrier_data() == 0);
10988   format %{ "XCHGQ  $newval,[$mem]" %}
10989   ins_encode %{
10990     __ xchgq($newval$$Register, $mem$$Address);
10991   %}
10992   ins_pipe( pipe_cmpxchg );
10993 %}
10994 
10995 instruct xchgN( memory mem, rRegN newval) %{
10996   predicate(n->as_LoadStore()->barrier_data() == 0);
10997   match(Set newval (GetAndSetN mem newval));
10998   format %{ "XCHGL  $newval,$mem]" %}
10999   ins_encode %{
11000     __ xchgl($newval$$Register, $mem$$Address);
11001   %}
11002   ins_pipe( pipe_cmpxchg );
11003 %}
11004 
//----------Abs Instructions---------------------------------------------------
11006 
11007 // Integer Absolute Instructions
11008 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11009 %{
11010   match(Set dst (AbsI src));
11011   effect(TEMP dst, KILL cr);
11012   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11013             "subl    $dst, $src\n\t"
11014             "cmovll  $dst, $src" %}
11015   ins_encode %{
11016     __ xorl($dst$$Register, $dst$$Register);
11017     __ subl($dst$$Register, $src$$Register);
11018     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11019   %}
11020 
11021   ins_pipe(ialu_reg_reg);
11022 %}
11023 
11024 // Long Absolute Instructions
11025 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11026 %{
11027   match(Set dst (AbsL src));
11028   effect(TEMP dst, KILL cr);
11029   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11030             "subq    $dst, $src\n\t"
11031             "cmovlq  $dst, $src" %}
11032   ins_encode %{
11033     __ xorl($dst$$Register, $dst$$Register);
11034     __ subq($dst$$Register, $src$$Register);
11035     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11036   %}
11037 
11038   ins_pipe(ialu_reg_reg);
11039 %}
11040 
11041 //----------Subtraction Instructions-------------------------------------------
11042 
11043 // Integer Subtraction Instructions
11044 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11045 %{
11046   predicate(!UseAPX);
11047   match(Set dst (SubI dst src));
11048   effect(KILL cr);
11049   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11050 
11051   format %{ "subl    $dst, $src\t# int" %}
11052   ins_encode %{
11053     __ subl($dst$$Register, $src$$Register);
11054   %}
11055   ins_pipe(ialu_reg_reg);
11056 %}
11057 
11058 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11059 %{
11060   predicate(UseAPX);
11061   match(Set dst (SubI src1 src2));
11062   effect(KILL cr);
11063   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11064 
11065   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11066   ins_encode %{
11067     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11068   %}
11069   ins_pipe(ialu_reg_reg);
11070 %}
11071 
11072 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11073 %{
11074   predicate(UseAPX);
11075   match(Set dst (SubI src1 src2));
11076   effect(KILL cr);
11077   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11078 
11079   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11080   ins_encode %{
11081     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11082   %}
11083   ins_pipe(ialu_reg_reg);
11084 %}
11085 
11086 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11087 %{
11088   predicate(UseAPX);
11089   match(Set dst (SubI (LoadI src1) src2));
11090   effect(KILL cr);
11091   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11092 
11093   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11094   ins_encode %{
11095     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11096   %}
11097   ins_pipe(ialu_reg_reg);
11098 %}
11099 
11100 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11101 %{
11102   predicate(!UseAPX);
11103   match(Set dst (SubI dst (LoadI src)));
11104   effect(KILL cr);
11105   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11106 
11107   ins_cost(150);
11108   format %{ "subl    $dst, $src\t# int" %}
11109   ins_encode %{
11110     __ subl($dst$$Register, $src$$Address);
11111   %}
11112   ins_pipe(ialu_reg_mem);
11113 %}
11114 
11115 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11116 %{
11117   predicate(UseAPX);
11118   match(Set dst (SubI src1 (LoadI src2)));
11119   effect(KILL cr);
11120   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11121 
11122   ins_cost(150);
11123   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11124   ins_encode %{
11125     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11126   %}
11127   ins_pipe(ialu_reg_mem);
11128 %}
11129 
11130 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11131 %{
11132   predicate(UseAPX);
11133   match(Set dst (SubI (LoadI src1) src2));
11134   effect(KILL cr);
11135   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11136 
11137   ins_cost(150);
11138   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11139   ins_encode %{
11140     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11141   %}
11142   ins_pipe(ialu_reg_mem);
11143 %}
11144 
11145 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11146 %{
11147   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11148   effect(KILL cr);
11149   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11150 
11151   ins_cost(150);
11152   format %{ "subl    $dst, $src\t# int" %}
11153   ins_encode %{
11154     __ subl($dst$$Address, $src$$Register);
11155   %}
11156   ins_pipe(ialu_mem_reg);
11157 %}
11158 
11159 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11160 %{
11161   predicate(!UseAPX);
11162   match(Set dst (SubL dst src));
11163   effect(KILL cr);
11164   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11165 
11166   format %{ "subq    $dst, $src\t# long" %}
11167   ins_encode %{
11168     __ subq($dst$$Register, $src$$Register);
11169   %}
11170   ins_pipe(ialu_reg_reg);
11171 %}
11172 
11173 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11174 %{
11175   predicate(UseAPX);
11176   match(Set dst (SubL src1 src2));
11177   effect(KILL cr);
11178   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11179 
11180   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11181   ins_encode %{
11182     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11183   %}
11184   ins_pipe(ialu_reg_reg);
11185 %}
11186 
11187 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11188 %{
11189   predicate(UseAPX);
11190   match(Set dst (SubL src1 src2));
11191   effect(KILL cr);
11192   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11193 
11194   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11195   ins_encode %{
11196     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11197   %}
11198   ins_pipe(ialu_reg_reg);
11199 %}
11200 
11201 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11202 %{
11203   predicate(UseAPX);
11204   match(Set dst (SubL (LoadL src1) src2));
11205   effect(KILL cr);
11206   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11207 
11208   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11209   ins_encode %{
11210     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11211   %}
11212   ins_pipe(ialu_reg_reg);
11213 %}
11214 
11215 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11216 %{
11217   predicate(!UseAPX);
11218   match(Set dst (SubL dst (LoadL src)));
11219   effect(KILL cr);
11220   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11221 
11222   ins_cost(150);
11223   format %{ "subq    $dst, $src\t# long" %}
11224   ins_encode %{
11225     __ subq($dst$$Register, $src$$Address);
11226   %}
11227   ins_pipe(ialu_reg_mem);
11228 %}
11229 
11230 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11231 %{
11232   predicate(UseAPX);
11233   match(Set dst (SubL src1 (LoadL src2)));
11234   effect(KILL cr);
11235   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11236 
11237   ins_cost(150);
11238   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11239   ins_encode %{
11240     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11241   %}
11242   ins_pipe(ialu_reg_mem);
11243 %}
11244 
11245 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11246 %{
11247   predicate(UseAPX);
11248   match(Set dst (SubL (LoadL src1) src2));
11249   effect(KILL cr);
11250   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11251 
11252   ins_cost(150);
11253   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11254   ins_encode %{
11255     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11256   %}
11257   ins_pipe(ialu_reg_mem);
11258 %}
11259 
11260 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11261 %{
11262   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11263   effect(KILL cr);
11264   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11265 
11266   ins_cost(150);
11267   format %{ "subq    $dst, $src\t# long" %}
11268   ins_encode %{
11269     __ subq($dst$$Address, $src$$Register);
11270   %}
11271   ins_pipe(ialu_mem_reg);
11272 %}
11273 
11274 // Subtract from a pointer
11275 // XXX hmpf???
11276 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11277 %{
11278   match(Set dst (AddP dst (SubI zero src)));
11279   effect(KILL cr);
11280 
11281   format %{ "subq    $dst, $src\t# ptr - int" %}
11282   ins_encode %{
11283     __ subq($dst$$Register, $src$$Register);
11284   %}
11285   ins_pipe(ialu_reg_reg);
11286 %}
11287 
11288 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11289 %{
11290   predicate(!UseAPX);
11291   match(Set dst (SubI zero dst));
11292   effect(KILL cr);
11293   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11294 
11295   format %{ "negl    $dst\t# int" %}
11296   ins_encode %{
11297     __ negl($dst$$Register);
11298   %}
11299   ins_pipe(ialu_reg);
11300 %}
11301 
11302 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11303 %{
11304   predicate(UseAPX);
11305   match(Set dst (SubI zero src));
11306   effect(KILL cr);
11307   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11308 
11309   format %{ "enegl    $dst, $src\t# int ndd" %}
11310   ins_encode %{
11311     __ enegl($dst$$Register, $src$$Register, false);
11312   %}
11313   ins_pipe(ialu_reg);
11314 %}
11315 
11316 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11317 %{
11318   predicate(!UseAPX);
11319   match(Set dst (NegI dst));
11320   effect(KILL cr);
11321   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11322 
11323   format %{ "negl    $dst\t# int" %}
11324   ins_encode %{
11325     __ negl($dst$$Register);
11326   %}
11327   ins_pipe(ialu_reg);
11328 %}
11329 
11330 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11331 %{
11332   predicate(UseAPX);
11333   match(Set dst (NegI src));
11334   effect(KILL cr);
11335   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11336 
11337   format %{ "enegl    $dst, $src\t# int ndd" %}
11338   ins_encode %{
11339     __ enegl($dst$$Register, $src$$Register, false);
11340   %}
11341   ins_pipe(ialu_reg);
11342 %}
11343 
11344 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11345 %{
11346   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11347   effect(KILL cr);
11348   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11349 
11350   format %{ "negl    $dst\t# int" %}
11351   ins_encode %{
11352     __ negl($dst$$Address);
11353   %}
11354   ins_pipe(ialu_reg);
11355 %}
11356 
11357 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11358 %{
11359   predicate(!UseAPX);
11360   match(Set dst (SubL zero dst));
11361   effect(KILL cr);
11362   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11363 
11364   format %{ "negq    $dst\t# long" %}
11365   ins_encode %{
11366     __ negq($dst$$Register);
11367   %}
11368   ins_pipe(ialu_reg);
11369 %}
11370 
11371 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11372 %{
11373   predicate(UseAPX);
11374   match(Set dst (SubL zero src));
11375   effect(KILL cr);
11376   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11377 
11378   format %{ "enegq    $dst, $src\t# long ndd" %}
11379   ins_encode %{
11380     __ enegq($dst$$Register, $src$$Register, false);
11381   %}
11382   ins_pipe(ialu_reg);
11383 %}
11384 
11385 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11386 %{
11387   predicate(!UseAPX);
11388   match(Set dst (NegL dst));
11389   effect(KILL cr);
11390   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11391 
11392   format %{ "negq    $dst\t# int" %}
11393   ins_encode %{
11394     __ negq($dst$$Register);
11395   %}
11396   ins_pipe(ialu_reg);
11397 %}
11398 
11399 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11400 %{
11401   predicate(UseAPX);
11402   match(Set dst (NegL src));
11403   effect(KILL cr);
11404   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11405 
11406   format %{ "enegq    $dst, $src\t# long ndd" %}
11407   ins_encode %{
11408     __ enegq($dst$$Register, $src$$Register, false);
11409   %}
11410   ins_pipe(ialu_reg);
11411 %}
11412 
11413 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11414 %{
11415   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11416   effect(KILL cr);
11417   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11418 
11419   format %{ "negq    $dst\t# long" %}
11420   ins_encode %{
11421     __ negq($dst$$Address);
11422   %}
11423   ins_pipe(ialu_reg);
11424 %}
11425 
11426 //----------Multiplication/Division Instructions-------------------------------
11427 // Integer Multiplication Instructions
11428 // Multiply Register
11429 
11430 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11431 %{
11432   predicate(!UseAPX);
11433   match(Set dst (MulI dst src));
11434   effect(KILL cr);
11435 
11436   ins_cost(300);
11437   format %{ "imull   $dst, $src\t# int" %}
11438   ins_encode %{
11439     __ imull($dst$$Register, $src$$Register);
11440   %}
11441   ins_pipe(ialu_reg_reg_alu0);
11442 %}
11443 
11444 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11445 %{
11446   predicate(UseAPX);
11447   match(Set dst (MulI src1 src2));
11448   effect(KILL cr);
11449   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11450 
11451   ins_cost(300);
11452   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11453   ins_encode %{
11454     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11455   %}
11456   ins_pipe(ialu_reg_reg_alu0);
11457 %}
11458 
11459 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11460 %{
11461   match(Set dst (MulI src imm));
11462   effect(KILL cr);
11463 
11464   ins_cost(300);
11465   format %{ "imull   $dst, $src, $imm\t# int" %}
11466   ins_encode %{
11467     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11468   %}
11469   ins_pipe(ialu_reg_reg_alu0);
11470 %}
11471 
11472 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11473 %{
11474   predicate(!UseAPX);
11475   match(Set dst (MulI dst (LoadI src)));
11476   effect(KILL cr);
11477 
11478   ins_cost(350);
11479   format %{ "imull   $dst, $src\t# int" %}
11480   ins_encode %{
11481     __ imull($dst$$Register, $src$$Address);
11482   %}
11483   ins_pipe(ialu_reg_mem_alu0);
11484 %}
11485 
11486 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11487 %{
11488   predicate(UseAPX);
11489   match(Set dst (MulI src1 (LoadI src2)));
11490   effect(KILL cr);
11491   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11492 
11493   ins_cost(350);
11494   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11495   ins_encode %{
11496     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11497   %}
11498   ins_pipe(ialu_reg_mem_alu0);
11499 %}
11500 
11501 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11502 %{
11503   match(Set dst (MulI (LoadI src) imm));
11504   effect(KILL cr);
11505 
11506   ins_cost(300);
11507   format %{ "imull   $dst, $src, $imm\t# int" %}
11508   ins_encode %{
11509     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11510   %}
11511   ins_pipe(ialu_reg_mem_alu0);
11512 %}
11513 
11514 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11515 %{
11516   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11517   effect(KILL cr, KILL src2);
11518 
11519   expand %{ mulI_rReg(dst, src1, cr);
11520            mulI_rReg(src2, src3, cr);
11521            addI_rReg(dst, src2, cr); %}
11522 %}
11523 
11524 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11525 %{
11526   predicate(!UseAPX);
11527   match(Set dst (MulL dst src));
11528   effect(KILL cr);
11529 
11530   ins_cost(300);
11531   format %{ "imulq   $dst, $src\t# long" %}
11532   ins_encode %{
11533     __ imulq($dst$$Register, $src$$Register);
11534   %}
11535   ins_pipe(ialu_reg_reg_alu0);
11536 %}
11537 
11538 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11539 %{
11540   predicate(UseAPX);
11541   match(Set dst (MulL src1 src2));
11542   effect(KILL cr);
11543   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11544 
11545   ins_cost(300);
11546   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11547   ins_encode %{
11548     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11549   %}
11550   ins_pipe(ialu_reg_reg_alu0);
11551 %}
11552 
11553 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11554 %{
11555   match(Set dst (MulL src imm));
11556   effect(KILL cr);
11557 
11558   ins_cost(300);
11559   format %{ "imulq   $dst, $src, $imm\t# long" %}
11560   ins_encode %{
11561     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11562   %}
11563   ins_pipe(ialu_reg_reg_alu0);
11564 %}
11565 
11566 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11567 %{
11568   predicate(!UseAPX);
11569   match(Set dst (MulL dst (LoadL src)));
11570   effect(KILL cr);
11571 
11572   ins_cost(350);
11573   format %{ "imulq   $dst, $src\t# long" %}
11574   ins_encode %{
11575     __ imulq($dst$$Register, $src$$Address);
11576   %}
11577   ins_pipe(ialu_reg_mem_alu0);
11578 %}
11579 
11580 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11581 %{
11582   predicate(UseAPX);
11583   match(Set dst (MulL src1 (LoadL src2)));
11584   effect(KILL cr);
11585   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11586 
11587   ins_cost(350);
11588   format %{ "eimulq   $dst, $src1, $src2 \t# long" %}
11589   ins_encode %{
11590     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11591   %}
11592   ins_pipe(ialu_reg_mem_alu0);
11593 %}
11594 
11595 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11596 %{
11597   match(Set dst (MulL (LoadL src) imm));
11598   effect(KILL cr);
11599 
11600   ins_cost(300);
11601   format %{ "imulq   $dst, $src, $imm\t# long" %}
11602   ins_encode %{
11603     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11604   %}
11605   ins_pipe(ialu_reg_mem_alu0);
11606 %}
11607 
11608 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11609 %{
11610   match(Set dst (MulHiL src rax));
11611   effect(USE_KILL rax, KILL cr);
11612 
11613   ins_cost(300);
11614   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11615   ins_encode %{
11616     __ imulq($src$$Register);
11617   %}
11618   ins_pipe(ialu_reg_reg_alu0);
11619 %}
11620 
11621 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11622 %{
11623   match(Set dst (UMulHiL src rax));
11624   effect(USE_KILL rax, KILL cr);
11625 
11626   ins_cost(300);
11627   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11628   ins_encode %{
11629     __ mulq($src$$Register);
11630   %}
11631   ins_pipe(ialu_reg_reg_alu0);
11632 %}
11633 
11634 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11635                    rFlagsReg cr)
11636 %{
11637   match(Set rax (DivI rax div));
11638   effect(KILL rdx, KILL cr);
11639 
11640   ins_cost(30*100+10*100); // XXX
11641   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11642             "jne,s   normal\n\t"
11643             "xorl    rdx, rdx\n\t"
11644             "cmpl    $div, -1\n\t"
11645             "je,s    done\n"
11646     "normal: cdql\n\t"
11647             "idivl   $div\n"
11648     "done:"        %}
11649   ins_encode(cdql_enc(div));
11650   ins_pipe(ialu_reg_reg_alu0);
11651 %}
11652 
11653 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11654                    rFlagsReg cr)
11655 %{
11656   match(Set rax (DivL rax div));
11657   effect(KILL rdx, KILL cr);
11658 
11659   ins_cost(30*100+10*100); // XXX
11660   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11661             "cmpq    rax, rdx\n\t"
11662             "jne,s   normal\n\t"
11663             "xorl    rdx, rdx\n\t"
11664             "cmpq    $div, -1\n\t"
11665             "je,s    done\n"
11666     "normal: cdqq\n\t"
11667             "idivq   $div\n"
11668     "done:"        %}
11669   ins_encode(cdqq_enc(div));
11670   ins_pipe(ialu_reg_reg_alu0);
11671 %}
11672 
11673 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11674 %{
11675   match(Set rax (UDivI rax div));
11676   effect(KILL rdx, KILL cr);
11677 
11678   ins_cost(300);
11679   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11680   ins_encode %{
11681     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11682   %}
11683   ins_pipe(ialu_reg_reg_alu0);
11684 %}
11685 
11686 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11687 %{
11688   match(Set rax (UDivL rax div));
11689   effect(KILL rdx, KILL cr);
11690 
11691   ins_cost(300);
11692   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11693   ins_encode %{
11694      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11695   %}
11696   ins_pipe(ialu_reg_reg_alu0);
11697 %}
11698 
11699 // Integer DIVMOD with Register, both quotient and mod results
11700 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11701                              rFlagsReg cr)
11702 %{
11703   match(DivModI rax div);
11704   effect(KILL cr);
11705 
11706   ins_cost(30*100+10*100); // XXX
11707   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11708             "jne,s   normal\n\t"
11709             "xorl    rdx, rdx\n\t"
11710             "cmpl    $div, -1\n\t"
11711             "je,s    done\n"
11712     "normal: cdql\n\t"
11713             "idivl   $div\n"
11714     "done:"        %}
11715   ins_encode(cdql_enc(div));
11716   ins_pipe(pipe_slow);
11717 %}
11718 
11719 // Long DIVMOD with Register, both quotient and mod results
11720 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11721                              rFlagsReg cr)
11722 %{
11723   match(DivModL rax div);
11724   effect(KILL cr);
11725 
11726   ins_cost(30*100+10*100); // XXX
11727   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11728             "cmpq    rax, rdx\n\t"
11729             "jne,s   normal\n\t"
11730             "xorl    rdx, rdx\n\t"
11731             "cmpq    $div, -1\n\t"
11732             "je,s    done\n"
11733     "normal: cdqq\n\t"
11734             "idivq   $div\n"
11735     "done:"        %}
11736   ins_encode(cdqq_enc(div));
11737   ins_pipe(pipe_slow);
11738 %}
11739 
11740 // Unsigned integer DIVMOD with Register, both quotient and mod results
11741 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11742                               no_rax_rdx_RegI div, rFlagsReg cr)
11743 %{
11744   match(UDivModI rax div);
11745   effect(TEMP tmp, KILL cr);
11746 
11747   ins_cost(300);
11748   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11749             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11750           %}
11751   ins_encode %{
11752     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11753   %}
11754   ins_pipe(pipe_slow);
11755 %}
11756 
11757 // Unsigned long DIVMOD with Register, both quotient and mod results
11758 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11759                               no_rax_rdx_RegL div, rFlagsReg cr)
11760 %{
11761   match(UDivModL rax div);
11762   effect(TEMP tmp, KILL cr);
11763 
11764   ins_cost(300);
11765   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11766             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11767           %}
11768   ins_encode %{
11769     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11770   %}
11771   ins_pipe(pipe_slow);
11772 %}
11773 
11774 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11775                    rFlagsReg cr)
11776 %{
11777   match(Set rdx (ModI rax div));
11778   effect(KILL rax, KILL cr);
11779 
11780   ins_cost(300); // XXX
11781   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11782             "jne,s   normal\n\t"
11783             "xorl    rdx, rdx\n\t"
11784             "cmpl    $div, -1\n\t"
11785             "je,s    done\n"
11786     "normal: cdql\n\t"
11787             "idivl   $div\n"
11788     "done:"        %}
11789   ins_encode(cdql_enc(div));
11790   ins_pipe(ialu_reg_reg_alu0);
11791 %}
11792 
11793 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11794                    rFlagsReg cr)
11795 %{
11796   match(Set rdx (ModL rax div));
11797   effect(KILL rax, KILL cr);
11798 
11799   ins_cost(300); // XXX
11800   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11801             "cmpq    rax, rdx\n\t"
11802             "jne,s   normal\n\t"
11803             "xorl    rdx, rdx\n\t"
11804             "cmpq    $div, -1\n\t"
11805             "je,s    done\n"
11806     "normal: cdqq\n\t"
11807             "idivq   $div\n"
11808     "done:"        %}
11809   ins_encode(cdqq_enc(div));
11810   ins_pipe(ialu_reg_reg_alu0);
11811 %}
11812 
11813 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11814 %{
11815   match(Set rdx (UModI rax div));
11816   effect(KILL rax, KILL cr);
11817 
11818   ins_cost(300);
11819   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11820   ins_encode %{
11821     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11822   %}
11823   ins_pipe(ialu_reg_reg_alu0);
11824 %}
11825 
11826 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11827 %{
11828   match(Set rdx (UModL rax div));
11829   effect(KILL rax, KILL cr);
11830 
11831   ins_cost(300);
11832   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11833   ins_encode %{
11834     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11835   %}
11836   ins_pipe(ialu_reg_reg_alu0);
11837 %}
11838 
11839 // Integer Shift Instructions
11840 // Shift Left by one, two, three
11841 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11842 %{
11843   predicate(!UseAPX);
11844   match(Set dst (LShiftI dst shift));
11845   effect(KILL cr);
11846 
11847   format %{ "sall    $dst, $shift" %}
11848   ins_encode %{
11849     __ sall($dst$$Register, $shift$$constant);
11850   %}
11851   ins_pipe(ialu_reg);
11852 %}
11853 
11854 // Shift Left by one, two, three
11855 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11856 %{
11857   predicate(UseAPX);
11858   match(Set dst (LShiftI src shift));
11859   effect(KILL cr);
11860   flag(PD::Flag_ndd_demotable_opr1);
11861 
11862   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
11863   ins_encode %{
11864     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11865   %}
11866   ins_pipe(ialu_reg);
11867 %}
11868 
11869 // Shift Left by 8-bit immediate
11870 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11871 %{
11872   predicate(!UseAPX);
11873   match(Set dst (LShiftI dst shift));
11874   effect(KILL cr);
11875 
11876   format %{ "sall    $dst, $shift" %}
11877   ins_encode %{
11878     __ sall($dst$$Register, $shift$$constant);
11879   %}
11880   ins_pipe(ialu_reg);
11881 %}
11882 
11883 // Shift Left by 8-bit immediate
11884 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11885 %{
11886   predicate(UseAPX);
11887   match(Set dst (LShiftI src shift));
11888   effect(KILL cr);
11889   flag(PD::Flag_ndd_demotable_opr1);
11890 
11891   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11892   ins_encode %{
11893     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11894   %}
11895   ins_pipe(ialu_reg);
11896 %}
11897 
11898 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11899 %{
11900   predicate(UseAPX);
11901   match(Set dst (LShiftI (LoadI src) shift));
11902   effect(KILL cr);
11903 
11904   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11905   ins_encode %{
11906     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11907   %}
11908   ins_pipe(ialu_reg);
11909 %}
11910 
11911 // Shift Left by 8-bit immediate
11912 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11913 %{
11914   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11915   effect(KILL cr);
11916 
11917   format %{ "sall    $dst, $shift" %}
11918   ins_encode %{
11919     __ sall($dst$$Address, $shift$$constant);
11920   %}
11921   ins_pipe(ialu_mem_imm);
11922 %}
11923 
11924 // Shift Left by variable
11925 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11926 %{
11927   predicate(!VM_Version::supports_bmi2());
11928   match(Set dst (LShiftI dst shift));
11929   effect(KILL cr);
11930 
11931   format %{ "sall    $dst, $shift" %}
11932   ins_encode %{
11933     __ sall($dst$$Register);
11934   %}
11935   ins_pipe(ialu_reg_reg);
11936 %}
11937 
11938 // Shift Left by variable
11939 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11940 %{
11941   predicate(!VM_Version::supports_bmi2());
11942   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11943   effect(KILL cr);
11944 
11945   format %{ "sall    $dst, $shift" %}
11946   ins_encode %{
11947     __ sall($dst$$Address);
11948   %}
11949   ins_pipe(ialu_mem_reg);
11950 %}
11951 
11952 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11953 %{
11954   predicate(VM_Version::supports_bmi2());
11955   match(Set dst (LShiftI src shift));
11956 
11957   format %{ "shlxl   $dst, $src, $shift" %}
11958   ins_encode %{
11959     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11960   %}
11961   ins_pipe(ialu_reg_reg);
11962 %}
11963 
11964 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11965 %{
11966   predicate(VM_Version::supports_bmi2());
11967   match(Set dst (LShiftI (LoadI src) shift));
11968   ins_cost(175);
11969   format %{ "shlxl   $dst, $src, $shift" %}
11970   ins_encode %{
11971     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11972   %}
11973   ins_pipe(ialu_reg_mem);
11974 %}
11975 
11976 // Arithmetic Shift Right by 8-bit immediate
11977 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11978 %{
11979   predicate(!UseAPX);
11980   match(Set dst (RShiftI dst shift));
11981   effect(KILL cr);
11982 
11983   format %{ "sarl    $dst, $shift" %}
11984   ins_encode %{
11985     __ sarl($dst$$Register, $shift$$constant);
11986   %}
11987   ins_pipe(ialu_mem_imm);
11988 %}
11989 
11990 // Arithmetic Shift Right by 8-bit immediate
11991 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11992 %{
11993   predicate(UseAPX);
11994   match(Set dst (RShiftI src shift));
11995   effect(KILL cr);
11996   flag(PD::Flag_ndd_demotable_opr1);
11997 
11998   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
11999   ins_encode %{
12000     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12001   %}
12002   ins_pipe(ialu_mem_imm);
12003 %}
12004 
12005 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12006 %{
12007   predicate(UseAPX);
12008   match(Set dst (RShiftI (LoadI src) shift));
12009   effect(KILL cr);
12010 
12011   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12012   ins_encode %{
12013     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12014   %}
12015   ins_pipe(ialu_mem_imm);
12016 %}
12017 
12018 // Arithmetic Shift Right by 8-bit immediate
12019 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12020 %{
12021   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12022   effect(KILL cr);
12023 
12024   format %{ "sarl    $dst, $shift" %}
12025   ins_encode %{
12026     __ sarl($dst$$Address, $shift$$constant);
12027   %}
12028   ins_pipe(ialu_mem_imm);
12029 %}
12030 
12031 // Arithmetic Shift Right by variable
12032 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12033 %{
12034   predicate(!VM_Version::supports_bmi2());
12035   match(Set dst (RShiftI dst shift));
12036   effect(KILL cr);
12037 
12038   format %{ "sarl    $dst, $shift" %}
12039   ins_encode %{
12040     __ sarl($dst$$Register);
12041   %}
12042   ins_pipe(ialu_reg_reg);
12043 %}
12044 
12045 // Arithmetic Shift Right by variable
12046 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12047 %{
12048   predicate(!VM_Version::supports_bmi2());
12049   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12050   effect(KILL cr);
12051 
12052   format %{ "sarl    $dst, $shift" %}
12053   ins_encode %{
12054     __ sarl($dst$$Address);
12055   %}
12056   ins_pipe(ialu_mem_reg);
12057 %}
12058 
12059 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12060 %{
12061   predicate(VM_Version::supports_bmi2());
12062   match(Set dst (RShiftI src shift));
12063 
12064   format %{ "sarxl   $dst, $src, $shift" %}
12065   ins_encode %{
12066     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12067   %}
12068   ins_pipe(ialu_reg_reg);
12069 %}
12070 
12071 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12072 %{
12073   predicate(VM_Version::supports_bmi2());
12074   match(Set dst (RShiftI (LoadI src) shift));
12075   ins_cost(175);
12076   format %{ "sarxl   $dst, $src, $shift" %}
12077   ins_encode %{
12078     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12079   %}
12080   ins_pipe(ialu_reg_mem);
12081 %}
12082 
12083 // Logical Shift Right by 8-bit immediate
12084 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12085 %{
12086   predicate(!UseAPX);
12087   match(Set dst (URShiftI dst shift));
12088   effect(KILL cr);
12089 
12090   format %{ "shrl    $dst, $shift" %}
12091   ins_encode %{
12092     __ shrl($dst$$Register, $shift$$constant);
12093   %}
12094   ins_pipe(ialu_reg);
12095 %}
12096 
12097 // Logical Shift Right by 8-bit immediate
12098 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12099 %{
12100   predicate(UseAPX);
12101   match(Set dst (URShiftI src shift));
12102   effect(KILL cr);
12103   flag(PD::Flag_ndd_demotable_opr1);
12104 
12105   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12106   ins_encode %{
12107     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12108   %}
12109   ins_pipe(ialu_reg);
12110 %}
12111 
12112 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12113 %{
12114   predicate(UseAPX);
12115   match(Set dst (URShiftI (LoadI src) shift));
12116   effect(KILL cr);
12117 
12118   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12119   ins_encode %{
12120     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12121   %}
12122   ins_pipe(ialu_reg);
12123 %}
12124 
12125 // Logical Shift Right by 8-bit immediate
12126 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12127 %{
12128   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12129   effect(KILL cr);
12130 
12131   format %{ "shrl    $dst, $shift" %}
12132   ins_encode %{
12133     __ shrl($dst$$Address, $shift$$constant);
12134   %}
12135   ins_pipe(ialu_mem_imm);
12136 %}
12137 
12138 // Logical Shift Right by variable
12139 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12140 %{
12141   predicate(!VM_Version::supports_bmi2());
12142   match(Set dst (URShiftI dst shift));
12143   effect(KILL cr);
12144 
12145   format %{ "shrl    $dst, $shift" %}
12146   ins_encode %{
12147     __ shrl($dst$$Register);
12148   %}
12149   ins_pipe(ialu_reg_reg);
12150 %}
12151 
12152 // Logical Shift Right by variable
12153 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12154 %{
12155   predicate(!VM_Version::supports_bmi2());
12156   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12157   effect(KILL cr);
12158 
12159   format %{ "shrl    $dst, $shift" %}
12160   ins_encode %{
12161     __ shrl($dst$$Address);
12162   %}
12163   ins_pipe(ialu_mem_reg);
12164 %}
12165 
12166 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12167 %{
12168   predicate(VM_Version::supports_bmi2());
12169   match(Set dst (URShiftI src shift));
12170 
12171   format %{ "shrxl   $dst, $src, $shift" %}
12172   ins_encode %{
12173     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12174   %}
12175   ins_pipe(ialu_reg_reg);
12176 %}
12177 
12178 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12179 %{
12180   predicate(VM_Version::supports_bmi2());
12181   match(Set dst (URShiftI (LoadI src) shift));
12182   ins_cost(175);
12183   format %{ "shrxl   $dst, $src, $shift" %}
12184   ins_encode %{
12185     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12186   %}
12187   ins_pipe(ialu_reg_mem);
12188 %}
12189 
12190 // Long Shift Instructions
12191 // Shift Left by one, two, three
12192 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12193 %{
12194   predicate(!UseAPX);
12195   match(Set dst (LShiftL dst shift));
12196   effect(KILL cr);
12197 
12198   format %{ "salq    $dst, $shift" %}
12199   ins_encode %{
12200     __ salq($dst$$Register, $shift$$constant);
12201   %}
12202   ins_pipe(ialu_reg);
12203 %}
12204 
12205 // Shift Left by one, two, three
12206 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12207 %{
12208   predicate(UseAPX);
12209   match(Set dst (LShiftL src shift));
12210   effect(KILL cr);
12211   flag(PD::Flag_ndd_demotable_opr1);
12212 
12213   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12214   ins_encode %{
12215     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12216   %}
12217   ins_pipe(ialu_reg);
12218 %}
12219 
12220 // Shift Left by 8-bit immediate
12221 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12222 %{
12223   predicate(!UseAPX);
12224   match(Set dst (LShiftL dst shift));
12225   effect(KILL cr);
12226 
12227   format %{ "salq    $dst, $shift" %}
12228   ins_encode %{
12229     __ salq($dst$$Register, $shift$$constant);
12230   %}
12231   ins_pipe(ialu_reg);
12232 %}
12233 
12234 // Shift Left by 8-bit immediate
12235 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12236 %{
12237   predicate(UseAPX);
12238   match(Set dst (LShiftL src shift));
12239   effect(KILL cr);
12240   flag(PD::Flag_ndd_demotable_opr1);
12241 
12242   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12243   ins_encode %{
12244     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12245   %}
12246   ins_pipe(ialu_reg);
12247 %}
12248 
12249 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12250 %{
12251   predicate(UseAPX);
12252   match(Set dst (LShiftL (LoadL src) shift));
12253   effect(KILL cr);
12254 
12255   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12256   ins_encode %{
12257     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12258   %}
12259   ins_pipe(ialu_reg);
12260 %}
12261 
12262 // Shift Left by 8-bit immediate
12263 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12264 %{
12265   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12266   effect(KILL cr);
12267 
12268   format %{ "salq    $dst, $shift" %}
12269   ins_encode %{
12270     __ salq($dst$$Address, $shift$$constant);
12271   %}
12272   ins_pipe(ialu_mem_imm);
12273 %}
12274 
12275 // Shift Left by variable
12276 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12277 %{
12278   predicate(!VM_Version::supports_bmi2());
12279   match(Set dst (LShiftL dst shift));
12280   effect(KILL cr);
12281 
12282   format %{ "salq    $dst, $shift" %}
12283   ins_encode %{
12284     __ salq($dst$$Register);
12285   %}
12286   ins_pipe(ialu_reg_reg);
12287 %}
12288 
12289 // Shift Left by variable
12290 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12291 %{
12292   predicate(!VM_Version::supports_bmi2());
12293   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12294   effect(KILL cr);
12295 
12296   format %{ "salq    $dst, $shift" %}
12297   ins_encode %{
12298     __ salq($dst$$Address);
12299   %}
12300   ins_pipe(ialu_mem_reg);
12301 %}
12302 
12303 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12304 %{
12305   predicate(VM_Version::supports_bmi2());
12306   match(Set dst (LShiftL src shift));
12307 
12308   format %{ "shlxq   $dst, $src, $shift" %}
12309   ins_encode %{
12310     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12311   %}
12312   ins_pipe(ialu_reg_reg);
12313 %}
12314 
12315 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12316 %{
12317   predicate(VM_Version::supports_bmi2());
12318   match(Set dst (LShiftL (LoadL src) shift));
12319   ins_cost(175);
12320   format %{ "shlxq   $dst, $src, $shift" %}
12321   ins_encode %{
12322     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12323   %}
12324   ins_pipe(ialu_reg_mem);
12325 %}
12326 
// Arithmetic Shift Right by constant (shift count masked to 6 bits)
12328 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12329 %{
12330   predicate(!UseAPX);
12331   match(Set dst (RShiftL dst shift));
12332   effect(KILL cr);
12333 
12334   format %{ "sarq    $dst, $shift" %}
12335   ins_encode %{
12336     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12337   %}
  ins_pipe(ialu_reg);
12339 %}
12340 
// Arithmetic Shift Right by constant (shift count masked to 6 bits)
12342 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12343 %{
12344   predicate(UseAPX);
12345   match(Set dst (RShiftL src shift));
12346   effect(KILL cr);
12347   flag(PD::Flag_ndd_demotable_opr1);
12348 
12349   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12350   ins_encode %{
12351     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12352   %}
  ins_pipe(ialu_reg);
12354 %}
12355 
12356 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12357 %{
12358   predicate(UseAPX);
12359   match(Set dst (RShiftL (LoadL src) shift));
12360   effect(KILL cr);
12361 
12362   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12363   ins_encode %{
12364     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12365   %}
  ins_pipe(ialu_reg);
12367 %}
12368 
// Arithmetic Shift Right by constant (shift count masked to 6 bits)
12370 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12371 %{
12372   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12373   effect(KILL cr);
12374 
12375   format %{ "sarq    $dst, $shift" %}
12376   ins_encode %{
12377     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12378   %}
12379   ins_pipe(ialu_mem_imm);
12380 %}
12381 
12382 // Arithmetic Shift Right by variable
12383 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12384 %{
12385   predicate(!VM_Version::supports_bmi2());
12386   match(Set dst (RShiftL dst shift));
12387   effect(KILL cr);
12388 
12389   format %{ "sarq    $dst, $shift" %}
12390   ins_encode %{
12391     __ sarq($dst$$Register);
12392   %}
12393   ins_pipe(ialu_reg_reg);
12394 %}
12395 
12396 // Arithmetic Shift Right by variable
12397 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12398 %{
12399   predicate(!VM_Version::supports_bmi2());
12400   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12401   effect(KILL cr);
12402 
12403   format %{ "sarq    $dst, $shift" %}
12404   ins_encode %{
12405     __ sarq($dst$$Address);
12406   %}
12407   ins_pipe(ialu_mem_reg);
12408 %}
12409 
12410 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12411 %{
12412   predicate(VM_Version::supports_bmi2());
12413   match(Set dst (RShiftL src shift));
12414 
12415   format %{ "sarxq   $dst, $src, $shift" %}
12416   ins_encode %{
12417     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12418   %}
12419   ins_pipe(ialu_reg_reg);
12420 %}
12421 
12422 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12423 %{
12424   predicate(VM_Version::supports_bmi2());
12425   match(Set dst (RShiftL (LoadL src) shift));
12426   ins_cost(175);
12427   format %{ "sarxq   $dst, $src, $shift" %}
12428   ins_encode %{
12429     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12430   %}
12431   ins_pipe(ialu_reg_mem);
12432 %}
12433 
12434 // Logical Shift Right by 8-bit immediate
12435 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12436 %{
12437   predicate(!UseAPX);
12438   match(Set dst (URShiftL dst shift));
12439   effect(KILL cr);
12440 
12441   format %{ "shrq    $dst, $shift" %}
12442   ins_encode %{
12443     __ shrq($dst$$Register, $shift$$constant);
12444   %}
12445   ins_pipe(ialu_reg);
12446 %}
12447 
12448 // Logical Shift Right by 8-bit immediate
12449 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12450 %{
12451   predicate(UseAPX);
12452   match(Set dst (URShiftL src shift));
12453   effect(KILL cr);
12454   flag(PD::Flag_ndd_demotable_opr1);
12455 
12456   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12457   ins_encode %{
12458     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12459   %}
12460   ins_pipe(ialu_reg);
12461 %}
12462 
12463 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12464 %{
12465   predicate(UseAPX);
12466   match(Set dst (URShiftL (LoadL src) shift));
12467   effect(KILL cr);
12468 
12469   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12470   ins_encode %{
12471     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12472   %}
12473   ins_pipe(ialu_reg);
12474 %}
12475 
12476 // Logical Shift Right by 8-bit immediate
12477 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12478 %{
12479   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12480   effect(KILL cr);
12481 
12482   format %{ "shrq    $dst, $shift" %}
12483   ins_encode %{
12484     __ shrq($dst$$Address, $shift$$constant);
12485   %}
12486   ins_pipe(ialu_mem_imm);
12487 %}
12488 
12489 // Logical Shift Right by variable
12490 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12491 %{
12492   predicate(!VM_Version::supports_bmi2());
12493   match(Set dst (URShiftL dst shift));
12494   effect(KILL cr);
12495 
12496   format %{ "shrq    $dst, $shift" %}
12497   ins_encode %{
12498     __ shrq($dst$$Register);
12499   %}
12500   ins_pipe(ialu_reg_reg);
12501 %}
12502 
12503 // Logical Shift Right by variable
12504 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12505 %{
12506   predicate(!VM_Version::supports_bmi2());
12507   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12508   effect(KILL cr);
12509 
12510   format %{ "shrq    $dst, $shift" %}
12511   ins_encode %{
12512     __ shrq($dst$$Address);
12513   %}
12514   ins_pipe(ialu_mem_reg);
12515 %}
12516 
12517 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12518 %{
12519   predicate(VM_Version::supports_bmi2());
12520   match(Set dst (URShiftL src shift));
12521 
12522   format %{ "shrxq   $dst, $src, $shift" %}
12523   ins_encode %{
12524     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12525   %}
12526   ins_pipe(ialu_reg_reg);
12527 %}
12528 
12529 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12530 %{
12531   predicate(VM_Version::supports_bmi2());
12532   match(Set dst (URShiftL (LoadL src) shift));
12533   ins_cost(175);
12534   format %{ "shrxq   $dst, $src, $shift" %}
12535   ins_encode %{
12536     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12537   %}
12538   ins_pipe(ialu_reg_mem);
12539 %}
12540 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
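// i.e. dst = (src << 24) >> 24, sign-extending the low byte, which movsbl does directly.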
12543 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12544 %{
12545   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12546 
12547   format %{ "movsbl  $dst, $src\t# i2b" %}
12548   ins_encode %{
12549     __ movsbl($dst$$Register, $src$$Register);
12550   %}
12551   ins_pipe(ialu_reg_reg);
12552 %}
12553 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
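// i.e. dst = (src << 16) >> 16, sign-extending the low 16 bits, which movswl does directly.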
12556 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12557 %{
12558   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12559 
12560   format %{ "movswl  $dst, $src\t# i2s" %}
12561   ins_encode %{
12562     __ movswl($dst$$Register, $src$$Register);
12563   %}
12564   ins_pipe(ialu_reg_reg);
12565 %}
12566 
12567 // ROL/ROR instructions
12568 
12569 // Rotate left by constant.
12570 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12571 %{
12572   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12573   match(Set dst (RotateLeft dst shift));
12574   effect(KILL cr);
12575   format %{ "roll    $dst, $shift" %}
12576   ins_encode %{
12577     __ roll($dst$$Register, $shift$$constant);
12578   %}
12579   ins_pipe(ialu_reg);
12580 %}
12581 
12582 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12583 %{
12584   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12585   match(Set dst (RotateLeft src shift));
12586   format %{ "rolxl   $dst, $src, $shift" %}
12587   ins_encode %{
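    // BMI2 has rorx but no rolx; use the identity rol(x, s) == ror(x, 32 - s)
    // and emit rorxl with the complemented count.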
12588     int shift = 32 - ($shift$$constant & 31);
12589     __ rorxl($dst$$Register, $src$$Register, shift);
12590   %}
12591   ins_pipe(ialu_reg_reg);
12592 %}
12593 
12594 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12595 %{
12596   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12597   match(Set dst (RotateLeft (LoadI src) shift));
12598   ins_cost(175);
12599   format %{ "rolxl   $dst, $src, $shift" %}
12600   ins_encode %{
12601     int shift = 32 - ($shift$$constant & 31);
12602     __ rorxl($dst$$Register, $src$$Address, shift);
12603   %}
12604   ins_pipe(ialu_reg_mem);
12605 %}
12606 
12607 // Rotate Left by variable
12608 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12609 %{
12610   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12611   match(Set dst (RotateLeft dst shift));
12612   effect(KILL cr);
12613   format %{ "roll    $dst, $shift" %}
12614   ins_encode %{
12615     __ roll($dst$$Register);
12616   %}
12617   ins_pipe(ialu_reg_reg);
12618 %}
12619 
12620 // Rotate Left by variable
12621 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12622 %{
12623   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12624   match(Set dst (RotateLeft src shift));
12625   effect(KILL cr);
12626   flag(PD::Flag_ndd_demotable_opr1);
12627 
12628   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12629   ins_encode %{
12630     __ eroll($dst$$Register, $src$$Register, false);
12631   %}
12632   ins_pipe(ialu_reg_reg);
12633 %}
12634 
12635 // Rotate Right by constant.
12636 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12637 %{
12638   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12639   match(Set dst (RotateRight dst shift));
12640   effect(KILL cr);
12641   format %{ "rorl    $dst, $shift" %}
12642   ins_encode %{
12643     __ rorl($dst$$Register, $shift$$constant);
12644   %}
12645   ins_pipe(ialu_reg);
12646 %}
12647 
12648 // Rotate Right by constant.
12649 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12650 %{
12651   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12652   match(Set dst (RotateRight src shift));
12653   format %{ "rorxl   $dst, $src, $shift" %}
12654   ins_encode %{
12655     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12656   %}
12657   ins_pipe(ialu_reg_reg);
12658 %}
12659 
12660 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12661 %{
12662   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12663   match(Set dst (RotateRight (LoadI src) shift));
12664   ins_cost(175);
12665   format %{ "rorxl   $dst, $src, $shift" %}
12666   ins_encode %{
12667     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12668   %}
12669   ins_pipe(ialu_reg_mem);
12670 %}
12671 
12672 // Rotate Right by variable
12673 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12674 %{
12675   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12676   match(Set dst (RotateRight dst shift));
12677   effect(KILL cr);
12678   format %{ "rorl    $dst, $shift" %}
12679   ins_encode %{
12680     __ rorl($dst$$Register);
12681   %}
12682   ins_pipe(ialu_reg_reg);
12683 %}
12684 
12685 // Rotate Right by variable
12686 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12687 %{
12688   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12689   match(Set dst (RotateRight src shift));
12690   effect(KILL cr);
12691   flag(PD::Flag_ndd_demotable_opr1);
12692 
  format %{ "erorl    $dst, $src, $shift\t# rotate right (int ndd)" %}
12694   ins_encode %{
12695     __ erorl($dst$$Register, $src$$Register, false);
12696   %}
12697   ins_pipe(ialu_reg_reg);
12698 %}
12699 
12700 // Rotate Left by constant.
12701 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12702 %{
12703   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12704   match(Set dst (RotateLeft dst shift));
12705   effect(KILL cr);
12706   format %{ "rolq    $dst, $shift" %}
12707   ins_encode %{
12708     __ rolq($dst$$Register, $shift$$constant);
12709   %}
12710   ins_pipe(ialu_reg);
12711 %}
12712 
12713 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12714 %{
12715   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12716   match(Set dst (RotateLeft src shift));
12717   format %{ "rolxq   $dst, $src, $shift" %}
12718   ins_encode %{
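    // Same identity as the 32-bit case: rol(x, s) == ror(x, 64 - s).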
12719     int shift = 64 - ($shift$$constant & 63);
12720     __ rorxq($dst$$Register, $src$$Register, shift);
12721   %}
12722   ins_pipe(ialu_reg_reg);
12723 %}
12724 
12725 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12726 %{
12727   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12728   match(Set dst (RotateLeft (LoadL src) shift));
12729   ins_cost(175);
12730   format %{ "rolxq   $dst, $src, $shift" %}
12731   ins_encode %{
12732     int shift = 64 - ($shift$$constant & 63);
12733     __ rorxq($dst$$Register, $src$$Address, shift);
12734   %}
12735   ins_pipe(ialu_reg_mem);
12736 %}
12737 
12738 // Rotate Left by variable
12739 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12740 %{
12741   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12742   match(Set dst (RotateLeft dst shift));
12743   effect(KILL cr);
12744 
12745   format %{ "rolq    $dst, $shift" %}
12746   ins_encode %{
12747     __ rolq($dst$$Register);
12748   %}
12749   ins_pipe(ialu_reg_reg);
12750 %}
12751 
12752 // Rotate Left by variable
12753 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12754 %{
12755   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12756   match(Set dst (RotateLeft src shift));
12757   effect(KILL cr);
12758   flag(PD::Flag_ndd_demotable_opr1);
12759 
  format %{ "erolq    $dst, $src, $shift\t# rotate left (long ndd)" %}
12761   ins_encode %{
12762     __ erolq($dst$$Register, $src$$Register, false);
12763   %}
12764   ins_pipe(ialu_reg_reg);
12765 %}
12766 
12767 // Rotate Right by constant.
12768 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12769 %{
12770   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12771   match(Set dst (RotateRight dst shift));
12772   effect(KILL cr);
12773   format %{ "rorq    $dst, $shift" %}
12774   ins_encode %{
12775     __ rorq($dst$$Register, $shift$$constant);
12776   %}
12777   ins_pipe(ialu_reg);
12778 %}
12779 
12780 // Rotate Right by constant
12781 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12782 %{
12783   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12784   match(Set dst (RotateRight src shift));
12785   format %{ "rorxq   $dst, $src, $shift" %}
12786   ins_encode %{
12787     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12788   %}
12789   ins_pipe(ialu_reg_reg);
12790 %}
12791 
12792 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12793 %{
12794   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12795   match(Set dst (RotateRight (LoadL src) shift));
12796   ins_cost(175);
12797   format %{ "rorxq   $dst, $src, $shift" %}
12798   ins_encode %{
12799     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12800   %}
12801   ins_pipe(ialu_reg_mem);
12802 %}
12803 
12804 // Rotate Right by variable
12805 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12806 %{
12807   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12808   match(Set dst (RotateRight dst shift));
12809   effect(KILL cr);
12810   format %{ "rorq    $dst, $shift" %}
12811   ins_encode %{
12812     __ rorq($dst$$Register);
12813   %}
12814   ins_pipe(ialu_reg_reg);
12815 %}
12816 
12817 // Rotate Right by variable
12818 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12819 %{
12820   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12821   match(Set dst (RotateRight src shift));
12822   effect(KILL cr);
12823   flag(PD::Flag_ndd_demotable_opr1);
12824 
  format %{ "erorq    $dst, $src, $shift\t# rotate right (long ndd)" %}
12826   ins_encode %{
12827     __ erorq($dst$$Register, $src$$Register, false);
12828   %}
12829   ins_pipe(ialu_reg_reg);
12830 %}
12831 
12832 //----------------------------- CompressBits/ExpandBits ------------------------
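//
// CompressBits maps to BMI2 pext: the bits of src selected by mask are packed
// into the contiguous low-order bits of dst.  ExpandBits maps to BMI2 pdep:
// the contiguous low-order bits of src are scattered to the bit positions
// selected by mask.  For example, pext(0b101101, 0b001100) == 0b11.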
12833 
12834 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12835   predicate(n->bottom_type()->isa_long());
12836   match(Set dst (CompressBits src mask));
12837   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12838   ins_encode %{
12839     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12840   %}
12841   ins_pipe( pipe_slow );
12842 %}
12843 
12844 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12845   predicate(n->bottom_type()->isa_long());
12846   match(Set dst (ExpandBits src mask));
12847   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12848   ins_encode %{
12849     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12850   %}
12851   ins_pipe( pipe_slow );
12852 %}
12853 
12854 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12855   predicate(n->bottom_type()->isa_long());
12856   match(Set dst (CompressBits src (LoadL mask)));
12857   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12858   ins_encode %{
12859     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12860   %}
12861   ins_pipe( pipe_slow );
12862 %}
12863 
12864 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12865   predicate(n->bottom_type()->isa_long());
12866   match(Set dst (ExpandBits src (LoadL mask)));
12867   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12868   ins_encode %{
12869     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12870   %}
12871   ins_pipe( pipe_slow );
12872 %}
12873 
12874 
12875 // Logical Instructions
12876 
12877 // Integer Logical Instructions
12878 
12879 // And Instructions
12880 // And Register with Register
12881 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12882 %{
12883   predicate(!UseAPX);
12884   match(Set dst (AndI dst src));
12885   effect(KILL cr);
12886   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12887 
12888   format %{ "andl    $dst, $src\t# int" %}
12889   ins_encode %{
12890     __ andl($dst$$Register, $src$$Register);
12891   %}
12892   ins_pipe(ialu_reg_reg);
12893 %}
12894 
12895 // And Register with Register using New Data Destination (NDD)
12896 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12897 %{
12898   predicate(UseAPX);
12899   match(Set dst (AndI src1 src2));
12900   effect(KILL cr);
12901   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12902 
12903   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12904   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12908   ins_pipe(ialu_reg_reg);
12909 %}
12910 
12911 // And Register with Immediate 255
12912 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12913 %{
12914   match(Set dst (AndI src mask));
12915 
12916   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12917   ins_encode %{
12918     __ movzbl($dst$$Register, $src$$Register);
12919   %}
12920   ins_pipe(ialu_reg);
12921 %}
12922 
12923 // And Register with Immediate 255 and promote to long
12924 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12925 %{
12926   match(Set dst (ConvI2L (AndI src mask)));
12927 
12928   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
12929   ins_encode %{
12930     __ movzbl($dst$$Register, $src$$Register);
12931   %}
12932   ins_pipe(ialu_reg);
12933 %}
12934 
12935 // And Register with Immediate 65535
12936 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12937 %{
12938   match(Set dst (AndI src mask));
12939 
12940   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
12941   ins_encode %{
12942     __ movzwl($dst$$Register, $src$$Register);
12943   %}
12944   ins_pipe(ialu_reg);
12945 %}
12946 
12947 // And Register with Immediate 65535 and promote to long
12948 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12949 %{
12950   match(Set dst (ConvI2L (AndI src mask)));
12951 
12952   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
12953   ins_encode %{
12954     __ movzwl($dst$$Register, $src$$Register);
12955   %}
12956   ins_pipe(ialu_reg);
12957 %}
12958 
12959 // Can skip int2long conversions after AND with small bitmask
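// bzhiq zeroes all bits of src at positions >= the count held in tmp; with
// mask == 2^k - 1 and tmp == k this computes src & mask with the upper
// 32 bits already clear, so no separate zero-extension is needed.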
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (ConvI2L (AndI src mask)));
  ins_cost(125);
  effect(TEMP tmp, KILL cr);
  format %{ "bzhiq   $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
12967   ins_encode %{
12968     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12969     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12970   %}
12971   ins_pipe(ialu_reg_reg);
12972 %}
12973 
12974 // And Register with Immediate
12975 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12976 %{
12977   predicate(!UseAPX);
12978   match(Set dst (AndI dst src));
12979   effect(KILL cr);
12980   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12981 
12982   format %{ "andl    $dst, $src\t# int" %}
12983   ins_encode %{
12984     __ andl($dst$$Register, $src$$constant);
12985   %}
12986   ins_pipe(ialu_reg);
12987 %}
12988 
12989 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12990 %{
12991   predicate(UseAPX);
12992   match(Set dst (AndI src1 src2));
12993   effect(KILL cr);
12994   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
12995 
12996   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12997   ins_encode %{
12998     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12999   %}
13000   ins_pipe(ialu_reg);
13001 %}
13002 
13003 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13004 %{
13005   predicate(UseAPX);
13006   match(Set dst (AndI (LoadI src1) src2));
13007   effect(KILL cr);
13008   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13009 
13010   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13011   ins_encode %{
13012     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13013   %}
13014   ins_pipe(ialu_reg);
13015 %}
13016 
13017 // And Register with Memory
13018 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13019 %{
13020   predicate(!UseAPX);
13021   match(Set dst (AndI dst (LoadI src)));
13022   effect(KILL cr);
13023   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13024 
13025   ins_cost(150);
13026   format %{ "andl    $dst, $src\t# int" %}
13027   ins_encode %{
13028     __ andl($dst$$Register, $src$$Address);
13029   %}
13030   ins_pipe(ialu_reg_mem);
13031 %}
13032 
13033 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13034 %{
13035   predicate(UseAPX);
13036   match(Set dst (AndI src1 (LoadI src2)));
13037   effect(KILL cr);
13038   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13039 
13040   ins_cost(150);
13041   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13042   ins_encode %{
13043     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13044   %}
13045   ins_pipe(ialu_reg_mem);
13046 %}
13047 
13048 // And Memory with Register
13049 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13050 %{
13051   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13052   effect(KILL cr);
13053   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13054 
13055   ins_cost(150);
13056   format %{ "andb    $dst, $src\t# byte" %}
13057   ins_encode %{
13058     __ andb($dst$$Address, $src$$Register);
13059   %}
13060   ins_pipe(ialu_mem_reg);
13061 %}
13062 
13063 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13064 %{
13065   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13066   effect(KILL cr);
13067   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13068 
13069   ins_cost(150);
13070   format %{ "andl    $dst, $src\t# int" %}
13071   ins_encode %{
13072     __ andl($dst$$Address, $src$$Register);
13073   %}
13074   ins_pipe(ialu_mem_reg);
13075 %}
13076 
13077 // And Memory with Immediate
13078 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13079 %{
13080   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13081   effect(KILL cr);
13082   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13083 
13084   ins_cost(125);
13085   format %{ "andl    $dst, $src\t# int" %}
13086   ins_encode %{
13087     __ andl($dst$$Address, $src$$constant);
13088   %}
13089   ins_pipe(ialu_mem_imm);
13090 %}
13091 
13092 // BMI1 instructions
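// These match the canonical two-node idioms for the BMI1 bit-manipulation ops:
//   andn(a, b) = ~a & b
//   blsi(x)    = x & -x        (isolate lowest set bit)
//   blsmsk(x)  = x ^ (x - 1)   (mask up to and including lowest set bit)
//   blsr(x)    = x & (x - 1)   (clear lowest set bit)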
13093 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13094   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13095   predicate(UseBMI1Instructions);
13096   effect(KILL cr);
13097   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13098 
13099   ins_cost(125);
13100   format %{ "andnl  $dst, $src1, $src2" %}
13101 
13102   ins_encode %{
13103     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13104   %}
13105   ins_pipe(ialu_reg_mem);
13106 %}
13107 
13108 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13109   match(Set dst (AndI (XorI src1 minus_1) src2));
13110   predicate(UseBMI1Instructions);
13111   effect(KILL cr);
13112   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13113 
13114   format %{ "andnl  $dst, $src1, $src2" %}
13115 
13116   ins_encode %{
13117     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13118   %}
13119   ins_pipe(ialu_reg);
13120 %}
13121 
13122 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13123   match(Set dst (AndI (SubI imm_zero src) src));
13124   predicate(UseBMI1Instructions);
13125   effect(KILL cr);
13126   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13127 
13128   format %{ "blsil  $dst, $src" %}
13129 
13130   ins_encode %{
13131     __ blsil($dst$$Register, $src$$Register);
13132   %}
13133   ins_pipe(ialu_reg);
13134 %}
13135 
13136 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13137   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13138   predicate(UseBMI1Instructions);
13139   effect(KILL cr);
13140   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13141 
13142   ins_cost(125);
13143   format %{ "blsil  $dst, $src" %}
13144 
13145   ins_encode %{
13146     __ blsil($dst$$Register, $src$$Address);
13147   %}
13148   ins_pipe(ialu_reg_mem);
13149 %}
13150 
13151 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13152 %{
13153   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13154   predicate(UseBMI1Instructions);
13155   effect(KILL cr);
13156   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13157 
13158   ins_cost(125);
13159   format %{ "blsmskl $dst, $src" %}
13160 
13161   ins_encode %{
13162     __ blsmskl($dst$$Register, $src$$Address);
13163   %}
13164   ins_pipe(ialu_reg_mem);
13165 %}
13166 
13167 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13168 %{
13169   match(Set dst (XorI (AddI src minus_1) src));
13170   predicate(UseBMI1Instructions);
13171   effect(KILL cr);
13172   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13173 
13174   format %{ "blsmskl $dst, $src" %}
13175 
13176   ins_encode %{
13177     __ blsmskl($dst$$Register, $src$$Register);
13178   %}
13179 
13180   ins_pipe(ialu_reg);
13181 %}
13182 
13183 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13184 %{
13185   match(Set dst (AndI (AddI src minus_1) src) );
13186   predicate(UseBMI1Instructions);
13187   effect(KILL cr);
13188   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13189 
13190   format %{ "blsrl  $dst, $src" %}
13191 
13192   ins_encode %{
13193     __ blsrl($dst$$Register, $src$$Register);
13194   %}
13195 
  ins_pipe(ialu_reg);
13197 %}
13198 
13199 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13200 %{
13201   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13202   predicate(UseBMI1Instructions);
13203   effect(KILL cr);
13204   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13205 
13206   ins_cost(125);
13207   format %{ "blsrl  $dst, $src" %}
13208 
13209   ins_encode %{
13210     __ blsrl($dst$$Register, $src$$Address);
13211   %}
13212 
  ins_pipe(ialu_reg_mem);
13214 %}
13215 
13216 // Or Instructions
13217 // Or Register with Register
13218 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13219 %{
13220   predicate(!UseAPX);
13221   match(Set dst (OrI dst src));
13222   effect(KILL cr);
13223   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13224 
13225   format %{ "orl     $dst, $src\t# int" %}
13226   ins_encode %{
13227     __ orl($dst$$Register, $src$$Register);
13228   %}
13229   ins_pipe(ialu_reg_reg);
13230 %}
13231 
13232 // Or Register with Register using New Data Destination (NDD)
13233 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13234 %{
13235   predicate(UseAPX);
13236   match(Set dst (OrI src1 src2));
13237   effect(KILL cr);
13238   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13239 
13240   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13241   ins_encode %{
13242     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13243   %}
13244   ins_pipe(ialu_reg_reg);
13245 %}
13246 
13247 // Or Register with Immediate
13248 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13249 %{
13250   predicate(!UseAPX);
13251   match(Set dst (OrI dst src));
13252   effect(KILL cr);
13253   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13254 
13255   format %{ "orl     $dst, $src\t# int" %}
13256   ins_encode %{
13257     __ orl($dst$$Register, $src$$constant);
13258   %}
13259   ins_pipe(ialu_reg);
13260 %}
13261 
13262 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13263 %{
13264   predicate(UseAPX);
13265   match(Set dst (OrI src1 src2));
13266   effect(KILL cr);
13267   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13268 
13269   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13270   ins_encode %{
13271     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13272   %}
13273   ins_pipe(ialu_reg);
13274 %}
13275 
13276 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13277 %{
13278   predicate(UseAPX);
13279   match(Set dst (OrI src1 src2));
13280   effect(KILL cr);
13281   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13282 
13283   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13284   ins_encode %{
13285     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13286   %}
13287   ins_pipe(ialu_reg);
13288 %}
13289 
13290 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13291 %{
13292   predicate(UseAPX);
13293   match(Set dst (OrI (LoadI src1) src2));
13294   effect(KILL cr);
13295   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13296 
13297   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13298   ins_encode %{
13299     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13300   %}
13301   ins_pipe(ialu_reg);
13302 %}
13303 
13304 // Or Register with Memory
13305 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13306 %{
13307   predicate(!UseAPX);
13308   match(Set dst (OrI dst (LoadI src)));
13309   effect(KILL cr);
13310   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13311 
13312   ins_cost(150);
13313   format %{ "orl     $dst, $src\t# int" %}
13314   ins_encode %{
13315     __ orl($dst$$Register, $src$$Address);
13316   %}
13317   ins_pipe(ialu_reg_mem);
13318 %}
13319 
13320 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13321 %{
13322   predicate(UseAPX);
13323   match(Set dst (OrI src1 (LoadI src2)));
13324   effect(KILL cr);
13325   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13326 
13327   ins_cost(150);
13328   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13329   ins_encode %{
13330     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13331   %}
13332   ins_pipe(ialu_reg_mem);
13333 %}
13334 
13335 // Or Memory with Register
13336 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13337 %{
13338   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13339   effect(KILL cr);
13340   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13341 
13342   ins_cost(150);
13343   format %{ "orb    $dst, $src\t# byte" %}
13344   ins_encode %{
13345     __ orb($dst$$Address, $src$$Register);
13346   %}
13347   ins_pipe(ialu_mem_reg);
13348 %}
13349 
13350 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13351 %{
13352   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13353   effect(KILL cr);
13354   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13355 
13356   ins_cost(150);
13357   format %{ "orl     $dst, $src\t# int" %}
13358   ins_encode %{
13359     __ orl($dst$$Address, $src$$Register);
13360   %}
13361   ins_pipe(ialu_mem_reg);
13362 %}
13363 
13364 // Or Memory with Immediate
13365 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13366 %{
13367   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13368   effect(KILL cr);
13369   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13370 
13371   ins_cost(125);
13372   format %{ "orl     $dst, $src\t# int" %}
13373   ins_encode %{
13374     __ orl($dst$$Address, $src$$constant);
13375   %}
13376   ins_pipe(ialu_mem_imm);
13377 %}
13378 
13379 // Xor Instructions
13380 // Xor Register with Register
13381 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13382 %{
13383   predicate(!UseAPX);
13384   match(Set dst (XorI dst src));
13385   effect(KILL cr);
13386   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13387 
13388   format %{ "xorl    $dst, $src\t# int" %}
13389   ins_encode %{
13390     __ xorl($dst$$Register, $src$$Register);
13391   %}
13392   ins_pipe(ialu_reg_reg);
13393 %}
13394 
13395 // Xor Register with Register using New Data Destination (NDD)
13396 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13397 %{
13398   predicate(UseAPX);
13399   match(Set dst (XorI src1 src2));
13400   effect(KILL cr);
13401   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13402 
13403   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13404   ins_encode %{
13405     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13406   %}
13407   ins_pipe(ialu_reg_reg);
13408 %}
13409 
13410 // Xor Register with Immediate -1
13411 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13412 %{
13413   predicate(!UseAPX);
13414   match(Set dst (XorI dst imm));
13415 
13416   format %{ "notl    $dst" %}
13417   ins_encode %{
13418      __ notl($dst$$Register);
13419   %}
13420   ins_pipe(ialu_reg);
13421 %}
13422 
13423 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13424 %{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  flag(PD::Flag_ndd_demotable_opr1);
13428 
13429   format %{ "enotl    $dst, $src" %}
13430   ins_encode %{
13431      __ enotl($dst$$Register, $src$$Register);
13432   %}
13433   ins_pipe(ialu_reg);
13434 %}
13435 
13436 // Xor Register with Immediate
13437 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13438 %{
  // Strict predicate check so that xorI_rReg_im1 is selected when immI src is -1, regardless of cost.
13440   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13441   match(Set dst (XorI dst src));
13442   effect(KILL cr);
13443   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13444 
13445   format %{ "xorl    $dst, $src\t# int" %}
13446   ins_encode %{
13447     __ xorl($dst$$Register, $src$$constant);
13448   %}
13449   ins_pipe(ialu_reg);
13450 %}
13451 
13452 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13453 %{
  // Strict predicate check so that xorI_rReg_im1_ndd is selected when immI src2 is -1, regardless of cost.
13455   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13456   match(Set dst (XorI src1 src2));
13457   effect(KILL cr);
13458   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13459 
13460   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13461   ins_encode %{
13462     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13463   %}
13464   ins_pipe(ialu_reg);
13465 %}
13466 
13467 // Xor Memory with Immediate
13468 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13469 %{
13470   predicate(UseAPX);
13471   match(Set dst (XorI (LoadI src1) src2));
13472   effect(KILL cr);
13473   ins_cost(150);
13474   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13475 
13476   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13477   ins_encode %{
13478     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13479   %}
13480   ins_pipe(ialu_reg);
13481 %}
13482 
13483 // Xor Register with Memory
13484 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13485 %{
13486   predicate(!UseAPX);
13487   match(Set dst (XorI dst (LoadI src)));
13488   effect(KILL cr);
13489   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13490 
13491   ins_cost(150);
13492   format %{ "xorl    $dst, $src\t# int" %}
13493   ins_encode %{
13494     __ xorl($dst$$Register, $src$$Address);
13495   %}
13496   ins_pipe(ialu_reg_mem);
13497 %}
13498 
13499 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13500 %{
13501   predicate(UseAPX);
13502   match(Set dst (XorI src1 (LoadI src2)));
13503   effect(KILL cr);
13504   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13505 
13506   ins_cost(150);
13507   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13508   ins_encode %{
13509     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13510   %}
13511   ins_pipe(ialu_reg_mem);
13512 %}
13513 
13514 // Xor Memory with Register
13515 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13516 %{
13517   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13518   effect(KILL cr);
13519   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13520 
13521   ins_cost(150);
13522   format %{ "xorb    $dst, $src\t# byte" %}
13523   ins_encode %{
13524     __ xorb($dst$$Address, $src$$Register);
13525   %}
13526   ins_pipe(ialu_mem_reg);
13527 %}
13528 
13529 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13530 %{
13531   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13532   effect(KILL cr);
13533   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13534 
13535   ins_cost(150);
13536   format %{ "xorl    $dst, $src\t# int" %}
13537   ins_encode %{
13538     __ xorl($dst$$Address, $src$$Register);
13539   %}
13540   ins_pipe(ialu_mem_reg);
13541 %}
13542 
13543 // Xor Memory with Immediate
13544 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13545 %{
13546   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13547   effect(KILL cr);
13548   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13549 
13550   ins_cost(125);
13551   format %{ "xorl    $dst, $src\t# int" %}
13552   ins_encode %{
13553     __ xorl($dst$$Address, $src$$constant);
13554   %}
13555   ins_pipe(ialu_mem_imm);
13556 %}
13557 
13558 
13559 // Long Logical Instructions
13560 
13561 // And Instructions
13562 // And Register with Register
13563 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13564 %{
13565   predicate(!UseAPX);
13566   match(Set dst (AndL dst src));
13567   effect(KILL cr);
13568   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13569 
13570   format %{ "andq    $dst, $src\t# long" %}
13571   ins_encode %{
13572     __ andq($dst$$Register, $src$$Register);
13573   %}
13574   ins_pipe(ialu_reg_reg);
13575 %}
13576 
13577 // And Register with Register using New Data Destination (NDD)
13578 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13579 %{
13580   predicate(UseAPX);
13581   match(Set dst (AndL src1 src2));
13582   effect(KILL cr);
13583   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13584 
13585   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13586   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13590   ins_pipe(ialu_reg_reg);
13591 %}
13592 
13593 // And Register with Immediate 255
13594 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13595 %{
13596   match(Set dst (AndL src mask));
13597 
13598   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13599   ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
13601     __ movzbl($dst$$Register, $src$$Register);
13602   %}
13603   ins_pipe(ialu_reg);
13604 %}
13605 
13606 // And Register with Immediate 65535
13607 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13608 %{
13609   match(Set dst (AndL src mask));
13610 
13611   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13612   ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
13614     __ movzwl($dst$$Register, $src$$Register);
13615   %}
13616   ins_pipe(ialu_reg);
13617 %}
13618 
13619 // And Register with Immediate
13620 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13621 %{
13622   predicate(!UseAPX);
13623   match(Set dst (AndL dst src));
13624   effect(KILL cr);
13625   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13626 
13627   format %{ "andq    $dst, $src\t# long" %}
13628   ins_encode %{
13629     __ andq($dst$$Register, $src$$constant);
13630   %}
13631   ins_pipe(ialu_reg);
13632 %}
13633 
13634 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13635 %{
13636   predicate(UseAPX);
13637   match(Set dst (AndL src1 src2));
13638   effect(KILL cr);
13639   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13640 
13641   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13642   ins_encode %{
13643     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13644   %}
13645   ins_pipe(ialu_reg);
13646 %}
13647 
13648 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13649 %{
13650   predicate(UseAPX);
13651   match(Set dst (AndL (LoadL src1) src2));
13652   effect(KILL cr);
13653   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13654 
13655   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13656   ins_encode %{
13657     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13658   %}
13659   ins_pipe(ialu_reg);
13660 %}
13661 
13662 // And Register with Memory
13663 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13664 %{
13665   predicate(!UseAPX);
13666   match(Set dst (AndL dst (LoadL src)));
13667   effect(KILL cr);
13668   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13669 
13670   ins_cost(150);
13671   format %{ "andq    $dst, $src\t# long" %}
13672   ins_encode %{
13673     __ andq($dst$$Register, $src$$Address);
13674   %}
13675   ins_pipe(ialu_reg_mem);
13676 %}
13677 
13678 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13679 %{
13680   predicate(UseAPX);
13681   match(Set dst (AndL src1 (LoadL src2)));
13682   effect(KILL cr);
13683   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13684 
13685   ins_cost(150);
13686   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13687   ins_encode %{
13688     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13689   %}
13690   ins_pipe(ialu_reg_mem);
13691 %}
13692 
13693 // And Memory with Register
13694 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13695 %{
13696   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13697   effect(KILL cr);
13698   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13699 
13700   ins_cost(150);
13701   format %{ "andq    $dst, $src\t# long" %}
13702   ins_encode %{
13703     __ andq($dst$$Address, $src$$Register);
13704   %}
13705   ins_pipe(ialu_mem_reg);
13706 %}
13707 
13708 // And Memory with Immediate
13709 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13710 %{
13711   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13712   effect(KILL cr);
13713   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13714 
13715   ins_cost(125);
13716   format %{ "andq    $dst, $src\t# long" %}
13717   ins_encode %{
13718     __ andq($dst$$Address, $src$$constant);
13719   %}
13720   ins_pipe(ialu_mem_imm);
13721 %}
13722 
13723 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13724 %{
  // con must be a genuinely 64-bit immediate whose complement is a power of 2,
  // i.e. the cleared bit lies above bit 30; for masks that fit in 8 or 32 bits
  // a plain AND immediate works just as well.
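  // For example, con == ~(1L << 40) is matched here and encoded as "btrq $dst, 40".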
13727   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13728 
13729   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13730   effect(KILL cr);
13731 
13732   ins_cost(125);
13733   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13734   ins_encode %{
13735     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13736   %}
13737   ins_pipe(ialu_mem_imm);
13738 %}
13739 
13740 // BMI1 instructions
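// 64-bit (q) forms of the BMI1 idioms; the same bit identities as the int
// forms above apply.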
13741 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13742   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13743   predicate(UseBMI1Instructions);
13744   effect(KILL cr);
13745   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13746 
13747   ins_cost(125);
13748   format %{ "andnq  $dst, $src1, $src2" %}
13749 
13750   ins_encode %{
13751     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13752   %}
13753   ins_pipe(ialu_reg_mem);
13754 %}
13755 
13756 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13757   match(Set dst (AndL (XorL src1 minus_1) src2));
13758   predicate(UseBMI1Instructions);
13759   effect(KILL cr);
13760   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13761 
13762   format %{ "andnq  $dst, $src1, $src2" %}
13763 
13764   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13766   %}
  ins_pipe(ialu_reg);
13768 %}
13769 
13770 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13771   match(Set dst (AndL (SubL imm_zero src) src));
13772   predicate(UseBMI1Instructions);
13773   effect(KILL cr);
13774   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13775 
13776   format %{ "blsiq  $dst, $src" %}
13777 
13778   ins_encode %{
13779     __ blsiq($dst$$Register, $src$$Register);
13780   %}
13781   ins_pipe(ialu_reg);
13782 %}
13783 
13784 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13785   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13786   predicate(UseBMI1Instructions);
13787   effect(KILL cr);
13788   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13789 
13790   ins_cost(125);
13791   format %{ "blsiq  $dst, $src" %}
13792 
13793   ins_encode %{
13794     __ blsiq($dst$$Register, $src$$Address);
13795   %}
13796   ins_pipe(ialu_reg_mem);
13797 %}
13798 
13799 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13800 %{
13801   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13802   predicate(UseBMI1Instructions);
13803   effect(KILL cr);
13804   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13805 
13806   ins_cost(125);
13807   format %{ "blsmskq $dst, $src" %}
13808 
13809   ins_encode %{
13810     __ blsmskq($dst$$Register, $src$$Address);
13811   %}
13812   ins_pipe(ialu_reg_mem);
13813 %}
13814 
13815 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13816 %{
13817   match(Set dst (XorL (AddL src minus_1) src));
13818   predicate(UseBMI1Instructions);
13819   effect(KILL cr);
13820   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13821 
13822   format %{ "blsmskq $dst, $src" %}
13823 
13824   ins_encode %{
13825     __ blsmskq($dst$$Register, $src$$Register);
13826   %}
13827 
13828   ins_pipe(ialu_reg);
13829 %}
13830 
13831 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13832 %{
13833   match(Set dst (AndL (AddL src minus_1) src) );
13834   predicate(UseBMI1Instructions);
13835   effect(KILL cr);
13836   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13837 
13838   format %{ "blsrq  $dst, $src" %}
13839 
13840   ins_encode %{
13841     __ blsrq($dst$$Register, $src$$Register);
13842   %}
13843 
13844   ins_pipe(ialu_reg);
13845 %}
13846 
13847 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13848 %{
13849   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13850   predicate(UseBMI1Instructions);
13851   effect(KILL cr);
13852   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13853 
13854   ins_cost(125);
13855   format %{ "blsrq  $dst, $src" %}
13856 
13857   ins_encode %{
13858     __ blsrq($dst$$Register, $src$$Address);
13859   %}
13860 
  ins_pipe(ialu_reg_mem);
13862 %}
13863 
13864 // Or Instructions
13865 // Or Register with Register
13866 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13867 %{
13868   predicate(!UseAPX);
13869   match(Set dst (OrL dst src));
13870   effect(KILL cr);
13871   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13872 
13873   format %{ "orq     $dst, $src\t# long" %}
13874   ins_encode %{
13875     __ orq($dst$$Register, $src$$Register);
13876   %}
13877   ins_pipe(ialu_reg_reg);
13878 %}
13879 
13880 // Or Register with Register using New Data Destination (NDD)
13881 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13882 %{
13883   predicate(UseAPX);
13884   match(Set dst (OrL src1 src2));
13885   effect(KILL cr);
13886   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13887 
13888   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13889   ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13893   ins_pipe(ialu_reg_reg);
13894 %}
13895 
13896 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
13899   effect(KILL cr);
13900   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13901 
13902   format %{ "orq     $dst, $src\t# long" %}
13903   ins_encode %{
13904     __ orq($dst$$Register, $src$$Register);
13905   %}
13906   ins_pipe(ialu_reg_reg);
13907 %}
13908 
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13911   effect(KILL cr);
13912   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13913 
13914   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13915   ins_encode %{
13916     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13917   %}
13918   ins_pipe(ialu_reg_reg);
13919 %}
13920 
13921 // Or Register with Immediate
13922 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13923 %{
13924   predicate(!UseAPX);
13925   match(Set dst (OrL dst src));
13926   effect(KILL cr);
13927   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13928 
13929   format %{ "orq     $dst, $src\t# long" %}
13930   ins_encode %{
13931     __ orq($dst$$Register, $src$$constant);
13932   %}
13933   ins_pipe(ialu_reg);
13934 %}
13935 
13936 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13937 %{
13938   predicate(UseAPX);
13939   match(Set dst (OrL src1 src2));
13940   effect(KILL cr);
13941   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13942 
13943   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13944   ins_encode %{
13945     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13946   %}
13947   ins_pipe(ialu_reg);
13948 %}
13949 
13950 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13951 %{
13952   predicate(UseAPX);
13953   match(Set dst (OrL src1 src2));
13954   effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);
13956 
13957   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
13958   ins_encode %{
13959     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13960   %}
13961   ins_pipe(ialu_reg);
13962 %}
13963 
// Or Memory with Immediate into Register (NDD)
13965 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13966 %{
13967   predicate(UseAPX);
13968   match(Set dst (OrL (LoadL src1) src2));
13969   effect(KILL cr);
13970   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13971 
13972   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13973   ins_encode %{
13974     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13975   %}
13976   ins_pipe(ialu_reg);
13977 %}
13978 
13979 // Or Register with Memory
13980 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13981 %{
13982   predicate(!UseAPX);
13983   match(Set dst (OrL dst (LoadL src)));
13984   effect(KILL cr);
13985   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13986 
13987   ins_cost(150);
13988   format %{ "orq     $dst, $src\t# long" %}
13989   ins_encode %{
13990     __ orq($dst$$Register, $src$$Address);
13991   %}
13992   ins_pipe(ialu_reg_mem);
13993 %}
13994 
13995 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13996 %{
13997   predicate(UseAPX);
13998   match(Set dst (OrL src1 (LoadL src2)));
13999   effect(KILL cr);
14000   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14001 
14002   ins_cost(150);
14003   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14004   ins_encode %{
14005     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14006   %}
14007   ins_pipe(ialu_reg_mem);
14008 %}
14009 
14010 // Or Memory with Register
14011 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14012 %{
14013   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14014   effect(KILL cr);
14015   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14016 
14017   ins_cost(150);
14018   format %{ "orq     $dst, $src\t# long" %}
14019   ins_encode %{
14020     __ orq($dst$$Address, $src$$Register);
14021   %}
14022   ins_pipe(ialu_mem_reg);
14023 %}
14024 
14025 // Or Memory with Immediate
14026 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14027 %{
14028   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14029   effect(KILL cr);
14030   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14031 
14032   ins_cost(125);
14033   format %{ "orq     $dst, $src\t# long" %}
14034   ins_encode %{
14035     __ orq($dst$$Address, $src$$constant);
14036   %}
14037   ins_pipe(ialu_mem_imm);
14038 %}
14039 
14040 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14041 %{
14042   // con should be a pure 64-bit power of 2 immediate
14043   // because AND/OR works well enough for 8/32-bit values.
14044   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14045 
14046   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14047   effect(KILL cr);
14048 
14049   ins_cost(125);
14050   format %{ "btsq    $dst, log2($con)\t# long" %}
14051   ins_encode %{
14052     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14053   %}
14054   ins_pipe(ialu_mem_imm);
14055 %}
14056 
14057 // Xor Instructions
14058 // Xor Register with Register
14059 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14060 %{
14061   predicate(!UseAPX);
14062   match(Set dst (XorL dst src));
14063   effect(KILL cr);
14064   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14065 
14066   format %{ "xorq    $dst, $src\t# long" %}
14067   ins_encode %{
14068     __ xorq($dst$$Register, $src$$Register);
14069   %}
14070   ins_pipe(ialu_reg_reg);
14071 %}
14072 
14073 // Xor Register with Register using New Data Destination (NDD)
14074 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14075 %{
14076   predicate(UseAPX);
14077   match(Set dst (XorL src1 src2));
14078   effect(KILL cr);
14079   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14080 
14081   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14082   ins_encode %{
14083     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14084   %}
14085   ins_pipe(ialu_reg_reg);
14086 %}
14087 
14088 // Xor Register with Immediate -1
14089 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14090 %{
14091   predicate(!UseAPX);
14092   match(Set dst (XorL dst imm));
14093 
14094   format %{ "notq   $dst" %}
14095   ins_encode %{
14096      __ notq($dst$$Register);
14097   %}
14098   ins_pipe(ialu_reg);
14099 %}
14100 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14102 %{
14103   predicate(UseAPX);
14104   match(Set dst (XorL src imm));
14105   flag(PD::Flag_ndd_demotable_opr1);
14106 
14107   format %{ "enotq   $dst, $src" %}
14108   ins_encode %{
14109     __ enotq($dst$$Register, $src$$Register);
14110   %}
14111   ins_pipe(ialu_reg);
14112 %}
14113 
14114 // Xor Register with Immediate
14115 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14116 %{
  // Exclude src == -1 so that selection of xorL_rReg_im1 (notq) does not depend on cost.
14118   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14119   match(Set dst (XorL dst src));
14120   effect(KILL cr);
14121   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14122 
14123   format %{ "xorq    $dst, $src\t# long" %}
14124   ins_encode %{
14125     __ xorq($dst$$Register, $src$$constant);
14126   %}
14127   ins_pipe(ialu_reg);
14128 %}
14129 
14130 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14131 %{
  // Exclude src2 == -1 so that selection of xorL_rReg_im1_ndd (enotq) does not depend on cost.
14133   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14134   match(Set dst (XorL src1 src2));
14135   effect(KILL cr);
14136   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14137 
14138   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14139   ins_encode %{
14140     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14141   %}
14142   ins_pipe(ialu_reg);
14143 %}
14144 
// Xor Memory with Immediate into Register (NDD)
14146 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14147 %{
14148   predicate(UseAPX);
14149   match(Set dst (XorL (LoadL src1) src2));
14150   effect(KILL cr);
14151   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14152   ins_cost(150);
14153 
14154   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14155   ins_encode %{
14156     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14157   %}
14158   ins_pipe(ialu_reg);
14159 %}
14160 
14161 // Xor Register with Memory
14162 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14163 %{
14164   predicate(!UseAPX);
14165   match(Set dst (XorL dst (LoadL src)));
14166   effect(KILL cr);
14167   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14168 
14169   ins_cost(150);
14170   format %{ "xorq    $dst, $src\t# long" %}
14171   ins_encode %{
14172     __ xorq($dst$$Register, $src$$Address);
14173   %}
14174   ins_pipe(ialu_reg_mem);
14175 %}
14176 
14177 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14178 %{
14179   predicate(UseAPX);
14180   match(Set dst (XorL src1 (LoadL src2)));
14181   effect(KILL cr);
14182   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14183 
14184   ins_cost(150);
14185   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14186   ins_encode %{
14187     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14188   %}
14189   ins_pipe(ialu_reg_mem);
14190 %}
14191 
14192 // Xor Memory with Register
14193 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14194 %{
14195   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14196   effect(KILL cr);
14197   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14198 
14199   ins_cost(150);
14200   format %{ "xorq    $dst, $src\t# long" %}
14201   ins_encode %{
14202     __ xorq($dst$$Address, $src$$Register);
14203   %}
14204   ins_pipe(ialu_mem_reg);
14205 %}
14206 
14207 // Xor Memory with Immediate
14208 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14209 %{
14210   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14211   effect(KILL cr);
14212   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14213 
14214   ins_cost(125);
14215   format %{ "xorq    $dst, $src\t# long" %}
14216   ins_encode %{
14217     __ xorq($dst$$Address, $src$$constant);
14218   %}
14219   ins_pipe(ialu_mem_imm);
14220 %}
14221 
14222 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14223 %{
14224   match(Set dst (CmpLTMask p q));
14225   effect(KILL cr);
14226 
14227   ins_cost(400);
14228   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14229             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14230             "negl    $dst" %}
14231   ins_encode %{
14232     __ cmpl($p$$Register, $q$$Register);
14233     __ setcc(Assembler::less, $dst$$Register);
14234     __ negl($dst$$Register);
14235   %}
14236   ins_pipe(pipe_slow);
14237 %}
14238 
14239 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14240 %{
14241   match(Set dst (CmpLTMask dst zero));
14242   effect(KILL cr);
14243 
14244   ins_cost(100);
14245   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14246   ins_encode %{
14247     __ sarl($dst$$Register, 31);
14248   %}
14249   ins_pipe(ialu_reg);
14250 %}
14251 
14252 /* Better to save a register than avoid a branch */
14253 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14254 %{
14255   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14256   effect(KILL cr);
14257   ins_cost(300);
14258   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14259             "jge     done\n\t"
14260             "addl    $p,$y\n"
14261             "done:   " %}
14262   ins_encode %{
14263     Register Rp = $p$$Register;
14264     Register Rq = $q$$Register;
14265     Register Ry = $y$$Register;
14266     Label done;
14267     __ subl(Rp, Rq);
14268     __ jccb(Assembler::greaterEqual, done);
14269     __ addl(Rp, Ry);
14270     __ bind(done);
14271   %}
14272   ins_pipe(pipe_cmplt);
14273 %}
14274 
14275 /* Better to save a register than avoid a branch */
14276 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14277 %{
14278   match(Set y (AndI (CmpLTMask p q) y));
14279   effect(KILL cr);
14280 
14281   ins_cost(300);
14282 
14283   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14284             "jlt     done\n\t"
14285             "xorl    $y, $y\n"
14286             "done:   " %}
14287   ins_encode %{
14288     Register Rp = $p$$Register;
14289     Register Rq = $q$$Register;
14290     Register Ry = $y$$Register;
14291     Label done;
14292     __ cmpl(Rp, Rq);
14293     __ jccb(Assembler::less, done);
14294     __ xorl(Ry, Ry);
14295     __ bind(done);
14296   %}
14297   ins_pipe(pipe_cmplt);
14298 %}
14299 
14300 
14301 //---------- FP Instructions------------------------------------------------
14302 
14303 // Really expensive, avoid
14304 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14305 %{
14306   match(Set cr (CmpF src1 src2));
14307 
14308   ins_cost(500);
14309   format %{ "ucomiss $src1, $src2\n\t"
14310             "jnp,s   exit\n\t"
14311             "pushfq\t# saw NaN, set CF\n\t"
14312             "andq    [rsp], #0xffffff2b\n\t"
14313             "popfq\n"
14314     "exit:" %}
14315   ins_encode %{
14316     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14317     emit_cmpfp_fixup(masm);
14318   %}
14319   ins_pipe(pipe_slow);
14320 %}
14321 
14322 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14323   match(Set cr (CmpF src1 src2));
14324 
14325   ins_cost(100);
14326   format %{ "ucomiss $src1, $src2" %}
14327   ins_encode %{
14328     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14329   %}
14330   ins_pipe(pipe_slow);
14331 %}
14332 
14333 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14334   match(Set cr (CmpF src1 (LoadF src2)));
14335 
14336   ins_cost(100);
14337   format %{ "ucomiss $src1, $src2" %}
14338   ins_encode %{
14339     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14340   %}
14341   ins_pipe(pipe_slow);
14342 %}
14343 
14344 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14345   match(Set cr (CmpF src con));
14346   ins_cost(100);
14347   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14348   ins_encode %{
14349     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14350   %}
14351   ins_pipe(pipe_slow);
14352 %}
14353 
14354 // Really expensive, avoid
14355 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14356 %{
14357   match(Set cr (CmpD src1 src2));
14358 
14359   ins_cost(500);
14360   format %{ "ucomisd $src1, $src2\n\t"
14361             "jnp,s   exit\n\t"
14362             "pushfq\t# saw NaN, set CF\n\t"
14363             "andq    [rsp], #0xffffff2b\n\t"
14364             "popfq\n"
14365     "exit:" %}
14366   ins_encode %{
14367     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14368     emit_cmpfp_fixup(masm);
14369   %}
14370   ins_pipe(pipe_slow);
14371 %}
14372 
14373 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14374   match(Set cr (CmpD src1 src2));
14375 
14376   ins_cost(100);
14377   format %{ "ucomisd $src1, $src2 test" %}
14378   ins_encode %{
14379     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14380   %}
14381   ins_pipe(pipe_slow);
14382 %}
14383 
14384 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14385   match(Set cr (CmpD src1 (LoadD src2)));
14386 
14387   ins_cost(100);
14388   format %{ "ucomisd $src1, $src2" %}
14389   ins_encode %{
14390     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14391   %}
14392   ins_pipe(pipe_slow);
14393 %}
14394 
14395 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14396   match(Set cr (CmpD src con));
14397   ins_cost(100);
14398   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14399   ins_encode %{
14400     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14401   %}
14402   ins_pipe(pipe_slow);
14403 %}
14404 
14405 // Compare into -1,0,1
14406 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14407 %{
14408   match(Set dst (CmpF3 src1 src2));
14409   effect(KILL cr);
14410 
14411   ins_cost(275);
14412   format %{ "ucomiss $src1, $src2\n\t"
14413             "movl    $dst, #-1\n\t"
14414             "jp,s    done\n\t"
14415             "jb,s    done\n\t"
14416             "setne   $dst\n\t"
14417             "movzbl  $dst, $dst\n"
14418     "done:" %}
14419   ins_encode %{
14420     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14421     emit_cmpfp3(masm, $dst$$Register);
14422   %}
14423   ins_pipe(pipe_slow);
14424 %}
14425 
14426 // Compare into -1,0,1
14427 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14428 %{
14429   match(Set dst (CmpF3 src1 (LoadF src2)));
14430   effect(KILL cr);
14431 
14432   ins_cost(275);
14433   format %{ "ucomiss $src1, $src2\n\t"
14434             "movl    $dst, #-1\n\t"
14435             "jp,s    done\n\t"
14436             "jb,s    done\n\t"
14437             "setne   $dst\n\t"
14438             "movzbl  $dst, $dst\n"
14439     "done:" %}
14440   ins_encode %{
14441     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14442     emit_cmpfp3(masm, $dst$$Register);
14443   %}
14444   ins_pipe(pipe_slow);
14445 %}
14446 
14447 // Compare into -1,0,1
14448 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14449   match(Set dst (CmpF3 src con));
14450   effect(KILL cr);
14451 
14452   ins_cost(275);
14453   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14454             "movl    $dst, #-1\n\t"
14455             "jp,s    done\n\t"
14456             "jb,s    done\n\t"
14457             "setne   $dst\n\t"
14458             "movzbl  $dst, $dst\n"
14459     "done:" %}
14460   ins_encode %{
14461     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14462     emit_cmpfp3(masm, $dst$$Register);
14463   %}
14464   ins_pipe(pipe_slow);
14465 %}
14466 
14467 // Compare into -1,0,1
14468 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14469 %{
14470   match(Set dst (CmpD3 src1 src2));
14471   effect(KILL cr);
14472 
14473   ins_cost(275);
14474   format %{ "ucomisd $src1, $src2\n\t"
14475             "movl    $dst, #-1\n\t"
14476             "jp,s    done\n\t"
14477             "jb,s    done\n\t"
14478             "setne   $dst\n\t"
14479             "movzbl  $dst, $dst\n"
14480     "done:" %}
14481   ins_encode %{
14482     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14483     emit_cmpfp3(masm, $dst$$Register);
14484   %}
14485   ins_pipe(pipe_slow);
14486 %}
14487 
14488 // Compare into -1,0,1
14489 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14490 %{
14491   match(Set dst (CmpD3 src1 (LoadD src2)));
14492   effect(KILL cr);
14493 
14494   ins_cost(275);
14495   format %{ "ucomisd $src1, $src2\n\t"
14496             "movl    $dst, #-1\n\t"
14497             "jp,s    done\n\t"
14498             "jb,s    done\n\t"
14499             "setne   $dst\n\t"
14500             "movzbl  $dst, $dst\n"
14501     "done:" %}
14502   ins_encode %{
14503     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14504     emit_cmpfp3(masm, $dst$$Register);
14505   %}
14506   ins_pipe(pipe_slow);
14507 %}
14508 
14509 // Compare into -1,0,1
14510 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14511   match(Set dst (CmpD3 src con));
14512   effect(KILL cr);
14513 
14514   ins_cost(275);
14515   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14516             "movl    $dst, #-1\n\t"
14517             "jp,s    done\n\t"
14518             "jb,s    done\n\t"
14519             "setne   $dst\n\t"
14520             "movzbl  $dst, $dst\n"
14521     "done:" %}
14522   ins_encode %{
14523     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14524     emit_cmpfp3(masm, $dst$$Register);
14525   %}
14526   ins_pipe(pipe_slow);
14527 %}
14528 
14529 //----------Arithmetic Conversion Instructions---------------------------------
14530 
14531 instruct convF2D_reg_reg(regD dst, regF src)
14532 %{
14533   match(Set dst (ConvF2D src));
14534 
14535   format %{ "cvtss2sd $dst, $src" %}
14536   ins_encode %{
14537     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14538   %}
14539   ins_pipe(pipe_slow); // XXX
14540 %}
14541 
14542 instruct convF2D_reg_mem(regD dst, memory src)
14543 %{
14544   predicate(UseAVX == 0);
14545   match(Set dst (ConvF2D (LoadF src)));
14546 
14547   format %{ "cvtss2sd $dst, $src" %}
14548   ins_encode %{
14549     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14550   %}
14551   ins_pipe(pipe_slow); // XXX
14552 %}
14553 
14554 instruct convD2F_reg_reg(regF dst, regD src)
14555 %{
14556   match(Set dst (ConvD2F src));
14557 
14558   format %{ "cvtsd2ss $dst, $src" %}
14559   ins_encode %{
14560     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14561   %}
14562   ins_pipe(pipe_slow); // XXX
14563 %}
14564 
14565 instruct convD2F_reg_mem(regF dst, memory src)
14566 %{
14567   predicate(UseAVX == 0);
14568   match(Set dst (ConvD2F (LoadD src)));
14569 
14570   format %{ "cvtsd2ss $dst, $src" %}
14571   ins_encode %{
14572     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14573   %}
14574   ins_pipe(pipe_slow); // XXX
14575 %}
14576 
14577 // XXX do mem variants
14578 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14579 %{
14580   predicate(!VM_Version::supports_avx10_2());
14581   match(Set dst (ConvF2I src));
14582   effect(KILL cr);
14583   format %{ "convert_f2i $dst, $src" %}
14584   ins_encode %{
14585     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14586   %}
14587   ins_pipe(pipe_slow);
14588 %}
14589 
14590 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14591 %{
14592   predicate(VM_Version::supports_avx10_2());
14593   match(Set dst (ConvF2I src));
14594   format %{ "evcvttss2sisl $dst, $src" %}
14595   ins_encode %{
14596     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14597   %}
14598   ins_pipe(pipe_slow);
14599 %}
14600 
14601 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14602 %{
14603   predicate(VM_Version::supports_avx10_2());
14604   match(Set dst (ConvF2I (LoadF src)));
14605   format %{ "evcvttss2sisl $dst, $src" %}
14606   ins_encode %{
14607     __ evcvttss2sisl($dst$$Register, $src$$Address);
14608   %}
14609   ins_pipe(pipe_slow);
14610 %}
14611 
14612 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14613 %{
14614   predicate(!VM_Version::supports_avx10_2());
14615   match(Set dst (ConvF2L src));
14616   effect(KILL cr);
14617   format %{ "convert_f2l $dst, $src"%}
14618   ins_encode %{
14619     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14620   %}
14621   ins_pipe(pipe_slow);
14622 %}
14623 
14624 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14625 %{
14626   predicate(VM_Version::supports_avx10_2());
14627   match(Set dst (ConvF2L src));
14628   format %{ "evcvttss2sisq $dst, $src" %}
14629   ins_encode %{
14630     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14631   %}
14632   ins_pipe(pipe_slow);
14633 %}
14634 
14635 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14636 %{
14637   predicate(VM_Version::supports_avx10_2());
14638   match(Set dst (ConvF2L (LoadF src)));
14639   format %{ "evcvttss2sisq $dst, $src" %}
14640   ins_encode %{
14641     __ evcvttss2sisq($dst$$Register, $src$$Address);
14642   %}
14643   ins_pipe(pipe_slow);
14644 %}
14645 
14646 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14647 %{
14648   predicate(!VM_Version::supports_avx10_2());
14649   match(Set dst (ConvD2I src));
14650   effect(KILL cr);
14651   format %{ "convert_d2i $dst, $src"%}
14652   ins_encode %{
14653     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14654   %}
14655   ins_pipe(pipe_slow);
14656 %}
14657 
14658 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14659 %{
14660   predicate(VM_Version::supports_avx10_2());
14661   match(Set dst (ConvD2I src));
14662   format %{ "evcvttsd2sisl $dst, $src" %}
14663   ins_encode %{
14664     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14665   %}
14666   ins_pipe(pipe_slow);
14667 %}
14668 
14669 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14670 %{
14671   predicate(VM_Version::supports_avx10_2());
14672   match(Set dst (ConvD2I (LoadD src)));
14673   format %{ "evcvttsd2sisl $dst, $src" %}
14674   ins_encode %{
14675     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14676   %}
14677   ins_pipe(pipe_slow);
14678 %}
14679 
14680 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14681 %{
14682   predicate(!VM_Version::supports_avx10_2());
14683   match(Set dst (ConvD2L src));
14684   effect(KILL cr);
14685   format %{ "convert_d2l $dst, $src"%}
14686   ins_encode %{
14687     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14688   %}
14689   ins_pipe(pipe_slow);
14690 %}
14691 
14692 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14693 %{
14694   predicate(VM_Version::supports_avx10_2());
14695   match(Set dst (ConvD2L src));
14696   format %{ "evcvttsd2sisq $dst, $src" %}
14697   ins_encode %{
14698     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14699   %}
14700   ins_pipe(pipe_slow);
14701 %}
14702 
14703 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14704 %{
14705   predicate(VM_Version::supports_avx10_2());
14706   match(Set dst (ConvD2L (LoadD src)));
14707   format %{ "evcvttsd2sisq $dst, $src" %}
14708   ins_encode %{
14709     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14710   %}
14711   ins_pipe(pipe_slow);
14712 %}
14713 
14714 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14715 %{
14716   match(Set dst (RoundD src));
14717   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14718   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14719   ins_encode %{
14720     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14721   %}
14722   ins_pipe(pipe_slow);
14723 %}
14724 
14725 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14726 %{
14727   match(Set dst (RoundF src));
14728   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14729   format %{ "round_float $dst,$src" %}
14730   ins_encode %{
14731     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14732   %}
14733   ins_pipe(pipe_slow);
14734 %}
14735 
14736 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14737 %{
14738   predicate(!UseXmmI2F);
14739   match(Set dst (ConvI2F src));
14740 
14741   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14742   ins_encode %{
14743     if (UseAVX > 0) {
14744       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14745     }
14746     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14747   %}
14748   ins_pipe(pipe_slow); // XXX
14749 %}
14750 
14751 instruct convI2F_reg_mem(regF dst, memory src)
14752 %{
14753   predicate(UseAVX == 0);
14754   match(Set dst (ConvI2F (LoadI src)));
14755 
14756   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14757   ins_encode %{
14758     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14759   %}
14760   ins_pipe(pipe_slow); // XXX
14761 %}
14762 
14763 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14764 %{
14765   predicate(!UseXmmI2D);
14766   match(Set dst (ConvI2D src));
14767 
14768   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14769   ins_encode %{
14770     if (UseAVX > 0) {
14771       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14772     }
14773     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14774   %}
14775   ins_pipe(pipe_slow); // XXX
14776 %}
14777 
14778 instruct convI2D_reg_mem(regD dst, memory src)
14779 %{
14780   predicate(UseAVX == 0);
14781   match(Set dst (ConvI2D (LoadI src)));
14782 
14783   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14784   ins_encode %{
14785     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14786   %}
14787   ins_pipe(pipe_slow); // XXX
14788 %}
14789 
14790 instruct convXI2F_reg(regF dst, rRegI src)
14791 %{
14792   predicate(UseXmmI2F);
14793   match(Set dst (ConvI2F src));
14794 
14795   format %{ "movdl $dst, $src\n\t"
14796             "cvtdq2psl $dst, $dst\t# i2f" %}
14797   ins_encode %{
14798     __ movdl($dst$$XMMRegister, $src$$Register);
14799     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14800   %}
14801   ins_pipe(pipe_slow); // XXX
14802 %}
14803 
14804 instruct convXI2D_reg(regD dst, rRegI src)
14805 %{
14806   predicate(UseXmmI2D);
14807   match(Set dst (ConvI2D src));
14808 
14809   format %{ "movdl $dst, $src\n\t"
14810             "cvtdq2pdl $dst, $dst\t# i2d" %}
14811   ins_encode %{
14812     __ movdl($dst$$XMMRegister, $src$$Register);
14813     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14814   %}
14815   ins_pipe(pipe_slow); // XXX
14816 %}
14817 
14818 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14819 %{
14820   match(Set dst (ConvL2F src));
14821 
14822   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14823   ins_encode %{
14824     if (UseAVX > 0) {
14825       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14826     }
14827     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14828   %}
14829   ins_pipe(pipe_slow); // XXX
14830 %}
14831 
14832 instruct convL2F_reg_mem(regF dst, memory src)
14833 %{
14834   predicate(UseAVX == 0);
14835   match(Set dst (ConvL2F (LoadL src)));
14836 
14837   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14838   ins_encode %{
14839     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14840   %}
14841   ins_pipe(pipe_slow); // XXX
14842 %}
14843 
14844 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14845 %{
14846   match(Set dst (ConvL2D src));
14847 
14848   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14849   ins_encode %{
14850     if (UseAVX > 0) {
14851       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14852     }
14853     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14854   %}
14855   ins_pipe(pipe_slow); // XXX
14856 %}
14857 
14858 instruct convL2D_reg_mem(regD dst, memory src)
14859 %{
14860   predicate(UseAVX == 0);
14861   match(Set dst (ConvL2D (LoadL src)));
14862 
14863   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14864   ins_encode %{
14865     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14866   %}
14867   ins_pipe(pipe_slow); // XXX
14868 %}
14869 
14870 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14871 %{
14872   match(Set dst (ConvI2L src));
14873 
14874   ins_cost(125);
14875   format %{ "movslq  $dst, $src\t# i2l" %}
14876   ins_encode %{
14877     __ movslq($dst$$Register, $src$$Register);
14878   %}
14879   ins_pipe(ialu_reg_reg);
14880 %}
14881 
14882 // Zero-extend convert int to long
14883 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14884 %{
14885   match(Set dst (AndL (ConvI2L src) mask));
14886 
14887   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14888   ins_encode %{
14889     if ($dst$$reg != $src$$reg) {
14890       __ movl($dst$$Register, $src$$Register);
14891     }
14892   %}
14893   ins_pipe(ialu_reg_reg);
14894 %}
14895 
14896 // Zero-extend convert int to long
14897 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14898 %{
14899   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14900 
14901   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14902   ins_encode %{
14903     __ movl($dst$$Register, $src$$Address);
14904   %}
14905   ins_pipe(ialu_reg_mem);
14906 %}
14907 
14908 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14909 %{
14910   match(Set dst (AndL src mask));
14911 
14912   format %{ "movl    $dst, $src\t# zero-extend long" %}
14913   ins_encode %{
14914     __ movl($dst$$Register, $src$$Register);
14915   %}
14916   ins_pipe(ialu_reg_reg);
14917 %}
14918 
14919 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14920 %{
14921   match(Set dst (ConvL2I src));
14922 
14923   format %{ "movl    $dst, $src\t# l2i" %}
14924   ins_encode %{
14925     __ movl($dst$$Register, $src$$Register);
14926   %}
14927   ins_pipe(ialu_reg_reg);
14928 %}
14929 
14930 
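// The Move*2* instructions below reinterpret raw bits between the integer and
// XMM register files (or through a stack slot) without any value conversion;
// they back intrinsics such as Float.floatToRawIntBits and
// Double.longBitsToDouble.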
14931 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14932   match(Set dst (MoveF2I src));
14933   effect(DEF dst, USE src);
14934 
14935   ins_cost(125);
14936   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
14937   ins_encode %{
14938     __ movl($dst$$Register, Address(rsp, $src$$disp));
14939   %}
14940   ins_pipe(ialu_reg_mem);
14941 %}
14942 
14943 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14944   match(Set dst (MoveI2F src));
14945   effect(DEF dst, USE src);
14946 
14947   ins_cost(125);
14948   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
14949   ins_encode %{
14950     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14951   %}
14952   ins_pipe(pipe_slow);
14953 %}
14954 
14955 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14956   match(Set dst (MoveD2L src));
14957   effect(DEF dst, USE src);
14958 
14959   ins_cost(125);
14960   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
14961   ins_encode %{
14962     __ movq($dst$$Register, Address(rsp, $src$$disp));
14963   %}
14964   ins_pipe(ialu_reg_mem);
14965 %}
14966 
14967 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14968   predicate(!UseXmmLoadAndClearUpper);
14969   match(Set dst (MoveL2D src));
14970   effect(DEF dst, USE src);
14971 
14972   ins_cost(125);
14973   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
14974   ins_encode %{
14975     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14976   %}
14977   ins_pipe(pipe_slow);
14978 %}
14979 
14980 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14981   predicate(UseXmmLoadAndClearUpper);
14982   match(Set dst (MoveL2D src));
14983   effect(DEF dst, USE src);
14984 
14985   ins_cost(125);
14986   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
14987   ins_encode %{
14988     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14989   %}
14990   ins_pipe(pipe_slow);
14991 %}
14992 
14993 
14994 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14995   match(Set dst (MoveF2I src));
14996   effect(DEF dst, USE src);
14997 
14998   ins_cost(95); // XXX
14999   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15000   ins_encode %{
15001     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15002   %}
15003   ins_pipe(pipe_slow);
15004 %}
15005 
15006 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15007   match(Set dst (MoveI2F src));
15008   effect(DEF dst, USE src);
15009 
15010   ins_cost(100);
15011   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15012   ins_encode %{
15013     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15014   %}
15015   ins_pipe( ialu_mem_reg );
15016 %}
15017 
15018 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15019   match(Set dst (MoveD2L src));
15020   effect(DEF dst, USE src);
15021 
15022   ins_cost(95); // XXX
15023   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
15024   ins_encode %{
15025     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15026   %}
15027   ins_pipe(pipe_slow);
15028 %}
15029 
15030 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15031   match(Set dst (MoveL2D src));
15032   effect(DEF dst, USE src);
15033 
15034   ins_cost(100);
15035   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15036   ins_encode %{
15037     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15038   %}
15039   ins_pipe(ialu_mem_reg);
15040 %}
15041 
15042 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15043   match(Set dst (MoveF2I src));
15044   effect(DEF dst, USE src);
15045   ins_cost(85);
15046   format %{ "movd    $dst,$src\t# MoveF2I" %}
15047   ins_encode %{
15048     __ movdl($dst$$Register, $src$$XMMRegister);
15049   %}
15050   ins_pipe( pipe_slow );
15051 %}
15052 
15053 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15054   match(Set dst (MoveD2L src));
15055   effect(DEF dst, USE src);
15056   ins_cost(85);
15057   format %{ "movd    $dst,$src\t# MoveD2L" %}
15058   ins_encode %{
15059     __ movdq($dst$$Register, $src$$XMMRegister);
15060   %}
15061   ins_pipe( pipe_slow );
15062 %}
15063 
15064 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15065   match(Set dst (MoveI2F src));
15066   effect(DEF dst, USE src);
15067   ins_cost(100);
15068   format %{ "movd    $dst,$src\t# MoveI2F" %}
15069   ins_encode %{
15070     __ movdl($dst$$XMMRegister, $src$$Register);
15071   %}
15072   ins_pipe( pipe_slow );
15073 %}
15074 
15075 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15076   match(Set dst (MoveL2D src));
15077   effect(DEF dst, USE src);
15078   ins_cost(100);
15079   format %{ "movd    $dst,$src\t# MoveL2D" %}
15080   ins_encode %{
15081      __ movdq($dst$$XMMRegister, $src$$Register);
15082   %}
15083   ins_pipe( pipe_slow );
15084 %}
15085 
15086 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15088 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15089                   Universe dummy, rFlagsReg cr)
15090 %{
15091   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15092   match(Set dummy (ClearArray cnt base));
15093   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15094 
15095   format %{ $$template
15096     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15097     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15098     $$emit$$"jg      LARGE\n\t"
15099     $$emit$$"dec     rcx\n\t"
15100     $$emit$$"js      DONE\t# Zero length\n\t"
15101     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15102     $$emit$$"dec     rcx\n\t"
15103     $$emit$$"jge     LOOP\n\t"
15104     $$emit$$"jmp     DONE\n\t"
15105     $$emit$$"# LARGE:\n\t"
15106     if (UseFastStosb) {
15107        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15108        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15109     } else if (UseXMMForObjInit) {
15110        $$emit$$"mov     rdi,rax\n\t"
15111        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15112        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15113        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15114        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15115        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15116        $$emit$$"add     0x40,rax\n\t"
15117        $$emit$$"# L_zero_64_bytes:\n\t"
15118        $$emit$$"sub     0x8,rcx\n\t"
15119        $$emit$$"jge     L_loop\n\t"
15120        $$emit$$"add     0x4,rcx\n\t"
15121        $$emit$$"jl      L_tail\n\t"
15122        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15123        $$emit$$"add     0x20,rax\n\t"
15124        $$emit$$"sub     0x4,rcx\n\t"
15125        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15126        $$emit$$"add     0x4,rcx\n\t"
15127        $$emit$$"jle     L_end\n\t"
15128        $$emit$$"dec     rcx\n\t"
15129        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15130        $$emit$$"vmovq   xmm0,(rax)\n\t"
15131        $$emit$$"add     0x8,rax\n\t"
15132        $$emit$$"dec     rcx\n\t"
15133        $$emit$$"jge     L_sloop\n\t"
15134        $$emit$$"# L_end:\n\t"
15135     } else {
15136        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15137     }
15138     $$emit$$"# DONE"
15139   %}
15140   ins_encode %{
15141     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15142                  $tmp$$XMMRegister, false, knoreg);
15143   %}
15144   ins_pipe(pipe_slow);
15145 %}
15146 
15147 // Small non-constant length ClearArray for AVX512 targets.
15148 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15149                        Universe dummy, rFlagsReg cr)
15150 %{
15151   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15152   match(Set dummy (ClearArray cnt base));
15153   ins_cost(125);
15154   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15155 
15156   format %{ $$template
15157     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15158     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15159     $$emit$$"jg      LARGE\n\t"
15160     $$emit$$"dec     rcx\n\t"
15161     $$emit$$"js      DONE\t# Zero length\n\t"
15162     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15163     $$emit$$"dec     rcx\n\t"
15164     $$emit$$"jge     LOOP\n\t"
15165     $$emit$$"jmp     DONE\n\t"
15166     $$emit$$"# LARGE:\n\t"
15167     if (UseFastStosb) {
15168        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15169        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15170     } else if (UseXMMForObjInit) {
15171        $$emit$$"mov     rdi,rax\n\t"
15172        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15173        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15174        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15175        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15176        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15177        $$emit$$"add     0x40,rax\n\t"
15178        $$emit$$"# L_zero_64_bytes:\n\t"
15179        $$emit$$"sub     0x8,rcx\n\t"
15180        $$emit$$"jge     L_loop\n\t"
15181        $$emit$$"add     0x4,rcx\n\t"
15182        $$emit$$"jl      L_tail\n\t"
15183        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15184        $$emit$$"add     0x20,rax\n\t"
15185        $$emit$$"sub     0x4,rcx\n\t"
15186        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15187        $$emit$$"add     0x4,rcx\n\t"
15188        $$emit$$"jle     L_end\n\t"
15189        $$emit$$"dec     rcx\n\t"
15190        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15191        $$emit$$"vmovq   xmm0,(rax)\n\t"
15192        $$emit$$"add     0x8,rax\n\t"
15193        $$emit$$"dec     rcx\n\t"
15194        $$emit$$"jge     L_sloop\n\t"
15195        $$emit$$"# L_end:\n\t"
15196     } else {
15197        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15198     }
15199     $$emit$$"# DONE"
15200   %}
15201   ins_encode %{
15202     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15203                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15204   %}
15205   ins_pipe(pipe_slow);
15206 %}
15207 
15208 // Large non-constant length ClearArray for non-AVX512 targets.
15209 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15210                         Universe dummy, rFlagsReg cr)
15211 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15213   match(Set dummy (ClearArray cnt base));
15214   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15215 
15216   format %{ $$template
15217     if (UseFastStosb) {
15218        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15219        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15220        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15221     } else if (UseXMMForObjInit) {
15222        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15223        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15224        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15225        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15226        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15227        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15228        $$emit$$"add     0x40,rax\n\t"
15229        $$emit$$"# L_zero_64_bytes:\n\t"
15230        $$emit$$"sub     0x8,rcx\n\t"
15231        $$emit$$"jge     L_loop\n\t"
15232        $$emit$$"add     0x4,rcx\n\t"
15233        $$emit$$"jl      L_tail\n\t"
15234        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15235        $$emit$$"add     0x20,rax\n\t"
15236        $$emit$$"sub     0x4,rcx\n\t"
15237        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15238        $$emit$$"add     0x4,rcx\n\t"
15239        $$emit$$"jle     L_end\n\t"
15240        $$emit$$"dec     rcx\n\t"
15241        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15242        $$emit$$"vmovq   xmm0,(rax)\n\t"
15243        $$emit$$"add     0x8,rax\n\t"
15244        $$emit$$"dec     rcx\n\t"
15245        $$emit$$"jge     L_sloop\n\t"
15246        $$emit$$"# L_end:\n\t"
15247     } else {
15248        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15249        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15250     }
15251   %}
15252   ins_encode %{
15253     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15254                  $tmp$$XMMRegister, true, knoreg);
15255   %}
15256   ins_pipe(pipe_slow);
15257 %}
15258 
15259 // Large non-constant length ClearArray for AVX512 targets.
15260 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15261                              Universe dummy, rFlagsReg cr)
15262 %{
15263   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15264   match(Set dummy (ClearArray cnt base));
15265   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15266 
15267   format %{ $$template
15268     if (UseFastStosb) {
15269        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15270        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15271        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15272     } else if (UseXMMForObjInit) {
15273        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15274        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15275        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15276        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15277        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15278        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15279        $$emit$$"add     0x40,rax\n\t"
15280        $$emit$$"# L_zero_64_bytes:\n\t"
15281        $$emit$$"sub     0x8,rcx\n\t"
15282        $$emit$$"jge     L_loop\n\t"
15283        $$emit$$"add     0x4,rcx\n\t"
15284        $$emit$$"jl      L_tail\n\t"
15285        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15286        $$emit$$"add     0x20,rax\n\t"
15287        $$emit$$"sub     0x4,rcx\n\t"
15288        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15289        $$emit$$"add     0x4,rcx\n\t"
15290        $$emit$$"jle     L_end\n\t"
15291        $$emit$$"dec     rcx\n\t"
15292        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15293        $$emit$$"vmovq   xmm0,(rax)\n\t"
15294        $$emit$$"add     0x8,rax\n\t"
15295        $$emit$$"dec     rcx\n\t"
15296        $$emit$$"jge     L_sloop\n\t"
15297        $$emit$$"# L_end:\n\t"
15298     } else {
15299        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15300        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15301     }
15302   %}
15303   ins_encode %{
15304     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15305                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15306   %}
15307   ins_pipe(pipe_slow);
15308 %}
15309 
15310 // Small constant length ClearArray for AVX512 targets.
15311 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15312 %{
15313   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15314   match(Set dummy (ClearArray cnt base));
15315   ins_cost(100);
15316   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15317   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15318   ins_encode %{
15319    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15320   %}
15321   ins_pipe(pipe_slow);
15322 %}
15323 
15324 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15325                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15326 %{
15327   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15328   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15329   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15330 
15331   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15332   ins_encode %{
15333     __ string_compare($str1$$Register, $str2$$Register,
15334                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15335                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15336   %}
15337   ins_pipe( pipe_slow );
15338 %}
15339 
15340 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15341                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15342 %{
15343   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15344   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15345   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15346 
15347   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15348   ins_encode %{
15349     __ string_compare($str1$$Register, $str2$$Register,
15350                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15351                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15352   %}
15353   ins_pipe( pipe_slow );
15354 %}
15355 
15356 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15357                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15358 %{
15359   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15360   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15361   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15362 
15363   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15364   ins_encode %{
15365     __ string_compare($str1$$Register, $str2$$Register,
15366                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15367                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15368   %}
15369   ins_pipe( pipe_slow );
15370 %}
15371 
15372 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15373                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15374 %{
15375   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15376   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15377   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15378 
15379   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15380   ins_encode %{
15381     __ string_compare($str1$$Register, $str2$$Register,
15382                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15383                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15384   %}
15385   ins_pipe( pipe_slow );
15386 %}
15387 
15388 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15389                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15390 %{
15391   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15392   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15393   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15394 
15395   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15396   ins_encode %{
15397     __ string_compare($str1$$Register, $str2$$Register,
15398                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15399                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15400   %}
15401   ins_pipe( pipe_slow );
15402 %}
15403 
15404 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15405                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15406 %{
15407   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15408   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15409   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15410 
15411   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15412   ins_encode %{
15413     __ string_compare($str1$$Register, $str2$$Register,
15414                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15415                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15416   %}
15417   ins_pipe( pipe_slow );
15418 %}
15419 
15420 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15421                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15422 %{
15423   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15424   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15425   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15426 
15427   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15428   ins_encode %{
15429     __ string_compare($str2$$Register, $str1$$Register,
15430                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15431                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15432   %}
15433   ins_pipe( pipe_slow );
15434 %}
15435 
15436 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15437                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15438 %{
15439   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15440   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15441   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15442 
15443   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15444   ins_encode %{
15445     __ string_compare($str2$$Register, $str1$$Register,
15446                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15447                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15448   %}
15449   ins_pipe( pipe_slow );
15450 %}
15451 
15452 // fast search of substring with known size.
15453 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15454                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15455 %{
15456   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15457   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15458   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15459 
15460   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15461   ins_encode %{
15462     int icnt2 = (int)$int_cnt2$$constant;
15463     if (icnt2 >= 16) {
15464       // IndexOf for constant substrings with size >= 16 elements
15465       // which don't need to be loaded through stack.
15466       __ string_indexofC8($str1$$Register, $str2$$Register,
15467                           $cnt1$$Register, $cnt2$$Register,
15468                           icnt2, $result$$Register,
15469                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15470     } else {
15471       // Small strings are loaded through stack if they cross page boundary.
15472       __ string_indexof($str1$$Register, $str2$$Register,
15473                         $cnt1$$Register, $cnt2$$Register,
15474                         icnt2, $result$$Register,
15475                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15476     }
15477   %}
15478   ins_pipe( pipe_slow );
15479 %}
15480 
15481 // fast search of substring with known size.
15482 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15483                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15484 %{
15485   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15486   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15487   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15488 
15489   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15490   ins_encode %{
15491     int icnt2 = (int)$int_cnt2$$constant;
15492     if (icnt2 >= 8) {
15493       // IndexOf for constant substrings with size >= 8 elements
15494       // which don't need to be loaded through stack.
15495       __ string_indexofC8($str1$$Register, $str2$$Register,
15496                           $cnt1$$Register, $cnt2$$Register,
15497                           icnt2, $result$$Register,
15498                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15499     } else {
15500       // Small strings are loaded through stack if they cross page boundary.
15501       __ string_indexof($str1$$Register, $str2$$Register,
15502                         $cnt1$$Register, $cnt2$$Register,
15503                         icnt2, $result$$Register,
15504                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15505     }
15506   %}
15507   ins_pipe( pipe_slow );
15508 %}
15509 
15510 // fast search of substring with known size.
15511 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15512                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15513 %{
15514   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15515   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15516   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15517 
15518   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15519   ins_encode %{
15520     int icnt2 = (int)$int_cnt2$$constant;
15521     if (icnt2 >= 8) {
15522       // IndexOf for constant substrings with size >= 8 elements
15523       // which don't need to be loaded through stack.
15524       __ string_indexofC8($str1$$Register, $str2$$Register,
15525                           $cnt1$$Register, $cnt2$$Register,
15526                           icnt2, $result$$Register,
15527                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15528     } else {
15529       // Small strings are loaded through stack if they cross page boundary.
15530       __ string_indexof($str1$$Register, $str2$$Register,
15531                         $cnt1$$Register, $cnt2$$Register,
15532                         icnt2, $result$$Register,
15533                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15534     }
15535   %}
15536   ins_pipe( pipe_slow );
15537 %}
15538 
15539 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15540                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15541 %{
15542   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15543   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15544   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15545 
15546   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15547   ins_encode %{
15548     __ string_indexof($str1$$Register, $str2$$Register,
15549                       $cnt1$$Register, $cnt2$$Register,
15550                       (-1), $result$$Register,
15551                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15552   %}
15553   ins_pipe( pipe_slow );
15554 %}
15555 
15556 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15557                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15558 %{
15559   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15560   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15561   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15562 
15563   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15564   ins_encode %{
15565     __ string_indexof($str1$$Register, $str2$$Register,
15566                       $cnt1$$Register, $cnt2$$Register,
15567                       (-1), $result$$Register,
15568                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15569   %}
15570   ins_pipe( pipe_slow );
15571 %}
15572 
15573 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15574                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15575 %{
15576   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15577   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15578   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15579 
15580   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15581   ins_encode %{
15582     __ string_indexof($str1$$Register, $str2$$Register,
15583                       $cnt1$$Register, $cnt2$$Register,
15584                       (-1), $result$$Register,
15585                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15586   %}
15587   ins_pipe( pipe_slow );
15588 %}
15589 
15590 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15591                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15592 %{
15593   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15594   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15595   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15596   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15597   ins_encode %{
15598     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15599                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15600   %}
15601   ins_pipe( pipe_slow );
15602 %}
15603 
15604 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15605                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15606 %{
15607   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15608   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15609   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15610   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15611   ins_encode %{
15612     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15613                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15614   %}
15615   ins_pipe( pipe_slow );
15616 %}
15617 
15618 // fast string equals
15619 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15620                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15621 %{
15622   predicate(!VM_Version::supports_avx512vlbw());
15623   match(Set result (StrEquals (Binary str1 str2) cnt));
15624   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15625 
15626   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15627   ins_encode %{
15628     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15629                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15630                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15631   %}
15632   ins_pipe( pipe_slow );
15633 %}
15634 
15635 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15636                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15637 %{
15638   predicate(VM_Version::supports_avx512vlbw());
15639   match(Set result (StrEquals (Binary str1 str2) cnt));
15640   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15641 
15642   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15643   ins_encode %{
15644     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15645                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15646                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15647   %}
15648   ins_pipe( pipe_slow );
15649 %}
15650 
15651 // fast array equals
15652 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15653                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15654 %{
15655   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15656   match(Set result (AryEq ary1 ary2));
15657   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15658 
15659   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15660   ins_encode %{
15661     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15662                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15663                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15664   %}
15665   ins_pipe( pipe_slow );
15666 %}
15667 
15668 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15669                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15670 %{
15671   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15672   match(Set result (AryEq ary1 ary2));
15673   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15674 
15675   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15676   ins_encode %{
15677     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15678                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15679                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15680   %}
15681   ins_pipe( pipe_slow );
15682 %}
15683 
15684 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15685                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15686 %{
15687   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15688   match(Set result (AryEq ary1 ary2));
15689   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15690 
15691   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15692   ins_encode %{
15693     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15694                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15695                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15696   %}
15697   ins_pipe( pipe_slow );
15698 %}
15699 
15700 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15701                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15702 %{
15703   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15704   match(Set result (AryEq ary1 ary2));
15705   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15706 
15707   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15708   ins_encode %{
15709     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15710                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15711                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15712   %}
15713   ins_pipe( pipe_slow );
15714 %}
15715 
15716 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15717                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15718                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15719                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15720                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15721 %{
15722   predicate(UseAVX >= 2);
15723   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15724   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15725          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15726          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15727          USE basic_type, KILL cr);
15728 
15729   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15730   ins_encode %{
15731     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15732                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15733                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15734                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15735                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15736                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15737                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15738   %}
15739   ins_pipe( pipe_slow );
15740 %}
15741 
15742 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15744 %{
15745   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15746   match(Set result (CountPositives ary1 len));
15747   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15748 
15749   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15750   ins_encode %{
15751     __ count_positives($ary1$$Register, $len$$Register,
15752                        $result$$Register, $tmp3$$Register,
15753                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15754   %}
15755   ins_pipe( pipe_slow );
15756 %}
15757 
15758 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15760 %{
15761   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15762   match(Set result (CountPositives ary1 len));
15763   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15764 
15765   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15766   ins_encode %{
15767     __ count_positives($ary1$$Register, $len$$Register,
15768                        $result$$Register, $tmp3$$Register,
15769                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15770   %}
15771   ins_pipe( pipe_slow );
15772 %}
15773 
15774 // fast char[] to byte[] compression
15775 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15776                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15777   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15778   match(Set result (StrCompressedCopy src (Binary dst len)));
15779   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15780          USE_KILL len, KILL tmp5, KILL cr);
15781 
15782   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15783   ins_encode %{
15784     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15785                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15786                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15787                            knoreg, knoreg);
15788   %}
15789   ins_pipe( pipe_slow );
15790 %}
15791 
15792 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15793                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15794   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15795   match(Set result (StrCompressedCopy src (Binary dst len)));
15796   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15797          USE_KILL len, KILL tmp5, KILL cr);
15798 
15799   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15800   ins_encode %{
15801     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15802                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15803                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15804                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15805   %}
15806   ins_pipe( pipe_slow );
15807 %}

// fast byte[] to char[] inflation
15809 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15810                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15811   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15812   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15813   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15814 
15815   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15816   ins_encode %{
15817     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15818                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15819   %}
15820   ins_pipe( pipe_slow );
15821 %}
15822 
15823 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15824                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15825   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15826   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15827   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15828 
15829   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15830   ins_encode %{
15831     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15832                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15833   %}
15834   ins_pipe( pipe_slow );
15835 %}
15836 
15837 // encode char[] to byte[] in ISO_8859_1
15838 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15839                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15840                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15841   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15842   match(Set result (EncodeISOArray src (Binary dst len)));
15843   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15844 
15845   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15846   ins_encode %{
15847     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15848                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15849                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15850   %}
15851   ins_pipe( pipe_slow );
15852 %}
15853 
15854 // encode char[] to byte[] in ASCII
15855 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15856                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15857                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15858   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15859   match(Set result (EncodeISOArray src (Binary dst len)));
15860   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15861 
15862   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15863   ins_encode %{
15864     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15865                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15866                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15867   %}
15868   ins_pipe( pipe_slow );
15869 %}
15870 
15871 //----------Overflow Math Instructions-----------------------------------------
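
// These rules match the Overflow* ideal nodes produced by the Math.*Exact
// intrinsics. Each one only sets the flags register; the matcher pairs it
// with a conditional branch on overflow. The Java pattern being lowered
// (illustrative):
//
//   int r = Math.addExact(a, b);    // addl op1, op2;  jo -> deopt/slow path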
15872 
15873 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15874 %{
15875   match(Set cr (OverflowAddI op1 op2));
15876   effect(DEF cr, USE_KILL op1, USE op2);
15877 
15878   format %{ "addl    $op1, $op2\t# overflow check int" %}
15879 
15880   ins_encode %{
15881     __ addl($op1$$Register, $op2$$Register);
15882   %}
15883   ins_pipe(ialu_reg_reg);
15884 %}
15885 
15886 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15887 %{
15888   match(Set cr (OverflowAddI op1 op2));
15889   effect(DEF cr, USE_KILL op1, USE op2);
15890 
15891   format %{ "addl    $op1, $op2\t# overflow check int" %}
15892 
15893   ins_encode %{
15894     __ addl($op1$$Register, $op2$$constant);
15895   %}
15896   ins_pipe(ialu_reg_reg);
15897 %}
15898 
15899 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15900 %{
15901   match(Set cr (OverflowAddL op1 op2));
15902   effect(DEF cr, USE_KILL op1, USE op2);
15903 
15904   format %{ "addq    $op1, $op2\t# overflow check long" %}
15905   ins_encode %{
15906     __ addq($op1$$Register, $op2$$Register);
15907   %}
15908   ins_pipe(ialu_reg_reg);
15909 %}
15910 
15911 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15912 %{
15913   match(Set cr (OverflowAddL op1 op2));
15914   effect(DEF cr, USE_KILL op1, USE op2);
15915 
15916   format %{ "addq    $op1, $op2\t# overflow check long" %}
15917   ins_encode %{
15918     __ addq($op1$$Register, $op2$$constant);
15919   %}
15920   ins_pipe(ialu_reg_reg);
15921 %}
15922 
15923 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15924 %{
15925   match(Set cr (OverflowSubI op1 op2));
15926 
15927   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15928   ins_encode %{
15929     __ cmpl($op1$$Register, $op2$$Register);
15930   %}
15931   ins_pipe(ialu_reg_reg);
15932 %}
15933 
15934 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15935 %{
15936   match(Set cr (OverflowSubI op1 op2));
15937 
15938   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15939   ins_encode %{
15940     __ cmpl($op1$$Register, $op2$$constant);
15941   %}
15942   ins_pipe(ialu_reg_reg);
15943 %}
15944 
15945 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15946 %{
15947   match(Set cr (OverflowSubL op1 op2));
15948 
15949   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15950   ins_encode %{
15951     __ cmpq($op1$$Register, $op2$$Register);
15952   %}
15953   ins_pipe(ialu_reg_reg);
15954 %}
15955 
15956 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15957 %{
15958   match(Set cr (OverflowSubL op1 op2));
15959 
15960   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15961   ins_encode %{
15962     __ cmpq($op1$$Register, $op2$$constant);
15963   %}
15964   ins_pipe(ialu_reg_reg);
15965 %}
15966 
15967 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15968 %{
15969   match(Set cr (OverflowSubI zero op2));
15970   effect(DEF cr, USE_KILL op2);
15971 
15972   format %{ "negl    $op2\t# overflow check int" %}
15973   ins_encode %{
15974     __ negl($op2$$Register);
15975   %}
15976   ins_pipe(ialu_reg_reg);
15977 %}
15978 
15979 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15980 %{
15981   match(Set cr (OverflowSubL zero op2));
15982   effect(DEF cr, USE_KILL op2);
15983 
15984   format %{ "negq    $op2\t# overflow check long" %}
15985   ins_encode %{
15986     __ negq($op2$$Register);
15987   %}
15988   ins_pipe(ialu_reg_reg);
15989 %}
15990 
15991 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15992 %{
15993   match(Set cr (OverflowMulI op1 op2));
15994   effect(DEF cr, USE_KILL op1, USE op2);
15995 
15996   format %{ "imull    $op1, $op2\t# overflow check int" %}
15997   ins_encode %{
15998     __ imull($op1$$Register, $op2$$Register);
15999   %}
16000   ins_pipe(ialu_reg_reg_alu0);
16001 %}
16002 
16003 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16004 %{
16005   match(Set cr (OverflowMulI op1 op2));
16006   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16007 
16008   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16009   ins_encode %{
16010     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16011   %}
16012   ins_pipe(ialu_reg_reg_alu0);
16013 %}
16014 
16015 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16016 %{
16017   match(Set cr (OverflowMulL op1 op2));
16018   effect(DEF cr, USE_KILL op1, USE op2);
16019 
16020   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16021   ins_encode %{
16022     __ imulq($op1$$Register, $op2$$Register);
16023   %}
16024   ins_pipe(ialu_reg_reg_alu0);
16025 %}
16026 
16027 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16028 %{
16029   match(Set cr (OverflowMulL op1 op2));
16030   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16031 
16032   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16033   ins_encode %{
16034     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16035   %}
16036   ins_pipe(ialu_reg_reg_alu0);
16037 %}
16038 
16039 
16040 //----------Control Flow Instructions------------------------------------------
16041 // Signed compare Instructions
16042 
16043 // XXX more variants!!
16044 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16045 %{
16046   match(Set cr (CmpI op1 op2));
16047   effect(DEF cr, USE op1, USE op2);
16048 
16049   format %{ "cmpl    $op1, $op2" %}
16050   ins_encode %{
16051     __ cmpl($op1$$Register, $op2$$Register);
16052   %}
16053   ins_pipe(ialu_cr_reg_reg);
16054 %}
16055 
16056 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16057 %{
16058   match(Set cr (CmpI op1 op2));
16059 
16060   format %{ "cmpl    $op1, $op2" %}
16061   ins_encode %{
16062     __ cmpl($op1$$Register, $op2$$constant);
16063   %}
16064   ins_pipe(ialu_cr_reg_imm);
16065 %}
16066 
16067 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16068 %{
16069   match(Set cr (CmpI op1 (LoadI op2)));
16070 
16071   ins_cost(500); // XXX
16072   format %{ "cmpl    $op1, $op2" %}
16073   ins_encode %{
16074     __ cmpl($op1$$Register, $op2$$Address);
16075   %}
16076   ins_pipe(ialu_cr_reg_mem);
16077 %}
16078 
16079 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16080 %{
16081   match(Set cr (CmpI src zero));
16082 
16083   format %{ "testl   $src, $src" %}
16084   ins_encode %{
16085     __ testl($src$$Register, $src$$Register);
16086   %}
16087   ins_pipe(ialu_cr_reg_imm);
16088 %}
16089 
16090 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16091 %{
16092   match(Set cr (CmpI (AndI src con) zero));
16093 
16094   format %{ "testl   $src, $con" %}
16095   ins_encode %{
16096     __ testl($src$$Register, $con$$constant);
16097   %}
16098   ins_pipe(ialu_cr_reg_imm);
16099 %}
16100 
16101 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16102 %{
16103   match(Set cr (CmpI (AndI src1 src2) zero));
16104 
16105   format %{ "testl   $src1, $src2" %}
16106   ins_encode %{
16107     __ testl($src1$$Register, $src2$$Register);
16108   %}
16109   ins_pipe(ialu_cr_reg_imm);
16110 %}
16111 
16112 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16113 %{
16114   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16115 
16116   format %{ "testl   $src, $mem" %}
16117   ins_encode %{
16118     __ testl($src$$Register, $mem$$Address);
16119   %}
16120   ins_pipe(ialu_cr_reg_mem);
16121 %}
16122 
16123 // Unsigned compare Instructions; really, same as signed except they
16124 // produce an rFlagsRegU instead of rFlagsReg.
16125 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16126 %{
16127   match(Set cr (CmpU op1 op2));
16128 
16129   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16130   ins_encode %{
16131     __ cmpl($op1$$Register, $op2$$Register);
16132   %}
16133   ins_pipe(ialu_cr_reg_reg);
16134 %}
16135 
16136 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16137 %{
16138   match(Set cr (CmpU op1 op2));
16139 
16140   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16141   ins_encode %{
16142     __ cmpl($op1$$Register, $op2$$constant);
16143   %}
16144   ins_pipe(ialu_cr_reg_imm);
16145 %}
16146 
16147 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16148 %{
16149   match(Set cr (CmpU op1 (LoadI op2)));
16150 
16151   ins_cost(500); // XXX
16152   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16153   ins_encode %{
16154     __ cmpl($op1$$Register, $op2$$Address);
16155   %}
16156   ins_pipe(ialu_cr_reg_mem);
16157 %}
16158 
16159 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16160 %{
16161   match(Set cr (CmpU src zero));
16162 
16163   format %{ "testl   $src, $src\t# unsigned" %}
16164   ins_encode %{
16165     __ testl($src$$Register, $src$$Register);
16166   %}
16167   ins_pipe(ialu_cr_reg_imm);
16168 %}
16169 
16170 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16171 %{
16172   match(Set cr (CmpP op1 op2));
16173 
16174   format %{ "cmpq    $op1, $op2\t# ptr" %}
16175   ins_encode %{
16176     __ cmpq($op1$$Register, $op2$$Register);
16177   %}
16178   ins_pipe(ialu_cr_reg_reg);
16179 %}
16180 
16181 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16182 %{
16183   match(Set cr (CmpP op1 (LoadP op2)));
16184   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16185 
16186   ins_cost(500); // XXX
16187   format %{ "cmpq    $op1, $op2\t# ptr" %}
16188   ins_encode %{
16189     __ cmpq($op1$$Register, $op2$$Address);
16190   %}
16191   ins_pipe(ialu_cr_reg_mem);
16192 %}
16193 
16194 // XXX this is generalized by compP_rReg_mem???
16195 // Compare raw pointer (used in out-of-heap check).
16196 // Only works because non-oop pointers must be raw pointers
16197 // and raw pointers have no anti-dependencies.
16198 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16199 %{
16200   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16201             n->in(2)->as_Load()->barrier_data() == 0);
16202   match(Set cr (CmpP op1 (LoadP op2)));
16203 
16204   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16205   ins_encode %{
16206     __ cmpq($op1$$Register, $op2$$Address);
16207   %}
16208   ins_pipe(ialu_cr_reg_mem);
16209 %}
16210 
16211 // This will generate a signed flags result. This should be OK since
16212 // any compare to a zero should be eq/neq.
16213 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16214 %{
16215   match(Set cr (CmpP src zero));
16216 
16217   format %{ "testq   $src, $src\t# ptr" %}
16218   ins_encode %{
16219     __ testq($src$$Register, $src$$Register);
16220   %}
16221   ins_pipe(ialu_cr_reg_imm);
16222 %}
16223 
16224 // This will generate a signed flags result. This should be OK since
16225 // any compare to a zero should be eq/neq.
16226 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16227 %{
16228   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16229             n->in(1)->as_Load()->barrier_data() == 0);
16230   match(Set cr (CmpP (LoadP op) zero));
16231 
16232   ins_cost(500); // XXX
16233   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16234   ins_encode %{
16235     __ testq($op$$Address, 0xFFFFFFFF);
16236   %}
16237   ins_pipe(ialu_cr_reg_imm);
16238 %}
16239 
16240 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16241 %{
16242   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16243             n->in(1)->as_Load()->barrier_data() == 0);
16244   match(Set cr (CmpP (LoadP mem) zero));
16245 
16246   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16247   ins_encode %{
16248     __ cmpq(r12, $mem$$Address);
16249   %}
16250   ins_pipe(ialu_cr_reg_mem);
16251 %}
16252 
16253 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16254 %{
16255   match(Set cr (CmpN op1 op2));
16256 
16257   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16258   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16259   ins_pipe(ialu_cr_reg_reg);
16260 %}
16261 
16262 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16263 %{
16264   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16265   match(Set cr (CmpN src (LoadN mem)));
16266 
16267   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16268   ins_encode %{
16269     __ cmpl($src$$Register, $mem$$Address);
16270   %}
16271   ins_pipe(ialu_cr_reg_mem);
16272 %}
16273 
16274 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16275   match(Set cr (CmpN op1 op2));
16276 
16277   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16278   ins_encode %{
16279     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16280   %}
16281   ins_pipe(ialu_cr_reg_imm);
16282 %}
16283 
16284 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16285 %{
16286   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16287   match(Set cr (CmpN src (LoadN mem)));
16288 
16289   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16290   ins_encode %{
16291     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16292   %}
16293   ins_pipe(ialu_cr_reg_mem);
16294 %}
16295 
16296 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16297   match(Set cr (CmpN op1 op2));
16298 
16299   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16300   ins_encode %{
16301     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16302   %}
16303   ins_pipe(ialu_cr_reg_imm);
16304 %}
16305 
16306 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16307 %{
16308   predicate(!UseCompactObjectHeaders);
16309   match(Set cr (CmpN src (LoadNKlass mem)));
16310 
16311   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16312   ins_encode %{
16313     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16314   %}
16315   ins_pipe(ialu_cr_reg_mem);
16316 %}
16317 
16318 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16319   match(Set cr (CmpN src zero));
16320 
16321   format %{ "testl   $src, $src\t# compressed ptr" %}
16322   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16323   ins_pipe(ialu_cr_reg_imm);
16324 %}
16325 
16326 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16327 %{
16328   predicate(CompressedOops::base() != nullptr &&
16329             n->in(1)->as_Load()->barrier_data() == 0);
16330   match(Set cr (CmpN (LoadN mem) zero));
16331 
16332   ins_cost(500); // XXX
16333   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16334   ins_encode %{
16335     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16336   %}
16337   ins_pipe(ialu_cr_reg_mem);
16338 %}
16339 
16340 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16341 %{
16342   predicate(CompressedOops::base() == nullptr &&
16343             n->in(1)->as_Load()->barrier_data() == 0);
16344   match(Set cr (CmpN (LoadN mem) zero));
16345 
16346   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16347   ins_encode %{
16348     __ cmpl(r12, $mem$$Address);
16349   %}
16350   ins_pipe(ialu_cr_reg_mem);
16351 %}
16352 
16353 // Yanked all unsigned pointer compare operations.
16354 // Pointer compares are done with CmpP which is already unsigned.
16355 
16356 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16357 %{
16358   match(Set cr (CmpL op1 op2));
16359 
16360   format %{ "cmpq    $op1, $op2" %}
16361   ins_encode %{
16362     __ cmpq($op1$$Register, $op2$$Register);
16363   %}
16364   ins_pipe(ialu_cr_reg_reg);
16365 %}
16366 
16367 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16368 %{
16369   match(Set cr (CmpL op1 op2));
16370 
16371   format %{ "cmpq    $op1, $op2" %}
16372   ins_encode %{
16373     __ cmpq($op1$$Register, $op2$$constant);
16374   %}
16375   ins_pipe(ialu_cr_reg_imm);
16376 %}
16377 
16378 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16379 %{
16380   match(Set cr (CmpL op1 (LoadL op2)));
16381 
16382   format %{ "cmpq    $op1, $op2" %}
16383   ins_encode %{
16384     __ cmpq($op1$$Register, $op2$$Address);
16385   %}
16386   ins_pipe(ialu_cr_reg_mem);
16387 %}
16388 
16389 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16390 %{
16391   match(Set cr (CmpL src zero));
16392 
16393   format %{ "testq   $src, $src" %}
16394   ins_encode %{
16395     __ testq($src$$Register, $src$$Register);
16396   %}
16397   ins_pipe(ialu_cr_reg_imm);
16398 %}
16399 
16400 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16401 %{
16402   match(Set cr (CmpL (AndL src con) zero));
16403 
16404   format %{ "testq   $src, $con\t# long" %}
16405   ins_encode %{
16406     __ testq($src$$Register, $con$$constant);
16407   %}
16408   ins_pipe(ialu_cr_reg_imm);
16409 %}
16410 
16411 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16412 %{
16413   match(Set cr (CmpL (AndL src1 src2) zero));
16414 
16415   format %{ "testq   $src1, $src2\t# long" %}
16416   ins_encode %{
16417     __ testq($src1$$Register, $src2$$Register);
16418   %}
16419   ins_pipe(ialu_cr_reg_imm);
16420 %}
16421 
16422 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16423 %{
16424   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16425 
16426   format %{ "testq   $src, $mem" %}
16427   ins_encode %{
16428     __ testq($src$$Register, $mem$$Address);
16429   %}
16430   ins_pipe(ialu_cr_reg_mem);
16431 %}
16432 
16433 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16434 %{
16435   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16436 
16437   format %{ "testq   $src, $mem" %}
16438   ins_encode %{
16439     __ testq($src$$Register, $mem$$Address);
16440   %}
16441   ins_pipe(ialu_cr_reg_mem);
16442 %}
16443 
16444 // Manifest a CmpU result in an integer register.  Very painful.
16445 // This is the test to avoid.
16446 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16447 %{
16448   match(Set dst (CmpU3 src1 src2));
16449   effect(KILL flags);
16450 
16451   ins_cost(275); // XXX
16452   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16453             "movl    $dst, -1\n\t"
16454             "jb,u    done\n\t"
16455             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16456     "done:" %}
16457   ins_encode %{
16458     Label done;
16459     __ cmpl($src1$$Register, $src2$$Register);
16460     __ movl($dst$$Register, -1);
16461     __ jccb(Assembler::below, done);
16462     __ setcc(Assembler::notZero, $dst$$Register);
16463     __ bind(done);
16464   %}
16465   ins_pipe(pipe_slow);
16466 %}
16467 
16468 // Manifest a CmpL result in an integer register.  Very painful.
16469 // This is the test to avoid.
16470 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16471 %{
16472   match(Set dst (CmpL3 src1 src2));
16473   effect(KILL flags);
16474 
16475   ins_cost(275); // XXX
16476   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16477             "movl    $dst, -1\n\t"
16478             "jl,s    done\n\t"
16479             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16480     "done:" %}
16481   ins_encode %{
16482     Label done;
16483     __ cmpq($src1$$Register, $src2$$Register);
16484     __ movl($dst$$Register, -1);
16485     __ jccb(Assembler::less, done);
16486     __ setcc(Assembler::notZero, $dst$$Register);
16487     __ bind(done);
16488   %}
16489   ins_pipe(pipe_slow);
16490 %}
16491 
16492 // Manifest a CmpUL result in an integer register.  Very painful.
16493 // This is the test to avoid.
16494 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16495 %{
16496   match(Set dst (CmpUL3 src1 src2));
16497   effect(KILL flags);
16498 
16499   ins_cost(275); // XXX
16500   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16501             "movl    $dst, -1\n\t"
16502             "jb,u    done\n\t"
16503             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16504     "done:" %}
16505   ins_encode %{
16506     Label done;
16507     __ cmpq($src1$$Register, $src2$$Register);
16508     __ movl($dst$$Register, -1);
16509     __ jccb(Assembler::below, done);
16510     __ setcc(Assembler::notZero, $dst$$Register);
16511     __ bind(done);
16512   %}
16513   ins_pipe(pipe_slow);
16514 %}
16515 
16516 // Unsigned long compare Instructions; really, same as signed long except they
16517 // produce an rFlagsRegU instead of rFlagsReg.
16518 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16519 %{
16520   match(Set cr (CmpUL op1 op2));
16521 
16522   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16523   ins_encode %{
16524     __ cmpq($op1$$Register, $op2$$Register);
16525   %}
16526   ins_pipe(ialu_cr_reg_reg);
16527 %}
16528 
16529 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16530 %{
16531   match(Set cr (CmpUL op1 op2));
16532 
16533   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16534   ins_encode %{
16535     __ cmpq($op1$$Register, $op2$$constant);
16536   %}
16537   ins_pipe(ialu_cr_reg_imm);
16538 %}
16539 
16540 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16541 %{
16542   match(Set cr (CmpUL op1 (LoadL op2)));
16543 
16544   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16545   ins_encode %{
16546     __ cmpq($op1$$Register, $op2$$Address);
16547   %}
16548   ins_pipe(ialu_cr_reg_mem);
16549 %}
16550 
16551 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16552 %{
16553   match(Set cr (CmpUL src zero));
16554 
16555   format %{ "testq   $src, $src\t# unsigned" %}
16556   ins_encode %{
16557     __ testq($src$$Register, $src$$Register);
16558   %}
16559   ins_pipe(ialu_cr_reg_imm);
16560 %}
16561 
16562 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16563 %{
16564   match(Set cr (CmpI (LoadB mem) imm));
16565 
16566   ins_cost(125);
16567   format %{ "cmpb    $mem, $imm" %}
16568   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16569   ins_pipe(ialu_cr_reg_mem);
16570 %}
16571 
16572 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16573 %{
16574   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16575 
16576   ins_cost(125);
16577   format %{ "testb   $mem, $imm\t# ubyte" %}
16578   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16579   ins_pipe(ialu_cr_reg_mem);
16580 %}
16581 
16582 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16583 %{
16584   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16585 
16586   ins_cost(125);
16587   format %{ "testb   $mem, $imm\t# byte" %}
16588   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16589   ins_pipe(ialu_cr_reg_mem);
16590 %}
16591 
16592 //----------Max and Min--------------------------------------------------------
16593 // Min Instructions
16594 
16595 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16596 %{
16597   predicate(!UseAPX);
16598   effect(USE_DEF dst, USE src, USE cr);
16599 
16600   format %{ "cmovlgt $dst, $src\t# min" %}
16601   ins_encode %{
16602     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16603   %}
16604   ins_pipe(pipe_cmov_reg);
16605 %}
16606 
16607 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16608 %{
16609   predicate(UseAPX);
16610   effect(DEF dst, USE src1, USE src2, USE cr);
16611 
16612   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16613   ins_encode %{
16614     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16615   %}
16616   ins_pipe(pipe_cmov_reg);
16617 %}
16618 
16619 instruct minI_rReg(rRegI dst, rRegI src)
16620 %{
16621   predicate(!UseAPX);
16622   match(Set dst (MinI dst src));
16623 
16624   ins_cost(200);
16625   expand %{
16626     rFlagsReg cr;
16627     compI_rReg(cr, dst, src);
16628     cmovI_reg_g(dst, src, cr);
16629   %}
16630 %}
16631 
16632 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16633 %{
16634   predicate(UseAPX);
16635   match(Set dst (MinI src1 src2));
16636   effect(DEF dst, USE src1, USE src2);
16637   flag(PD::Flag_ndd_demotable_opr1);
16638 
16639   ins_cost(200);
16640   expand %{
16641     rFlagsReg cr;
16642     compI_rReg(cr, src1, src2);
16643     cmovI_reg_g_ndd(dst, src1, src2, cr);
16644   %}
16645 %}
16646 
16647 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16648 %{
16649   predicate(!UseAPX);
16650   effect(USE_DEF dst, USE src, USE cr);
16651 
16652   format %{ "cmovllt $dst, $src\t# max" %}
16653   ins_encode %{
16654     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16655   %}
16656   ins_pipe(pipe_cmov_reg);
16657 %}
16658 
16659 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16660 %{
16661   predicate(UseAPX);
16662   effect(DEF dst, USE src1, USE src2, USE cr);
16663 
16664   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16665   ins_encode %{
16666     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16667   %}
16668   ins_pipe(pipe_cmov_reg);
16669 %}
16670 
16671 instruct maxI_rReg(rRegI dst, rRegI src)
16672 %{
16673   predicate(!UseAPX);
16674   match(Set dst (MaxI dst src));
16675 
16676   ins_cost(200);
16677   expand %{
16678     rFlagsReg cr;
16679     compI_rReg(cr, dst, src);
16680     cmovI_reg_l(dst, src, cr);
16681   %}
16682 %}
16683 
16684 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16685 %{
16686   predicate(UseAPX);
16687   match(Set dst (MaxI src1 src2));
16688   effect(DEF dst, USE src1, USE src2);
16689   flag(PD::Flag_ndd_demotable_opr1);
16690 
16691   ins_cost(200);
16692   expand %{
16693     rFlagsReg cr;
16694     compI_rReg(cr, src1, src2);
16695     cmovI_reg_l_ndd(dst, src1, src2, cr);
16696   %}
16697 %}
16698 
16699 // ============================================================================
16700 // Branch Instructions
16701 
16702 // Jump Direct - Label defines a relative address from JMP+1
16703 instruct jmpDir(label labl)
16704 %{
16705   match(Goto);
16706   effect(USE labl);
16707 
16708   ins_cost(300);
16709   format %{ "jmp     $labl" %}
16710   size(5);
16711   ins_encode %{
16712     Label* L = $labl$$label;
16713     __ jmp(*L, false); // Always long jump
16714   %}
16715   ins_pipe(pipe_jmp);
16716 %}
16717 
16718 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16719 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16720 %{
16721   match(If cop cr);
16722   effect(USE labl);
16723 
16724   ins_cost(300);
16725   format %{ "j$cop     $labl" %}
16726   size(6);
16727   ins_encode %{
16728     Label* L = $labl$$label;
16729     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16730   %}
16731   ins_pipe(pipe_jcc);
16732 %}
16733 
16734 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16735 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16736 %{
16737   match(CountedLoopEnd cop cr);
16738   effect(USE labl);
16739 
16740   ins_cost(300);
16741   format %{ "j$cop     $labl\t# loop end" %}
16742   size(6);
16743   ins_encode %{
16744     Label* L = $labl$$label;
16745     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16746   %}
16747   ins_pipe(pipe_jcc);
16748 %}
16749 
16750 // Jump Direct Conditional - using unsigned comparison
16751 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16752   match(If cop cmp);
16753   effect(USE labl);
16754 
16755   ins_cost(300);
16756   format %{ "j$cop,u   $labl" %}
16757   size(6);
16758   ins_encode %{
16759     Label* L = $labl$$label;
16760     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16761   %}
16762   ins_pipe(pipe_jcc);
16763 %}
16764 
16765 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16766   match(If cop cmp);
16767   effect(USE labl);
16768 
16769   ins_cost(200);
16770   format %{ "j$cop,u   $labl" %}
16771   size(6);
16772   ins_encode %{
16773     Label* L = $labl$$label;
16774     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16775   %}
16776   ins_pipe(pipe_jcc);
16777 %}
16778 
16779 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16780   match(If cop cmp);
16781   effect(USE labl);
16782 
16783   ins_cost(200);
16784   format %{ $$template
16785     if ($cop$$cmpcode == Assembler::notEqual) {
16786       $$emit$$"jp,u    $labl\n\t"
16787       $$emit$$"j$cop,u   $labl"
16788     } else {
16789       $$emit$$"jp,u    done\n\t"
16790       $$emit$$"j$cop,u   $labl\n\t"
16791       $$emit$$"done:"
16792     }
16793   %}
16794   ins_encode %{
16795     Label* l = $labl$$label;
16796     if ($cop$$cmpcode == Assembler::notEqual) {
16797       __ jcc(Assembler::parity, *l, false);
16798       __ jcc(Assembler::notEqual, *l, false);
16799     } else if ($cop$$cmpcode == Assembler::equal) {
16800       Label done;
16801       __ jccb(Assembler::parity, done);
16802       __ jcc(Assembler::equal, *l, false);
16803       __ bind(done);
16804     } else {
16805        ShouldNotReachHere();
16806     }
16807   %}
16808   ins_pipe(pipe_jcc);
16809 %}
16810 
16811 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
16817 
16818 instruct partialSubtypeCheck(rdi_RegP result,
16819                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16820                              rFlagsReg cr)
16821 %{
16822   match(Set result (PartialSubtypeCheck sub super));
16823   predicate(!UseSecondarySupersTable);
16824   effect(KILL rcx, KILL cr);
16825 
16826   ins_cost(1100);  // slightly larger than the next version
16827   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16828             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16829             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16830             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16831             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16832             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16833             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
16834     "miss:\t" %}
16835 
16836   ins_encode %{
16837     Label miss;
16838     // NB: Callers may assume that, when $result is a valid register,
16839     // check_klass_subtype_slow_path_linear sets it to a nonzero
16840     // value.
16841     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16842                                             $rcx$$Register, $result$$Register,
16843                                             nullptr, &miss,
16844                                             /*set_cond_codes:*/ true);
16845     __ xorptr($result$$Register, $result$$Register);
16846     __ bind(miss);
16847   %}
16848 
16849   ins_pipe(pipe_slow);
16850 %}
16851 
16852 // ============================================================================
// Two versions of the hashtable-based partialSubtypeCheck, both used when
// we need to search for a superclass in the secondary supers array.
// The first is used when we don't know _a priori_ the class being
// searched for.  The second, far more common, is used when we do know:
// this covers instanceof, checkcast, and any case where C2 can determine
// the superclass by constant propagation.
16859 
16860 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16861                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16862                                        rFlagsReg cr)
16863 %{
16864   match(Set result (PartialSubtypeCheck sub super));
16865   predicate(UseSecondarySupersTable);
16866   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16867 
16868   ins_cost(1000);
16869   format %{ "partialSubtypeCheck $result, $sub, $super" %}
16870 
16871   ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
16874   %}
16875 
16876   ins_pipe(pipe_slow);
16877 %}
16878 
16879 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16880                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16881                                        rFlagsReg cr)
16882 %{
16883   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16884   predicate(UseSecondarySupersTable);
16885   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16886 
16887   ins_cost(700);  // smaller than the next version
16888   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16889 
16890   ins_encode %{
16891     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16892     if (InlineSecondarySupersTest) {
16893       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
16894                                        $temp3$$Register, $temp4$$Register, $result$$Register,
16895                                        super_klass_slot);
16896     } else {
16897       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16898     }
16899   %}
16900 
16901   ins_pipe(pipe_slow);
16902 %}
16903 
16904 // ============================================================================
16905 // Branch Instructions -- short offset versions
16906 //
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether a
// branch is close enough for the short form via the is_short_branch_offset()
// predicate in the machine-specific code section of the file.
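//
// For reference, the relevant x86 encodings are:
//   jmp  rel32   E9 cd     (5 bytes)     jmp  rel8   EB cb   (2 bytes)
//   jcc  rel32   0F 8x cd  (6 bytes)     jcc  rel8   7x cb   (2 bytes)
// which is where the size() attributes on the long variants above and the
// short variants below come from.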
16915 
16916 // Jump Direct - Label defines a relative address from JMP+1
16917 instruct jmpDir_short(label labl) %{
16918   match(Goto);
16919   effect(USE labl);
16920 
16921   ins_cost(300);
16922   format %{ "jmp,s   $labl" %}
16923   size(2);
16924   ins_encode %{
16925     Label* L = $labl$$label;
16926     __ jmpb(*L);
16927   %}
16928   ins_pipe(pipe_jmp);
16929   ins_short_branch(1);
16930 %}
16931 
16932 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16933 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16934   match(If cop cr);
16935   effect(USE labl);
16936 
16937   ins_cost(300);
16938   format %{ "j$cop,s   $labl" %}
16939   size(2);
16940   ins_encode %{
16941     Label* L = $labl$$label;
16942     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16943   %}
16944   ins_pipe(pipe_jcc);
16945   ins_short_branch(1);
16946 %}
16947 
16948 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16949 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16950   match(CountedLoopEnd cop cr);
16951   effect(USE labl);
16952 
16953   ins_cost(300);
16954   format %{ "j$cop,s   $labl\t# loop end" %}
16955   size(2);
16956   ins_encode %{
16957     Label* L = $labl$$label;
16958     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16959   %}
16960   ins_pipe(pipe_jcc);
16961   ins_short_branch(1);
16962 %}
16963 
16964 // Jump Direct Conditional - using unsigned comparison
16965 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16966   match(If cop cmp);
16967   effect(USE labl);
16968 
16969   ins_cost(300);
16970   format %{ "j$cop,us  $labl" %}
16971   size(2);
16972   ins_encode %{
16973     Label* L = $labl$$label;
16974     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16975   %}
16976   ins_pipe(pipe_jcc);
16977   ins_short_branch(1);
16978 %}
16979 
16980 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16981   match(If cop cmp);
16982   effect(USE labl);
16983 
16984   ins_cost(300);
16985   format %{ "j$cop,us  $labl" %}
16986   size(2);
16987   ins_encode %{
16988     Label* L = $labl$$label;
16989     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16990   %}
16991   ins_pipe(pipe_jcc);
16992   ins_short_branch(1);
16993 %}
16994 
16995 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16996   match(If cop cmp);
16997   effect(USE labl);
16998 
16999   ins_cost(300);
17000   format %{ $$template
17001     if ($cop$$cmpcode == Assembler::notEqual) {
17002       $$emit$$"jp,u,s  $labl\n\t"
17003       $$emit$$"j$cop,u,s  $labl"
17004     } else {
17005       $$emit$$"jp,u,s  done\n\t"
17006       $$emit$$"j$cop,u,s  $labl\n\t"
17007       $$emit$$"done:"
17008     }
17009   %}
17010   size(4);
17011   ins_encode %{
17012     Label* l = $labl$$label;
17013     if ($cop$$cmpcode == Assembler::notEqual) {
17014       __ jccb(Assembler::parity, *l);
17015       __ jccb(Assembler::notEqual, *l);
17016     } else if ($cop$$cmpcode == Assembler::equal) {
17017       Label done;
17018       __ jccb(Assembler::parity, done);
17019       __ jccb(Assembler::equal, *l);
17020       __ bind(done);
17021     } else {
17022        ShouldNotReachHere();
17023     }
17024   %}
17025   ins_pipe(pipe_jcc);
17026   ins_short_branch(1);
17027 %}
17028 
17029 // ============================================================================
17030 // inlined locking and unlocking
17031 
17032 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17033   match(Set cr (FastLock object box));
17034   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17035   ins_cost(300);
17036   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17037   ins_encode %{
17038     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17039   %}
17040   ins_pipe(pipe_slow);
17041 %}
17042 
17043 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17044   match(Set cr (FastUnlock object rax_reg));
17045   effect(TEMP tmp, USE_KILL rax_reg);
17046   ins_cost(300);
17047   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17048   ins_encode %{
17049     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17050   %}
17051   ins_pipe(pipe_slow);
17052 %}
17053 
17054 
17055 // ============================================================================
17056 // Safepoint Instructions
17057 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17058 %{
17059   match(SafePoint poll);
17060   effect(KILL cr, USE poll);
17061 
17062   format %{ "testl   rax, [$poll]\t"
17063             "# Safepoint: poll for GC" %}
17064   ins_cost(125);
17065   ins_encode %{
17066     __ relocate(relocInfo::poll_type);
17067     address pre_pc = __ pc();
17068     __ testl(rax, Address($poll$$Register, 0));
17069     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17070   %}
17071   ins_pipe(ialu_reg_mem);
17072 %}
17073 
17074 instruct mask_all_evexL(kReg dst, rRegL src) %{
17075   match(Set dst (MaskAll src));
17076   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17077   ins_encode %{
17078     int mask_len = Matcher::vector_length(this);
17079     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17080   %}
17081   ins_pipe( pipe_slow );
17082 %}
17083 
17084 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17085   predicate(Matcher::vector_length(n) > 32);
17086   match(Set dst (MaskAll src));
17087   effect(TEMP tmp);
17088   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17089   ins_encode %{
17090     int mask_len = Matcher::vector_length(this);
17091     __ movslq($tmp$$Register, $src$$Register);
17092     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17093   %}
17094   ins_pipe( pipe_slow );
17095 %}
17096 
17097 // ============================================================================
17098 // Procedure Call/Return Instructions
17099 // Call Java Static Instruction
17100 // Note: If this code changes, the corresponding ret_addr_offset() and
17101 //       compute_padding() functions will have to be adjusted.
17102 instruct CallStaticJavaDirect(method meth) %{
17103   match(CallStaticJava);
17104   effect(USE meth);
17105 
17106   ins_cost(300);
17107   format %{ "call,static " %}
17108   opcode(0xE8); /* E8 cd */
17109   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17110   ins_pipe(pipe_slow);
17111   ins_alignment(4);
17112 %}
17113 
17114 // Call Java Dynamic Instruction
17115 // Note: If this code changes, the corresponding ret_addr_offset() and
17116 //       compute_padding() functions will have to be adjusted.
17117 instruct CallDynamicJavaDirect(method meth)
17118 %{
17119   match(CallDynamicJava);
17120   effect(USE meth);
17121 
17122   ins_cost(300);
17123   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17124             "call,dynamic " %}
17125   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17126   ins_pipe(pipe_slow);
17127   ins_alignment(4);
17128 %}
17129 
17130 // Call Runtime Instruction
17131 instruct CallRuntimeDirect(method meth)
17132 %{
17133   match(CallRuntime);
17134   effect(USE meth);
17135 
17136   ins_cost(300);
17137   format %{ "call,runtime " %}
17138   ins_encode(clear_avx, Java_To_Runtime(meth));
17139   ins_pipe(pipe_slow);
17140 %}
17141 
17142 // Call runtime without safepoint
17143 instruct CallLeafDirect(method meth)
17144 %{
17145   match(CallLeaf);
17146   effect(USE meth);
17147 
17148   ins_cost(300);
17149   format %{ "call_leaf,runtime " %}
17150   ins_encode(clear_avx, Java_To_Runtime(meth));
17151   ins_pipe(pipe_slow);
17152 %}
17153 
17154 // Call runtime without safepoint and with vector arguments
17155 instruct CallLeafDirectVector(method meth)
17156 %{
17157   match(CallLeafVector);
17158   effect(USE meth);
17159 
17160   ins_cost(300);
17161   format %{ "call_leaf,vector " %}
17162   ins_encode(Java_To_Runtime(meth));
17163   ins_pipe(pipe_slow);
17164 %}
17165 
17166 // Call runtime without safepoint
17167 instruct CallLeafNoFPDirect(method meth)
17168 %{
17169   match(CallLeafNoFP);
17170   effect(USE meth);
17171 
17172   ins_cost(300);
17173   format %{ "call_leaf_nofp,runtime " %}
17174   ins_encode(clear_avx, Java_To_Runtime(meth));
17175   ins_pipe(pipe_slow);
17176 %}
17177 
17178 // Return Instruction
17179 // Remove the return address & jump to it.
// Note: we always emit a nop after a ret to make sure there is room
// for safepoint patching.
17182 instruct Ret()
17183 %{
17184   match(Return);
17185 
17186   format %{ "ret" %}
17187   ins_encode %{
17188     __ ret(0);
17189   %}
17190   ins_pipe(pipe_jmp);
17191 %}
17192 
17193 // Tail Call; Jump from runtime stub to Java code.
17194 // Also known as an 'interprocedural jump'.
17195 // Target of jump will eventually return to caller.
17196 // TailJump below removes the return address.
17197 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17198 // emitted just above the TailCall which has reset rbp to the caller state.
17199 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17200 %{
17201   match(TailCall jump_target method_ptr);
17202 
17203   ins_cost(300);
17204   format %{ "jmp     $jump_target\t# rbx holds method" %}
17205   ins_encode %{
17206     __ jmp($jump_target$$Register);
17207   %}
17208   ins_pipe(pipe_jmp);
17209 %}
17210 
17211 // Tail Jump; remove the return address; jump to target.
17212 // TailCall above leaves the return address around.
17213 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17214 %{
17215   match(TailJump jump_target ex_oop);
17216 
17217   ins_cost(300);
17218   format %{ "popq    rdx\t# pop return address\n\t"
17219             "jmp     $jump_target" %}
17220   ins_encode %{
17221     __ popq(as_Register(RDX_enc));
17222     __ jmp($jump_target$$Register);
17223   %}
17224   ins_pipe(pipe_jmp);
17225 %}
17226 
17227 // Forward exception.
17228 instruct ForwardExceptionjmp()
17229 %{
17230   match(ForwardException);
17231 
17232   format %{ "jmp     forward_exception_stub" %}
17233   ins_encode %{
17234     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17235   %}
17236   ins_pipe(pipe_jmp);
17237 %}
17238 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
// just prior to jumping to this handler.  No code emitted.
17242 instruct CreateException(rax_RegP ex_oop)
17243 %{
17244   match(Set ex_oop (CreateEx));
17245 
17246   size(0);
17247   // use the following format syntax
17248   format %{ "# exception oop is in rax; no code emitted" %}
17249   ins_encode();
17250   ins_pipe(empty);
17251 %}
17252 
17253 // Rethrow exception:
17254 // The exception oop will come in the first argument position.
17255 // Then JUMP (not call) to the rethrow stub code.
17256 instruct RethrowException()
17257 %{
17258   match(Rethrow);
17259 
17260   // use the following format syntax
17261   format %{ "jmp     rethrow_stub" %}
17262   ins_encode %{
17263     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17264   %}
17265   ins_pipe(pipe_jmp);
17266 %}
17267 
17268 // ============================================================================
17269 // This name is KNOWN by the ADLC and cannot be changed.
17270 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
17272 instruct tlsLoadP(r15_RegP dst) %{
17273   match(Set dst (ThreadLocal));
17274   effect(DEF dst);
17275 
17276   size(0);
17277   format %{ "# TLS is in R15" %}
17278   ins_encode( /*empty encoding*/ );
17279   ins_pipe(ialu_reg_reg);
17280 %}
17281 
17282 instruct addF_reg(regF dst, regF src) %{
17283   predicate(UseAVX == 0);
17284   match(Set dst (AddF dst src));
17285 
17286   format %{ "addss   $dst, $src" %}
17287   ins_cost(150);
17288   ins_encode %{
17289     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17290   %}
17291   ins_pipe(pipe_slow);
17292 %}
17293 
17294 instruct addF_mem(regF dst, memory src) %{
17295   predicate(UseAVX == 0);
17296   match(Set dst (AddF dst (LoadF src)));
17297 
17298   format %{ "addss   $dst, $src" %}
17299   ins_cost(150);
17300   ins_encode %{
17301     __ addss($dst$$XMMRegister, $src$$Address);
17302   %}
17303   ins_pipe(pipe_slow);
17304 %}
17305 
17306 instruct addF_imm(regF dst, immF con) %{
17307   predicate(UseAVX == 0);
17308   match(Set dst (AddF dst con));
17309   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17310   ins_cost(150);
17311   ins_encode %{
17312     __ addss($dst$$XMMRegister, $constantaddress($con));
17313   %}
17314   ins_pipe(pipe_slow);
17315 %}
17316 
17317 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17318   predicate(UseAVX > 0);
17319   match(Set dst (AddF src1 src2));
17320 
17321   format %{ "vaddss  $dst, $src1, $src2" %}
17322   ins_cost(150);
17323   ins_encode %{
17324     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17325   %}
17326   ins_pipe(pipe_slow);
17327 %}
17328 
17329 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17330   predicate(UseAVX > 0);
17331   match(Set dst (AddF src1 (LoadF src2)));
17332 
17333   format %{ "vaddss  $dst, $src1, $src2" %}
17334   ins_cost(150);
17335   ins_encode %{
17336     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17337   %}
17338   ins_pipe(pipe_slow);
17339 %}
17340 
17341 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17342   predicate(UseAVX > 0);
17343   match(Set dst (AddF src con));
17344 
17345   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17346   ins_cost(150);
17347   ins_encode %{
17348     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17349   %}
17350   ins_pipe(pipe_slow);
17351 %}
17352 
17353 instruct addD_reg(regD dst, regD src) %{
17354   predicate(UseAVX == 0);
17355   match(Set dst (AddD dst src));
17356 
17357   format %{ "addsd   $dst, $src" %}
17358   ins_cost(150);
17359   ins_encode %{
17360     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17361   %}
17362   ins_pipe(pipe_slow);
17363 %}
17364 
17365 instruct addD_mem(regD dst, memory src) %{
17366   predicate(UseAVX == 0);
17367   match(Set dst (AddD dst (LoadD src)));
17368 
17369   format %{ "addsd   $dst, $src" %}
17370   ins_cost(150);
17371   ins_encode %{
17372     __ addsd($dst$$XMMRegister, $src$$Address);
17373   %}
17374   ins_pipe(pipe_slow);
17375 %}
17376 
17377 instruct addD_imm(regD dst, immD con) %{
17378   predicate(UseAVX == 0);
17379   match(Set dst (AddD dst con));
17380   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17381   ins_cost(150);
17382   ins_encode %{
17383     __ addsd($dst$$XMMRegister, $constantaddress($con));
17384   %}
17385   ins_pipe(pipe_slow);
17386 %}
17387 
17388 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17389   predicate(UseAVX > 0);
17390   match(Set dst (AddD src1 src2));
17391 
17392   format %{ "vaddsd  $dst, $src1, $src2" %}
17393   ins_cost(150);
17394   ins_encode %{
17395     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17396   %}
17397   ins_pipe(pipe_slow);
17398 %}
17399 
17400 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17401   predicate(UseAVX > 0);
17402   match(Set dst (AddD src1 (LoadD src2)));
17403 
17404   format %{ "vaddsd  $dst, $src1, $src2" %}
17405   ins_cost(150);
17406   ins_encode %{
17407     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17408   %}
17409   ins_pipe(pipe_slow);
17410 %}
17411 
17412 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17413   predicate(UseAVX > 0);
17414   match(Set dst (AddD src con));
17415 
17416   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17417   ins_cost(150);
17418   ins_encode %{
17419     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17420   %}
17421   ins_pipe(pipe_slow);
17422 %}
17423 
17424 instruct subF_reg(regF dst, regF src) %{
17425   predicate(UseAVX == 0);
17426   match(Set dst (SubF dst src));
17427 
17428   format %{ "subss   $dst, $src" %}
17429   ins_cost(150);
17430   ins_encode %{
17431     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17432   %}
17433   ins_pipe(pipe_slow);
17434 %}
17435 
17436 instruct subF_mem(regF dst, memory src) %{
17437   predicate(UseAVX == 0);
17438   match(Set dst (SubF dst (LoadF src)));
17439 
17440   format %{ "subss   $dst, $src" %}
17441   ins_cost(150);
17442   ins_encode %{
17443     __ subss($dst$$XMMRegister, $src$$Address);
17444   %}
17445   ins_pipe(pipe_slow);
17446 %}
17447 
17448 instruct subF_imm(regF dst, immF con) %{
17449   predicate(UseAVX == 0);
17450   match(Set dst (SubF dst con));
17451   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17452   ins_cost(150);
17453   ins_encode %{
17454     __ subss($dst$$XMMRegister, $constantaddress($con));
17455   %}
17456   ins_pipe(pipe_slow);
17457 %}
17458 
17459 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17460   predicate(UseAVX > 0);
17461   match(Set dst (SubF src1 src2));
17462 
17463   format %{ "vsubss  $dst, $src1, $src2" %}
17464   ins_cost(150);
17465   ins_encode %{
17466     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17467   %}
17468   ins_pipe(pipe_slow);
17469 %}
17470 
17471 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17472   predicate(UseAVX > 0);
17473   match(Set dst (SubF src1 (LoadF src2)));
17474 
17475   format %{ "vsubss  $dst, $src1, $src2" %}
17476   ins_cost(150);
17477   ins_encode %{
17478     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17479   %}
17480   ins_pipe(pipe_slow);
17481 %}
17482 
17483 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17484   predicate(UseAVX > 0);
17485   match(Set dst (SubF src con));
17486 
17487   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17488   ins_cost(150);
17489   ins_encode %{
17490     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17491   %}
17492   ins_pipe(pipe_slow);
17493 %}
17494 
17495 instruct subD_reg(regD dst, regD src) %{
17496   predicate(UseAVX == 0);
17497   match(Set dst (SubD dst src));
17498 
17499   format %{ "subsd   $dst, $src" %}
17500   ins_cost(150);
17501   ins_encode %{
17502     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17503   %}
17504   ins_pipe(pipe_slow);
17505 %}
17506 
17507 instruct subD_mem(regD dst, memory src) %{
17508   predicate(UseAVX == 0);
17509   match(Set dst (SubD dst (LoadD src)));
17510 
17511   format %{ "subsd   $dst, $src" %}
17512   ins_cost(150);
17513   ins_encode %{
17514     __ subsd($dst$$XMMRegister, $src$$Address);
17515   %}
17516   ins_pipe(pipe_slow);
17517 %}
17518 
17519 instruct subD_imm(regD dst, immD con) %{
17520   predicate(UseAVX == 0);
17521   match(Set dst (SubD dst con));
17522   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17523   ins_cost(150);
17524   ins_encode %{
17525     __ subsd($dst$$XMMRegister, $constantaddress($con));
17526   %}
17527   ins_pipe(pipe_slow);
17528 %}
17529 
17530 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17531   predicate(UseAVX > 0);
17532   match(Set dst (SubD src1 src2));
17533 
17534   format %{ "vsubsd  $dst, $src1, $src2" %}
17535   ins_cost(150);
17536   ins_encode %{
17537     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17538   %}
17539   ins_pipe(pipe_slow);
17540 %}
17541 
17542 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17543   predicate(UseAVX > 0);
17544   match(Set dst (SubD src1 (LoadD src2)));
17545 
17546   format %{ "vsubsd  $dst, $src1, $src2" %}
17547   ins_cost(150);
17548   ins_encode %{
17549     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17550   %}
17551   ins_pipe(pipe_slow);
17552 %}
17553 
17554 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17555   predicate(UseAVX > 0);
17556   match(Set dst (SubD src con));
17557 
17558   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17559   ins_cost(150);
17560   ins_encode %{
17561     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17562   %}
17563   ins_pipe(pipe_slow);
17564 %}
17565 
17566 instruct mulF_reg(regF dst, regF src) %{
17567   predicate(UseAVX == 0);
17568   match(Set dst (MulF dst src));
17569 
17570   format %{ "mulss   $dst, $src" %}
17571   ins_cost(150);
17572   ins_encode %{
17573     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17574   %}
17575   ins_pipe(pipe_slow);
17576 %}
17577 
17578 instruct mulF_mem(regF dst, memory src) %{
17579   predicate(UseAVX == 0);
17580   match(Set dst (MulF dst (LoadF src)));
17581 
17582   format %{ "mulss   $dst, $src" %}
17583   ins_cost(150);
17584   ins_encode %{
17585     __ mulss($dst$$XMMRegister, $src$$Address);
17586   %}
17587   ins_pipe(pipe_slow);
17588 %}
17589 
17590 instruct mulF_imm(regF dst, immF con) %{
17591   predicate(UseAVX == 0);
17592   match(Set dst (MulF dst con));
17593   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17594   ins_cost(150);
17595   ins_encode %{
17596     __ mulss($dst$$XMMRegister, $constantaddress($con));
17597   %}
17598   ins_pipe(pipe_slow);
17599 %}
17600 
17601 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17602   predicate(UseAVX > 0);
17603   match(Set dst (MulF src1 src2));
17604 
17605   format %{ "vmulss  $dst, $src1, $src2" %}
17606   ins_cost(150);
17607   ins_encode %{
17608     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17609   %}
17610   ins_pipe(pipe_slow);
17611 %}
17612 
17613 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17614   predicate(UseAVX > 0);
17615   match(Set dst (MulF src1 (LoadF src2)));
17616 
17617   format %{ "vmulss  $dst, $src1, $src2" %}
17618   ins_cost(150);
17619   ins_encode %{
17620     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17621   %}
17622   ins_pipe(pipe_slow);
17623 %}
17624 
17625 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17626   predicate(UseAVX > 0);
17627   match(Set dst (MulF src con));
17628 
17629   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17630   ins_cost(150);
17631   ins_encode %{
17632     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17633   %}
17634   ins_pipe(pipe_slow);
17635 %}
17636 
17637 instruct mulD_reg(regD dst, regD src) %{
17638   predicate(UseAVX == 0);
17639   match(Set dst (MulD dst src));
17640 
17641   format %{ "mulsd   $dst, $src" %}
17642   ins_cost(150);
17643   ins_encode %{
17644     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17645   %}
17646   ins_pipe(pipe_slow);
17647 %}
17648 
17649 instruct mulD_mem(regD dst, memory src) %{
17650   predicate(UseAVX == 0);
17651   match(Set dst (MulD dst (LoadD src)));
17652 
17653   format %{ "mulsd   $dst, $src" %}
17654   ins_cost(150);
17655   ins_encode %{
17656     __ mulsd($dst$$XMMRegister, $src$$Address);
17657   %}
17658   ins_pipe(pipe_slow);
17659 %}
17660 
17661 instruct mulD_imm(regD dst, immD con) %{
17662   predicate(UseAVX == 0);
17663   match(Set dst (MulD dst con));
17664   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17665   ins_cost(150);
17666   ins_encode %{
17667     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17668   %}
17669   ins_pipe(pipe_slow);
17670 %}
17671 
17672 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17673   predicate(UseAVX > 0);
17674   match(Set dst (MulD src1 src2));
17675 
17676   format %{ "vmulsd  $dst, $src1, $src2" %}
17677   ins_cost(150);
17678   ins_encode %{
17679     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17680   %}
17681   ins_pipe(pipe_slow);
17682 %}
17683 
17684 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17685   predicate(UseAVX > 0);
17686   match(Set dst (MulD src1 (LoadD src2)));
17687 
17688   format %{ "vmulsd  $dst, $src1, $src2" %}
17689   ins_cost(150);
17690   ins_encode %{
17691     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17692   %}
17693   ins_pipe(pipe_slow);
17694 %}
17695 
17696 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17697   predicate(UseAVX > 0);
17698   match(Set dst (MulD src con));
17699 
17700   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17701   ins_cost(150);
17702   ins_encode %{
17703     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17704   %}
17705   ins_pipe(pipe_slow);
17706 %}
17707 
17708 instruct divF_reg(regF dst, regF src) %{
17709   predicate(UseAVX == 0);
17710   match(Set dst (DivF dst src));
17711 
17712   format %{ "divss   $dst, $src" %}
17713   ins_cost(150);
17714   ins_encode %{
17715     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17716   %}
17717   ins_pipe(pipe_slow);
17718 %}
17719 
17720 instruct divF_mem(regF dst, memory src) %{
17721   predicate(UseAVX == 0);
17722   match(Set dst (DivF dst (LoadF src)));
17723 
17724   format %{ "divss   $dst, $src" %}
17725   ins_cost(150);
17726   ins_encode %{
17727     __ divss($dst$$XMMRegister, $src$$Address);
17728   %}
17729   ins_pipe(pipe_slow);
17730 %}
17731 
17732 instruct divF_imm(regF dst, immF con) %{
17733   predicate(UseAVX == 0);
17734   match(Set dst (DivF dst con));
17735   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17736   ins_cost(150);
17737   ins_encode %{
17738     __ divss($dst$$XMMRegister, $constantaddress($con));
17739   %}
17740   ins_pipe(pipe_slow);
17741 %}
17742 
17743 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17744   predicate(UseAVX > 0);
17745   match(Set dst (DivF src1 src2));
17746 
17747   format %{ "vdivss  $dst, $src1, $src2" %}
17748   ins_cost(150);
17749   ins_encode %{
17750     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17751   %}
17752   ins_pipe(pipe_slow);
17753 %}
17754 
17755 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17756   predicate(UseAVX > 0);
17757   match(Set dst (DivF src1 (LoadF src2)));
17758 
17759   format %{ "vdivss  $dst, $src1, $src2" %}
17760   ins_cost(150);
17761   ins_encode %{
17762     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17763   %}
17764   ins_pipe(pipe_slow);
17765 %}
17766 
17767 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17768   predicate(UseAVX > 0);
17769   match(Set dst (DivF src con));
17770 
17771   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17772   ins_cost(150);
17773   ins_encode %{
17774     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17775   %}
17776   ins_pipe(pipe_slow);
17777 %}
17778 
17779 instruct divD_reg(regD dst, regD src) %{
17780   predicate(UseAVX == 0);
17781   match(Set dst (DivD dst src));
17782 
17783   format %{ "divsd   $dst, $src" %}
17784   ins_cost(150);
17785   ins_encode %{
17786     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17787   %}
17788   ins_pipe(pipe_slow);
17789 %}
17790 
17791 instruct divD_mem(regD dst, memory src) %{
17792   predicate(UseAVX == 0);
17793   match(Set dst (DivD dst (LoadD src)));
17794 
17795   format %{ "divsd   $dst, $src" %}
17796   ins_cost(150);
17797   ins_encode %{
17798     __ divsd($dst$$XMMRegister, $src$$Address);
17799   %}
17800   ins_pipe(pipe_slow);
17801 %}
17802 
17803 instruct divD_imm(regD dst, immD con) %{
17804   predicate(UseAVX == 0);
17805   match(Set dst (DivD dst con));
17806   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17807   ins_cost(150);
17808   ins_encode %{
17809     __ divsd($dst$$XMMRegister, $constantaddress($con));
17810   %}
17811   ins_pipe(pipe_slow);
17812 %}
17813 
17814 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17815   predicate(UseAVX > 0);
17816   match(Set dst (DivD src1 src2));
17817 
17818   format %{ "vdivsd  $dst, $src1, $src2" %}
17819   ins_cost(150);
17820   ins_encode %{
17821     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17822   %}
17823   ins_pipe(pipe_slow);
17824 %}
17825 
17826 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17827   predicate(UseAVX > 0);
17828   match(Set dst (DivD src1 (LoadD src2)));
17829 
17830   format %{ "vdivsd  $dst, $src1, $src2" %}
17831   ins_cost(150);
17832   ins_encode %{
17833     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17834   %}
17835   ins_pipe(pipe_slow);
17836 %}
17837 
17838 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17839   predicate(UseAVX > 0);
17840   match(Set dst (DivD src con));
17841 
17842   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17843   ins_cost(150);
17844   ins_encode %{
17845     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17846   %}
17847   ins_pipe(pipe_slow);
17848 %}
17849 
17850 instruct absF_reg(regF dst) %{
17851   predicate(UseAVX == 0);
17852   match(Set dst (AbsF dst));
17853   ins_cost(150);
17854   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
17855   ins_encode %{
17856     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17857   %}
17858   ins_pipe(pipe_slow);
17859 %}
17860 
17861 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17862   predicate(UseAVX > 0);
17863   match(Set dst (AbsF src));
17864   ins_cost(150);
17865   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17866   ins_encode %{
17867     int vlen_enc = Assembler::AVX_128bit;
17868     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17869               ExternalAddress(float_signmask()), vlen_enc);
17870   %}
17871   ins_pipe(pipe_slow);
17872 %}
17873 
17874 instruct absD_reg(regD dst) %{
17875   predicate(UseAVX == 0);
17876   match(Set dst (AbsD dst));
17877   ins_cost(150);
17878   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
17879             "# abs double by sign masking" %}
17880   ins_encode %{
17881     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17882   %}
17883   ins_pipe(pipe_slow);
17884 %}
17885 
17886 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17887   predicate(UseAVX > 0);
17888   match(Set dst (AbsD src));
17889   ins_cost(150);
17890   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
17891             "# abs double by sign masking" %}
17892   ins_encode %{
17893     int vlen_enc = Assembler::AVX_128bit;
17894     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17895               ExternalAddress(double_signmask()), vlen_enc);
17896   %}
17897   ins_pipe(pipe_slow);
17898 %}
17899 
17900 instruct negF_reg(regF dst) %{
17901   predicate(UseAVX == 0);
17902   match(Set dst (NegF dst));
17903   ins_cost(150);
17904   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
17905   ins_encode %{
17906     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17907   %}
17908   ins_pipe(pipe_slow);
17909 %}
17910 
17911 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17912   predicate(UseAVX > 0);
17913   match(Set dst (NegF src));
17914   ins_cost(150);
17915   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17916   ins_encode %{
17917     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17918                  ExternalAddress(float_signflip()));
17919   %}
17920   ins_pipe(pipe_slow);
17921 %}
17922 
17923 instruct negD_reg(regD dst) %{
17924   predicate(UseAVX == 0);
17925   match(Set dst (NegD dst));
17926   ins_cost(150);
17927   format %{ "xorpd   $dst, [0x8000000000000000]\t"
17928             "# neg double by sign flipping" %}
17929   ins_encode %{
17930     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17931   %}
17932   ins_pipe(pipe_slow);
17933 %}
17934 
17935 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17936   predicate(UseAVX > 0);
17937   match(Set dst (NegD src));
17938   ins_cost(150);
17939   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
17940             "# neg double by sign flipping" %}
17941   ins_encode %{
17942     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17943                  ExternalAddress(double_signflip()));
17944   %}
17945   ins_pipe(pipe_slow);
17946 %}
17947 
// sqrtss writes only the low element of dst, so the destination register
// needs to be pre-initialized for best performance (a stale dst carries a
// false dependency on its previous contents).  Therefore only the instruct
// rule where the input is pre-loaded into the dst register is defined below.
17950 instruct sqrtF_reg(regF dst) %{
17951   match(Set dst (SqrtF dst));
17952   format %{ "sqrtss  $dst, $dst" %}
17953   ins_encode %{
17954     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17955   %}
17956   ins_pipe(pipe_slow);
17957 %}
17958 
// sqrtsd likewise writes only the low element of dst and needs the
// destination register pre-initialized for best performance.  Therefore only
// the instruct rule where the input is pre-loaded into the dst register is
// defined below.
17961 instruct sqrtD_reg(regD dst) %{
17962   match(Set dst (SqrtD dst));
17963   format %{ "sqrtsd  $dst, $dst" %}
17964   ins_encode %{
17965     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17966   %}
17967   ins_pipe(pipe_slow);
17968 %}
17969 
17970 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17971   effect(TEMP tmp);
17972   match(Set dst (ConvF2HF src));
17973   ins_cost(125);
17974   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
17975   ins_encode %{
17976     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17977   %}
17978   ins_pipe( pipe_slow );
17979 %}
17980 
17981 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
17982   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17983   effect(TEMP ktmp, TEMP rtmp);
17984   match(Set mem (StoreC mem (ConvF2HF src)));
17985   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
17986   ins_encode %{
17987     __ movl($rtmp$$Register, 0x1);
17988     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
17989     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17990   %}
17991   ins_pipe( pipe_slow );
17992 %}
17993 
17994 instruct vconvF2HF(vec dst, vec src) %{
17995   match(Set dst (VectorCastF2HF src));
17996   format %{ "vector_conv_F2HF $dst $src" %}
17997   ins_encode %{
17998     int vlen_enc = vector_length_encoding(this, $src);
17999     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18000   %}
18001   ins_pipe( pipe_slow );
18002 %}
18003 
18004 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18005   predicate(n->as_StoreVector()->memory_size() >= 16);
18006   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18007   format %{ "vcvtps2ph $mem,$src" %}
18008   ins_encode %{
18009     int vlen_enc = vector_length_encoding(this, $src);
18010     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18011   %}
18012   ins_pipe( pipe_slow );
18013 %}
18014 
18015 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18016   match(Set dst (ConvHF2F src));
18017   format %{ "vcvtph2ps $dst,$src" %}
18018   ins_encode %{
18019     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18020   %}
18021   ins_pipe( pipe_slow );
18022 %}
18023 
18024 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18025   match(Set dst (VectorCastHF2F (LoadVector mem)));
18026   format %{ "vcvtph2ps $dst,$mem" %}
18027   ins_encode %{
18028     int vlen_enc = vector_length_encoding(this);
18029     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18030   %}
18031   ins_pipe( pipe_slow );
18032 %}
18033 
18034 instruct vconvHF2F(vec dst, vec src) %{
18035   match(Set dst (VectorCastHF2F src));
18036   ins_cost(125);
18037   format %{ "vector_conv_HF2F $dst,$src" %}
18038   ins_encode %{
18039     int vlen_enc = vector_length_encoding(this);
18040     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18041   %}
18042   ins_pipe( pipe_slow );
18043 %}
18044 
18045 // ---------------------------------------- VectorReinterpret ------------------------------------
18046 instruct reinterpret_mask(kReg dst) %{
18047   predicate(n->bottom_type()->isa_vectmask() &&
18048             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18049   match(Set dst (VectorReinterpret dst));
18050   ins_cost(125);
18051   format %{ "vector_reinterpret $dst\t!" %}
18052   ins_encode %{
18053     // empty
18054   %}
18055   ins_pipe( pipe_slow );
18056 %}
18057 
18058 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18059   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18060             n->bottom_type()->isa_vectmask() &&
18061             n->in(1)->bottom_type()->isa_vectmask() &&
18062             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst have the same size in bytes
18064   match(Set dst (VectorReinterpret src));
18065   effect(TEMP xtmp);
18066   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18067   ins_encode %{
18068      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18069      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18074   %}
18075   ins_pipe( pipe_slow );
18076 %}
18077 
18078 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18079   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18080             n->bottom_type()->isa_vectmask() &&
18081             n->in(1)->bottom_type()->isa_vectmask() &&
18082             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18083              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst have the same size in bytes
18085   match(Set dst (VectorReinterpret src));
18086   effect(TEMP xtmp);
18087   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18088   ins_encode %{
18089      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18090      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18095   %}
18096   ins_pipe( pipe_slow );
18097 %}
18098 
18099 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18100   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18101             n->bottom_type()->isa_vectmask() &&
18102             n->in(1)->bottom_type()->isa_vectmask() &&
18103             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18104              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst have the same size in bytes
18106   match(Set dst (VectorReinterpret src));
18107   effect(TEMP xtmp);
18108   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18109   ins_encode %{
18110      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18111      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18116   %}
18117   ins_pipe( pipe_slow );
18118 %}
18119 
18120 instruct reinterpret(vec dst) %{
18121   predicate(!n->bottom_type()->isa_vectmask() &&
18122             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18123   match(Set dst (VectorReinterpret dst));
18124   ins_cost(125);
18125   format %{ "vector_reinterpret $dst\t!" %}
18126   ins_encode %{
18127     // empty
18128   %}
18129   ins_pipe( pipe_slow );
18130 %}
18131 
18132 instruct reinterpret_expand(vec dst, vec src) %{
18133   predicate(UseAVX == 0 &&
18134             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18135   match(Set dst (VectorReinterpret src));
18136   ins_cost(125);
18137   effect(TEMP dst);
18138   format %{ "vector_reinterpret_expand $dst,$src" %}
18139   ins_encode %{
18140     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18141     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18142 
18143     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18144     if (src_vlen_in_bytes == 4) {
18145       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18146     } else {
18147       assert(src_vlen_in_bytes == 8, "");
18148       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18149     }
18150     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18151   %}
18152   ins_pipe( pipe_slow );
18153 %}
18154 
18155 instruct vreinterpret_expand4(legVec dst, vec src) %{
18156   predicate(UseAVX > 0 &&
18157             !n->bottom_type()->isa_vectmask() &&
18158             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18159             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18160   match(Set dst (VectorReinterpret src));
18161   ins_cost(125);
18162   format %{ "vector_reinterpret_expand $dst,$src" %}
18163   ins_encode %{
18164     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18165   %}
18166   ins_pipe( pipe_slow );
18167 %}
18168 
18169 
18170 instruct vreinterpret_expand(legVec dst, vec src) %{
18171   predicate(UseAVX > 0 &&
18172             !n->bottom_type()->isa_vectmask() &&
18173             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18174             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18175   match(Set dst (VectorReinterpret src));
18176   ins_cost(125);
18177   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18178   ins_encode %{
18179     switch (Matcher::vector_length_in_bytes(this, $src)) {
18180       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18181       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18182       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18183       default: ShouldNotReachHere();
18184     }
18185   %}
18186   ins_pipe( pipe_slow );
18187 %}
18188 
18189 instruct reinterpret_shrink(vec dst, legVec src) %{
18190   predicate(!n->bottom_type()->isa_vectmask() &&
18191             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18192   match(Set dst (VectorReinterpret src));
18193   ins_cost(125);
18194   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18195   ins_encode %{
18196     switch (Matcher::vector_length_in_bytes(this)) {
18197       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18198       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18199       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18200       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18201       default: ShouldNotReachHere();
18202     }
18203   %}
18204   ins_pipe( pipe_slow );
18205 %}
18206 
18207 // ----------------------------------------------------------------------------------------------------
18208 
18209 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18210   match(Set dst (RoundDoubleMode src rmode));
18211   format %{ "roundsd $dst,$src" %}
18212   ins_cost(150);
18213   ins_encode %{
18214     assert(UseSSE >= 4, "required");
18215     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18216       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18217     }
18218     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18219   %}
18220   ins_pipe(pipe_slow);
18221 %}
18222 
18223 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18224   match(Set dst (RoundDoubleMode con rmode));
18225   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18226   ins_cost(150);
18227   ins_encode %{
18228     assert(UseSSE >= 4, "required");
18229     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18230   %}
18231   ins_pipe(pipe_slow);
18232 %}
18233 
18234 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18235   predicate(Matcher::vector_length(n) < 8);
18236   match(Set dst (RoundDoubleModeV src rmode));
18237   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18238   ins_encode %{
18239     assert(UseAVX > 0, "required");
18240     int vlen_enc = vector_length_encoding(this);
18241     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18242   %}
18243   ins_pipe( pipe_slow );
18244 %}
18245 
18246 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18247   predicate(Matcher::vector_length(n) == 8);
18248   match(Set dst (RoundDoubleModeV src rmode));
18249   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18250   ins_encode %{
18251     assert(UseAVX > 2, "required");
18252     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18253   %}
18254   ins_pipe( pipe_slow );
18255 %}
18256 
18257 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18258   predicate(Matcher::vector_length(n) < 8);
18259   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18260   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18261   ins_encode %{
18262     assert(UseAVX > 0, "required");
18263     int vlen_enc = vector_length_encoding(this);
18264     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18265   %}
18266   ins_pipe( pipe_slow );
18267 %}
18268 
18269 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18270   predicate(Matcher::vector_length(n) == 8);
18271   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18272   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18273   ins_encode %{
18274     assert(UseAVX > 2, "required");
18275     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18276   %}
18277   ins_pipe( pipe_slow );
18278 %}
18279 
18280 instruct onspinwait() %{
18281   match(OnSpinWait);
18282   ins_cost(200);
18283 
18284   format %{
18285     $$template
18286     $$emit$$"pause\t! membar_onspinwait"
18287   %}
18288   ins_encode %{
18289     __ pause();
18290   %}
18291   ins_pipe(pipe_slow);
18292 %}
18293 
18294 // a * b + c
18295 instruct fmaD_reg(regD a, regD b, regD c) %{
18296   match(Set c (FmaD  c (Binary a b)));
18297   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18298   ins_cost(150);
18299   ins_encode %{
18300     assert(UseFMA, "Needs FMA instructions support.");
18301     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18302   %}
18303   ins_pipe( pipe_slow );
18304 %}
18305 
18306 // a * b + c
18307 instruct fmaF_reg(regF a, regF b, regF c) %{
18308   match(Set c (FmaF  c (Binary a b)));
18309   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18310   ins_cost(150);
18311   ins_encode %{
18312     assert(UseFMA, "Needs FMA instructions support.");
18313     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18314   %}
18315   ins_pipe( pipe_slow );
18316 %}
18317 
18318 // ====================VECTOR INSTRUCTIONS=====================================
18319 
18320 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18321 instruct MoveVec2Leg(legVec dst, vec src) %{
18322   match(Set dst src);
18323   format %{ "" %}
18324   ins_encode %{
18325     ShouldNotReachHere();
18326   %}
18327   ins_pipe( fpu_reg_reg );
18328 %}
18329 
18330 instruct MoveLeg2Vec(vec dst, legVec src) %{
18331   match(Set dst src);
18332   format %{ "" %}
18333   ins_encode %{
18334     ShouldNotReachHere();
18335   %}
18336   ins_pipe( fpu_reg_reg );
18337 %}
18338 
18339 // ============================================================================
18340 
18341 // Load vectors generic operand pattern
18342 instruct loadV(vec dst, memory mem) %{
18343   match(Set dst (LoadVector mem));
18344   ins_cost(125);
18345   format %{ "load_vector $dst,$mem" %}
18346   ins_encode %{
18347     BasicType bt = Matcher::vector_element_basic_type(this);
18348     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18349   %}
18350   ins_pipe( pipe_slow );
18351 %}
18352 
18353 // Store vectors generic operand pattern.
18354 instruct storeV(memory mem, vec src) %{
18355   match(Set mem (StoreVector mem src));
18356   ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18358   ins_encode %{
18359     switch (Matcher::vector_length_in_bytes(this, $src)) {
18360       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18361       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18362       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18363       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18364       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18365       default: ShouldNotReachHere();
18366     }
18367   %}
18368   ins_pipe( pipe_slow );
18369 %}
18370 
18371 // ---------------------------------------- Gather ------------------------------------
18372 
18373 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
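//
// For reference: LoadVectorGather is produced by the Vector API's
// indexed loads. A minimal Java sketch (hedged; the species and CPU
// features decide which of the rules below matches):
//
//   import jdk.incubator.vector.IntVector;
//   import jdk.incubator.vector.VectorSpecies;
//
//   static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_256;
//   static IntVector gather(int[] a, int[] indexMap) {
//     return IntVector.fromArray(SPECIES, a, 0, indexMap, 0);
//   }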
18374 
18375 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18376   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18377             Matcher::vector_length_in_bytes(n) <= 32);
18378   match(Set dst (LoadVectorGather mem idx));
18379   effect(TEMP dst, TEMP tmp, TEMP mask);
18380   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18381   ins_encode %{
18382     int vlen_enc = vector_length_encoding(this);
18383     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18384     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18385     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18386     __ lea($tmp$$Register, $mem$$Address);
18387     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18388   %}
18389   ins_pipe( pipe_slow );
18390 %}
18391 
18393 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18394   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18395             !is_subword_type(Matcher::vector_element_basic_type(n)));
18396   match(Set dst (LoadVectorGather mem idx));
18397   effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18399   ins_encode %{
18400     int vlen_enc = vector_length_encoding(this);
18401     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18402     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18403     __ lea($tmp$$Register, $mem$$Address);
18404     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18405   %}
18406   ins_pipe( pipe_slow );
18407 %}
18408 
18409 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18410   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18411             !is_subword_type(Matcher::vector_element_basic_type(n)));
18412   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18413   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18415   ins_encode %{
18416     assert(UseAVX > 2, "sanity");
18417     int vlen_enc = vector_length_encoding(this);
18418     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18419     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is first moved to a temporary.
18422     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18423     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18424     __ lea($tmp$$Register, $mem$$Address);
18425     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18426   %}
18427   ins_pipe( pipe_slow );
18428 %}
18429 
18430 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18431   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18432   match(Set dst (LoadVectorGather mem idx_base));
18433   effect(TEMP tmp, TEMP rtmp);
18434   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18435   ins_encode %{
18436     int vlen_enc = vector_length_encoding(this);
18437     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18438     __ lea($tmp$$Register, $mem$$Address);
18439     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18440   %}
18441   ins_pipe( pipe_slow );
18442 %}
18443 
18444 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18445                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18446   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18447   match(Set dst (LoadVectorGather mem idx_base));
18448   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18449   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18450   ins_encode %{
18451     int vlen_enc = vector_length_encoding(this);
18452     int vector_len = Matcher::vector_length(this);
18453     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18454     __ lea($tmp$$Register, $mem$$Address);
18455     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18456     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18457                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18458   %}
18459   ins_pipe( pipe_slow );
18460 %}
18461 
18462 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18463   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18464   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18465   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18466   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18467   ins_encode %{
18468     int vlen_enc = vector_length_encoding(this);
18469     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18470     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18471     __ lea($tmp$$Register, $mem$$Address);
18472     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18473     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18474   %}
18475   ins_pipe( pipe_slow );
18476 %}
18477 
18478 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18479                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18480   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18481   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18482   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18483   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18484   ins_encode %{
18485     int vlen_enc = vector_length_encoding(this);
18486     int vector_len = Matcher::vector_length(this);
18487     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18488     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18489     __ lea($tmp$$Register, $mem$$Address);
18490     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18491     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18492     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18493                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18494   %}
18495   ins_pipe( pipe_slow );
18496 %}
18497 
18498 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18499   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18500   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18501   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18502   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18503   ins_encode %{
18504     int vlen_enc = vector_length_encoding(this);
18505     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18506     __ lea($tmp$$Register, $mem$$Address);
18507     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18508     if (elem_bt == T_SHORT) {
18509       __ movl($mask_idx$$Register, 0x55555555);
18510       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18511     }
18512     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18513     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18514   %}
18515   ins_pipe( pipe_slow );
18516 %}
18517 
18518 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18519                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18520   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18521   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18522   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18523   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18524   ins_encode %{
18525     int vlen_enc = vector_length_encoding(this);
18526     int vector_len = Matcher::vector_length(this);
18527     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18528     __ lea($tmp$$Register, $mem$$Address);
18529     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18530     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18531     if (elem_bt == T_SHORT) {
18532       __ movl($mask_idx$$Register, 0x55555555);
18533       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18534     }
18535     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18536     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18537                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18538   %}
18539   ins_pipe( pipe_slow );
18540 %}
18541 
18542 // ====================Scatter=======================================
18543 
18544 // Scatter INT, LONG, FLOAT, DOUBLE
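//
// For reference: StoreVectorScatter is produced by the Vector API's
// indexed stores. A minimal Java sketch (illustrative only):
//
//   static void scatter(int[] a, int[] indexMap, IntVector v) {
//     v.intoArray(a, 0, indexMap, 0);
//   }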
18545 
18546 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18547   predicate(UseAVX > 2);
18548   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18549   effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18551   ins_encode %{
18552     int vlen_enc = vector_length_encoding(this, $src);
18553     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18554 
18555     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18556     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18557 
18558     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18559     __ lea($tmp$$Register, $mem$$Address);
18560     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18561   %}
18562   ins_pipe( pipe_slow );
18563 %}
18564 
18565 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18566   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18567   effect(TEMP tmp, TEMP ktmp);
18568   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18569   ins_encode %{
18570     int vlen_enc = vector_length_encoding(this, $src);
18571     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18572     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18573     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is first moved to a temporary.
18576     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18577     __ lea($tmp$$Register, $mem$$Address);
18578     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18579   %}
18580   ins_pipe( pipe_slow );
18581 %}
18582 
18583 // ====================REPLICATE=======================================
18584 
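// For reference: Replicate broadcasts one scalar into every vector
// lane. It is created by auto-vectorized fill loops and by the Vector
// API's broadcast(); a minimal Java sketch (illustrative only):
//
//   ByteVector v = ByteVector.broadcast(ByteVector.SPECIES_256, (byte) 42);
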
18585 // Replicate byte scalar to be vector
18586 instruct vReplB_reg(vec dst, rRegI src) %{
18587   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18588   match(Set dst (Replicate src));
18589   format %{ "replicateB $dst,$src" %}
18590   ins_encode %{
18591     uint vlen = Matcher::vector_length(this);
18592     if (UseAVX >= 2) {
18593       int vlen_enc = vector_length_encoding(this);
18594       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18595         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18596         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18597       } else {
18598         __ movdl($dst$$XMMRegister, $src$$Register);
18599         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18600       }
18601     } else {
      assert(UseAVX < 2, "");
18603       __ movdl($dst$$XMMRegister, $src$$Register);
18604       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18605       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18606       if (vlen >= 16) {
18607         assert(vlen == 16, "");
18608         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18609       }
18610     }
18611   %}
18612   ins_pipe( pipe_slow );
18613 %}
18614 
18615 instruct ReplB_mem(vec dst, memory mem) %{
18616   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18617   match(Set dst (Replicate (LoadB mem)));
18618   format %{ "replicateB $dst,$mem" %}
18619   ins_encode %{
18620     int vlen_enc = vector_length_encoding(this);
18621     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18622   %}
18623   ins_pipe( pipe_slow );
18624 %}
18625 
18626 // ====================ReplicateS=======================================
18627 
18628 instruct vReplS_reg(vec dst, rRegI src) %{
18629   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18630   match(Set dst (Replicate src));
18631   format %{ "replicateS $dst,$src" %}
18632   ins_encode %{
18633     uint vlen = Matcher::vector_length(this);
18634     int vlen_enc = vector_length_encoding(this);
18635     if (UseAVX >= 2) {
18636       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18637         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18638         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18639       } else {
18640         __ movdl($dst$$XMMRegister, $src$$Register);
18641         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18642       }
18643     } else {
18644       assert(UseAVX < 2, "");
18645       __ movdl($dst$$XMMRegister, $src$$Register);
18646       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18647       if (vlen >= 8) {
18648         assert(vlen == 8, "");
18649         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18650       }
18651     }
18652   %}
18653   ins_pipe( pipe_slow );
18654 %}
18655 
18656 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18657   match(Set dst (Replicate con));
18658   effect(TEMP rtmp);
18659   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18660   ins_encode %{
18661     int vlen_enc = vector_length_encoding(this);
18662     BasicType bt = Matcher::vector_element_basic_type(this);
18663     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18664     __ movl($rtmp$$Register, $con$$constant);
18665     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18666   %}
18667   ins_pipe( pipe_slow );
18668 %}
18669 
18670 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18671   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18672   match(Set dst (Replicate src));
18673   effect(TEMP rtmp);
18674   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18675   ins_encode %{
18676     int vlen_enc = vector_length_encoding(this);
18677     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18678     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18679   %}
18680   ins_pipe( pipe_slow );
18681 %}
18682 
18683 instruct ReplS_mem(vec dst, memory mem) %{
18684   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18685   match(Set dst (Replicate (LoadS mem)));
18686   format %{ "replicateS $dst,$mem" %}
18687   ins_encode %{
18688     int vlen_enc = vector_length_encoding(this);
18689     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18690   %}
18691   ins_pipe( pipe_slow );
18692 %}
18693 
18694 // ====================ReplicateI=======================================
18695 
18696 instruct ReplI_reg(vec dst, rRegI src) %{
18697   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18698   match(Set dst (Replicate src));
18699   format %{ "replicateI $dst,$src" %}
18700   ins_encode %{
18701     uint vlen = Matcher::vector_length(this);
18702     int vlen_enc = vector_length_encoding(this);
18703     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18704       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18705     } else if (VM_Version::supports_avx2()) {
18706       __ movdl($dst$$XMMRegister, $src$$Register);
18707       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18708     } else {
18709       __ movdl($dst$$XMMRegister, $src$$Register);
18710       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18711     }
18712   %}
18713   ins_pipe( pipe_slow );
18714 %}
18715 
18716 instruct ReplI_mem(vec dst, memory mem) %{
18717   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18718   match(Set dst (Replicate (LoadI mem)));
18719   format %{ "replicateI $dst,$mem" %}
18720   ins_encode %{
18721     int vlen_enc = vector_length_encoding(this);
18722     if (VM_Version::supports_avx2()) {
18723       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18724     } else if (VM_Version::supports_avx()) {
18725       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18726     } else {
18727       __ movdl($dst$$XMMRegister, $mem$$Address);
18728       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18729     }
18730   %}
18731   ins_pipe( pipe_slow );
18732 %}
18733 
18734 instruct ReplI_imm(vec dst, immI con) %{
18735   predicate(Matcher::is_non_long_integral_vector(n));
18736   match(Set dst (Replicate con));
18737   format %{ "replicateI $dst,$con" %}
18738   ins_encode %{
18739     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18740                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18741                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18742     BasicType bt = Matcher::vector_element_basic_type(this);
18743     int vlen = Matcher::vector_length_in_bytes(this);
18744     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18745   %}
18746   ins_pipe( pipe_slow );
18747 %}
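
// A worked reading of the replication count passed to vreplicate_imm
// above (an interpretation, not normative): the constant table needs
// only enough copies of the immediate to fill the smallest granule the
// loader can broadcast from - 4 bytes with AVX (vbroadcastss), 8 bytes
// with SSE3 (movddup), 16 bytes otherwise. E.g. for T_SHORT (2 bytes):
// AVX stores 4/2 = 2 copies, SSE3-only 8/2 = 4, pre-SSE3 16/2 = 8.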
18748 
18749 // Replicate scalar zero to be vector
18750 instruct ReplI_zero(vec dst, immI_0 zero) %{
18751   predicate(Matcher::is_non_long_integral_vector(n));
18752   match(Set dst (Replicate zero));
18753   format %{ "replicateI $dst,$zero" %}
18754   ins_encode %{
18755     int vlen_enc = vector_length_encoding(this);
18756     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18757       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18758     } else {
18759       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18760     }
18761   %}
18762   ins_pipe( fpu_reg_reg );
18763 %}
18764 
18765 instruct ReplI_M1(vec dst, immI_M1 con) %{
18766   predicate(Matcher::is_non_long_integral_vector(n));
18767   match(Set dst (Replicate con));
18768   format %{ "vallones $dst" %}
18769   ins_encode %{
18770     int vector_len = vector_length_encoding(this);
18771     __ vallones($dst$$XMMRegister, vector_len);
18772   %}
18773   ins_pipe( pipe_slow );
18774 %}
18775 
18776 // ====================ReplicateL=======================================
18777 
18778 // Replicate long (8 byte) scalar to be vector
18779 instruct ReplL_reg(vec dst, rRegL src) %{
18780   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18781   match(Set dst (Replicate src));
18782   format %{ "replicateL $dst,$src" %}
18783   ins_encode %{
18784     int vlen = Matcher::vector_length(this);
18785     int vlen_enc = vector_length_encoding(this);
18786     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18787       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18788     } else if (VM_Version::supports_avx2()) {
18789       __ movdq($dst$$XMMRegister, $src$$Register);
18790       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18791     } else {
18792       __ movdq($dst$$XMMRegister, $src$$Register);
18793       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18794     }
18795   %}
18796   ins_pipe( pipe_slow );
18797 %}
18798 
18799 instruct ReplL_mem(vec dst, memory mem) %{
18800   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18801   match(Set dst (Replicate (LoadL mem)));
18802   format %{ "replicateL $dst,$mem" %}
18803   ins_encode %{
18804     int vlen_enc = vector_length_encoding(this);
18805     if (VM_Version::supports_avx2()) {
18806       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18807     } else if (VM_Version::supports_sse3()) {
18808       __ movddup($dst$$XMMRegister, $mem$$Address);
18809     } else {
18810       __ movq($dst$$XMMRegister, $mem$$Address);
18811       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18812     }
18813   %}
18814   ins_pipe( pipe_slow );
18815 %}
18816 
18817 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18818 instruct ReplL_imm(vec dst, immL con) %{
18819   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18820   match(Set dst (Replicate con));
18821   format %{ "replicateL $dst,$con" %}
18822   ins_encode %{
18823     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18824     int vlen = Matcher::vector_length_in_bytes(this);
18825     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18826   %}
18827   ins_pipe( pipe_slow );
18828 %}
18829 
18830 instruct ReplL_zero(vec dst, immL0 zero) %{
18831   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18832   match(Set dst (Replicate zero));
18833   format %{ "replicateL $dst,$zero" %}
18834   ins_encode %{
18835     int vlen_enc = vector_length_encoding(this);
18836     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18837       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18838     } else {
18839       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18840     }
18841   %}
18842   ins_pipe( fpu_reg_reg );
18843 %}
18844 
18845 instruct ReplL_M1(vec dst, immL_M1 con) %{
18846   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18847   match(Set dst (Replicate con));
18848   format %{ "vallones $dst" %}
18849   ins_encode %{
18850     int vector_len = vector_length_encoding(this);
18851     __ vallones($dst$$XMMRegister, vector_len);
18852   %}
18853   ins_pipe( pipe_slow );
18854 %}
18855 
18856 // ====================ReplicateF=======================================
18857 
18858 instruct vReplF_reg(vec dst, vlRegF src) %{
18859   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18860   match(Set dst (Replicate src));
18861   format %{ "replicateF $dst,$src" %}
18862   ins_encode %{
18863     uint vlen = Matcher::vector_length(this);
18864     int vlen_enc = vector_length_encoding(this);
18865     if (vlen <= 4) {
18866       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18867     } else if (VM_Version::supports_avx2()) {
18868       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18869     } else {
18870       assert(vlen == 8, "sanity");
18871       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18872       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18873     }
18874   %}
18875   ins_pipe( pipe_slow );
18876 %}
18877 
18878 instruct ReplF_reg(vec dst, vlRegF src) %{
18879   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18880   match(Set dst (Replicate src));
18881   format %{ "replicateF $dst,$src" %}
18882   ins_encode %{
18883     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18884   %}
18885   ins_pipe( pipe_slow );
18886 %}
18887 
18888 instruct ReplF_mem(vec dst, memory mem) %{
18889   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18890   match(Set dst (Replicate (LoadF mem)));
18891   format %{ "replicateF $dst,$mem" %}
18892   ins_encode %{
18893     int vlen_enc = vector_length_encoding(this);
18894     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18895   %}
18896   ins_pipe( pipe_slow );
18897 %}
18898 
18899 // Replicate float scalar immediate to be vector by loading from const table.
18900 instruct ReplF_imm(vec dst, immF con) %{
18901   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18902   match(Set dst (Replicate con));
18903   format %{ "replicateF $dst,$con" %}
18904   ins_encode %{
18905     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18906                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18907     int vlen = Matcher::vector_length_in_bytes(this);
18908     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18909   %}
18910   ins_pipe( pipe_slow );
18911 %}
18912 
18913 instruct ReplF_zero(vec dst, immF0 zero) %{
18914   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18915   match(Set dst (Replicate zero));
18916   format %{ "replicateF $dst,$zero" %}
18917   ins_encode %{
18918     int vlen_enc = vector_length_encoding(this);
18919     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18920       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18921     } else {
18922       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18923     }
18924   %}
18925   ins_pipe( fpu_reg_reg );
18926 %}
18927 
18928 // ====================ReplicateD=======================================
18929 
18930 // Replicate double (8 bytes) scalar to be vector
18931 instruct vReplD_reg(vec dst, vlRegD src) %{
18932   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18933   match(Set dst (Replicate src));
18934   format %{ "replicateD $dst,$src" %}
18935   ins_encode %{
18936     uint vlen = Matcher::vector_length(this);
18937     int vlen_enc = vector_length_encoding(this);
18938     if (vlen <= 2) {
18939       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18940     } else if (VM_Version::supports_avx2()) {
18941       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18942     } else {
18943       assert(vlen == 4, "sanity");
18944       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18945       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18946     }
18947   %}
18948   ins_pipe( pipe_slow );
18949 %}
18950 
18951 instruct ReplD_reg(vec dst, vlRegD src) %{
18952   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18953   match(Set dst (Replicate src));
18954   format %{ "replicateD $dst,$src" %}
18955   ins_encode %{
18956     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18957   %}
18958   ins_pipe( pipe_slow );
18959 %}
18960 
18961 instruct ReplD_mem(vec dst, memory mem) %{
18962   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18963   match(Set dst (Replicate (LoadD mem)));
18964   format %{ "replicateD $dst,$mem" %}
18965   ins_encode %{
18966     if (Matcher::vector_length(this) >= 4) {
18967       int vlen_enc = vector_length_encoding(this);
18968       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18969     } else {
18970       __ movddup($dst$$XMMRegister, $mem$$Address);
18971     }
18972   %}
18973   ins_pipe( pipe_slow );
18974 %}
18975 
18976 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
18977 instruct ReplD_imm(vec dst, immD con) %{
18978   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18979   match(Set dst (Replicate con));
18980   format %{ "replicateD $dst,$con" %}
18981   ins_encode %{
18982     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18983     int vlen = Matcher::vector_length_in_bytes(this);
18984     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
18985   %}
18986   ins_pipe( pipe_slow );
18987 %}
18988 
18989 instruct ReplD_zero(vec dst, immD0 zero) %{
18990   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18991   match(Set dst (Replicate zero));
18992   format %{ "replicateD $dst,$zero" %}
18993   ins_encode %{
18994     int vlen_enc = vector_length_encoding(this);
18995     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18996       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18997     } else {
18998       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18999     }
19000   %}
19001   ins_pipe( fpu_reg_reg );
19002 %}
19003 
19004 // ====================VECTOR INSERT=======================================
19005 
19006 instruct insert(vec dst, rRegI val, immU8 idx) %{
19007   predicate(Matcher::vector_length_in_bytes(n) < 32);
19008   match(Set dst (VectorInsert (Binary dst val) idx));
19009   format %{ "vector_insert $dst,$val,$idx" %}
19010   ins_encode %{
19011     assert(UseSSE >= 4, "required");
19012     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19013 
19014     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19015 
19016     assert(is_integral_type(elem_bt), "");
19017     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19018 
19019     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19020   %}
19021   ins_pipe( pipe_slow );
19022 %}
19023 
19024 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19025   predicate(Matcher::vector_length_in_bytes(n) == 32);
19026   match(Set dst (VectorInsert (Binary src val) idx));
19027   effect(TEMP vtmp);
19028   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19029   ins_encode %{
19030     int vlen_enc = Assembler::AVX_256bit;
19031     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19032     int elem_per_lane = 16/type2aelembytes(elem_bt);
19033     int log2epr = log2(elem_per_lane);
19034 
19035     assert(is_integral_type(elem_bt), "sanity");
19036     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19037 
19038     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19039     uint y_idx = ($idx$$constant >> log2epr) & 1;
19040     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19041     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19042     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19043   %}
19044   ins_pipe( pipe_slow );
19045 %}
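
// A worked example of the x_idx/y_idx decomposition above: for a
// 256-bit vector of ints, elem_per_lane = 16/4 = 4 and log2epr = 2, so
// idx = 6 yields x_idx = 6 & 3 = 2 and y_idx = (6 >> 2) & 1 = 1, i.e.
// element 2 within the upper 128-bit lane. insert64 below applies the
// same scheme across four 128-bit lanes (y_idx masked with 3).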
19046 
19047 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19048   predicate(Matcher::vector_length_in_bytes(n) == 64);
19049   match(Set dst (VectorInsert (Binary src val) idx));
19050   effect(TEMP vtmp);
19051   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19052   ins_encode %{
19053     assert(UseAVX > 2, "sanity");
19054 
19055     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19056     int elem_per_lane = 16/type2aelembytes(elem_bt);
19057     int log2epr = log2(elem_per_lane);
19058 
19059     assert(is_integral_type(elem_bt), "");
19060     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19061 
19062     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19063     uint y_idx = ($idx$$constant >> log2epr) & 3;
19064     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19065     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19066     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19067   %}
19068   ins_pipe( pipe_slow );
19069 %}
19070 
19071 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19072   predicate(Matcher::vector_length(n) == 2);
19073   match(Set dst (VectorInsert (Binary dst val) idx));
19074   format %{ "vector_insert $dst,$val,$idx" %}
19075   ins_encode %{
19076     assert(UseSSE >= 4, "required");
19077     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19078     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19079 
19080     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19081   %}
19082   ins_pipe( pipe_slow );
19083 %}
19084 
19085 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19086   predicate(Matcher::vector_length(n) == 4);
19087   match(Set dst (VectorInsert (Binary src val) idx));
19088   effect(TEMP vtmp);
19089   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19090   ins_encode %{
19091     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19092     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19093 
19094     uint x_idx = $idx$$constant & right_n_bits(1);
19095     uint y_idx = ($idx$$constant >> 1) & 1;
19096     int vlen_enc = Assembler::AVX_256bit;
19097     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19098     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19099     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19100   %}
19101   ins_pipe( pipe_slow );
19102 %}
19103 
19104 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19105   predicate(Matcher::vector_length(n) == 8);
19106   match(Set dst (VectorInsert (Binary src val) idx));
19107   effect(TEMP vtmp);
19108   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19109   ins_encode %{
19110     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19111     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19112 
19113     uint x_idx = $idx$$constant & right_n_bits(1);
19114     uint y_idx = ($idx$$constant >> 1) & 3;
19115     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19116     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19117     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19118   %}
19119   ins_pipe( pipe_slow );
19120 %}
19121 
19122 instruct insertF(vec dst, regF val, immU8 idx) %{
19123   predicate(Matcher::vector_length(n) < 8);
19124   match(Set dst (VectorInsert (Binary dst val) idx));
19125   format %{ "vector_insert $dst,$val,$idx" %}
19126   ins_encode %{
19127     assert(UseSSE >= 4, "sanity");
19128 
19129     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19130     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19131 
19132     uint x_idx = $idx$$constant & right_n_bits(2);
19133     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19134   %}
19135   ins_pipe( pipe_slow );
19136 %}
19137 
19138 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19139   predicate(Matcher::vector_length(n) >= 8);
19140   match(Set dst (VectorInsert (Binary src val) idx));
19141   effect(TEMP vtmp);
19142   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19143   ins_encode %{
19144     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19145     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19146 
19147     int vlen = Matcher::vector_length(this);
19148     uint x_idx = $idx$$constant & right_n_bits(2);
19149     if (vlen == 8) {
19150       uint y_idx = ($idx$$constant >> 2) & 1;
19151       int vlen_enc = Assembler::AVX_256bit;
19152       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19153       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19154       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19155     } else {
19156       assert(vlen == 16, "sanity");
19157       uint y_idx = ($idx$$constant >> 2) & 3;
19158       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19159       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19160       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19161     }
19162   %}
19163   ins_pipe( pipe_slow );
19164 %}
19165 
19166 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19167   predicate(Matcher::vector_length(n) == 2);
19168   match(Set dst (VectorInsert (Binary dst val) idx));
19169   effect(TEMP tmp);
19170   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19171   ins_encode %{
19172     assert(UseSSE >= 4, "sanity");
19173     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19174     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19175 
19176     __ movq($tmp$$Register, $val$$XMMRegister);
19177     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19178   %}
19179   ins_pipe( pipe_slow );
19180 %}
19181 
19182 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19183   predicate(Matcher::vector_length(n) == 4);
19184   match(Set dst (VectorInsert (Binary src val) idx));
19185   effect(TEMP vtmp, TEMP tmp);
19186   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19187   ins_encode %{
19188     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19189     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19190 
19191     uint x_idx = $idx$$constant & right_n_bits(1);
19192     uint y_idx = ($idx$$constant >> 1) & 1;
19193     int vlen_enc = Assembler::AVX_256bit;
19194     __ movq($tmp$$Register, $val$$XMMRegister);
19195     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19196     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19197     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19198   %}
19199   ins_pipe( pipe_slow );
19200 %}
19201 
19202 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19203   predicate(Matcher::vector_length(n) == 8);
19204   match(Set dst (VectorInsert (Binary src val) idx));
19205   effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19207   ins_encode %{
19208     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19209     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19210 
19211     uint x_idx = $idx$$constant & right_n_bits(1);
19212     uint y_idx = ($idx$$constant >> 1) & 3;
19213     __ movq($tmp$$Register, $val$$XMMRegister);
19214     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19215     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19216     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19217   %}
19218   ins_pipe( pipe_slow );
19219 %}
19220 
19221 // ====================REDUCTION ARITHMETIC=======================================
19222 
19223 // =======================Int Reduction==========================================
19224 
19225 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19226   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19227   match(Set dst (AddReductionVI src1 src2));
19228   match(Set dst (MulReductionVI src1 src2));
19229   match(Set dst (AndReductionV  src1 src2));
19230   match(Set dst ( OrReductionV  src1 src2));
19231   match(Set dst (XorReductionV  src1 src2));
19232   match(Set dst (MinReductionV  src1 src2));
19233   match(Set dst (MaxReductionV  src1 src2));
19234   effect(TEMP vtmp1, TEMP vtmp2);
19235   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19236   ins_encode %{
19237     int opcode = this->ideal_Opcode();
19238     int vlen = Matcher::vector_length(this, $src2);
19239     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19240   %}
19241   ins_pipe( pipe_slow );
19242 %}
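
// For reference: the reduction nodes above come from auto-vectorized
// reduction loops and from the Vector API's reduceLanes(). A minimal
// Java sketch (illustrative only):
//
//   import jdk.incubator.vector.IntVector;
//   import jdk.incubator.vector.VectorOperators;
//
//   static int sum(IntVector v) {
//     return v.reduceLanes(VectorOperators.ADD);  // -> AddReductionVI
//   }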
19243 
19244 // =======================Long Reduction==========================================
19245 
19246 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19247   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19248   match(Set dst (AddReductionVL src1 src2));
19249   match(Set dst (MulReductionVL src1 src2));
19250   match(Set dst (AndReductionV  src1 src2));
19251   match(Set dst ( OrReductionV  src1 src2));
19252   match(Set dst (XorReductionV  src1 src2));
19253   match(Set dst (MinReductionV  src1 src2));
19254   match(Set dst (MaxReductionV  src1 src2));
19255   effect(TEMP vtmp1, TEMP vtmp2);
19256   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19257   ins_encode %{
19258     int opcode = this->ideal_Opcode();
19259     int vlen = Matcher::vector_length(this, $src2);
19260     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19261   %}
19262   ins_pipe( pipe_slow );
19263 %}
19264 
19265 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19266   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19267   match(Set dst (AddReductionVL src1 src2));
19268   match(Set dst (MulReductionVL src1 src2));
19269   match(Set dst (AndReductionV  src1 src2));
19270   match(Set dst ( OrReductionV  src1 src2));
19271   match(Set dst (XorReductionV  src1 src2));
19272   match(Set dst (MinReductionV  src1 src2));
19273   match(Set dst (MaxReductionV  src1 src2));
19274   effect(TEMP vtmp1, TEMP vtmp2);
19275   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19276   ins_encode %{
19277     int opcode = this->ideal_Opcode();
19278     int vlen = Matcher::vector_length(this, $src2);
19279     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19280   %}
19281   ins_pipe( pipe_slow );
19282 %}
19283 
19284 // =======================Float Reduction==========================================
19285 
19286 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19287   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19288   match(Set dst (AddReductionVF dst src));
19289   match(Set dst (MulReductionVF dst src));
19290   effect(TEMP dst, TEMP vtmp);
19291   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19292   ins_encode %{
19293     int opcode = this->ideal_Opcode();
19294     int vlen = Matcher::vector_length(this, $src);
19295     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19296   %}
19297   ins_pipe( pipe_slow );
19298 %}
19299 
19300 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19301   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19302   match(Set dst (AddReductionVF dst src));
19303   match(Set dst (MulReductionVF dst src));
19304   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19305   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19306   ins_encode %{
19307     int opcode = this->ideal_Opcode();
19308     int vlen = Matcher::vector_length(this, $src);
19309     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19310   %}
19311   ins_pipe( pipe_slow );
19312 %}
19313 
19314 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19315   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19316   match(Set dst (AddReductionVF dst src));
19317   match(Set dst (MulReductionVF dst src));
19318   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19319   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19320   ins_encode %{
19321     int opcode = this->ideal_Opcode();
19322     int vlen = Matcher::vector_length(this, $src);
19323     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19324   %}
19325   ins_pipe( pipe_slow );
19326 %}
19327 
19328 
19329 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19330   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19331   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19332   // src1 contains reduction identity
19333   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19334   match(Set dst (AddReductionVF src1 src2));
19335   match(Set dst (MulReductionVF src1 src2));
19336   effect(TEMP dst);
19337   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19338   ins_encode %{
19339     int opcode = this->ideal_Opcode();
19340     int vlen = Matcher::vector_length(this, $src2);
19341     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19342   %}
19343   ins_pipe( pipe_slow );
19344 %}
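
// Why the strict/non-strict split matters: float addition is not
// associative, so reassociating a reduction can change the result. A
// worked example at the edge of float precision (2^24 = 16777216):
//   (16777216f + 1f) + -16777216f == 0f
//   16777216f + (1f + -16777216f) == 1f
// Auto-vectorized Java reductions must keep source order (strict
// rules above); the Vector API permits reassociation, enabling the
// cheaper lane-parallel scheme used by the unordered_reduction* rules.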
19345 
19346 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19347   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19348   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19349   // src1 contains reduction identity
19350   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19351   match(Set dst (AddReductionVF src1 src2));
19352   match(Set dst (MulReductionVF src1 src2));
19353   effect(TEMP dst, TEMP vtmp);
19354   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19355   ins_encode %{
19356     int opcode = this->ideal_Opcode();
19357     int vlen = Matcher::vector_length(this, $src2);
19358     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19359   %}
19360   ins_pipe( pipe_slow );
19361 %}
19362 
19363 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19364   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19365   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19366   // src1 contains reduction identity
19367   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19368   match(Set dst (AddReductionVF src1 src2));
19369   match(Set dst (MulReductionVF src1 src2));
19370   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19371   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19372   ins_encode %{
19373     int opcode = this->ideal_Opcode();
19374     int vlen = Matcher::vector_length(this, $src2);
19375     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19376   %}
19377   ins_pipe( pipe_slow );
19378 %}
19379 
19380 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19381   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19382   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19383   // src1 contains reduction identity
19384   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19385   match(Set dst (AddReductionVF src1 src2));
19386   match(Set dst (MulReductionVF src1 src2));
19387   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19388   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19389   ins_encode %{
19390     int opcode = this->ideal_Opcode();
19391     int vlen = Matcher::vector_length(this, $src2);
19392     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19393   %}
19394   ins_pipe( pipe_slow );
19395 %}
19396 
19397 // =======================Double Reduction==========================================
19398 
19399 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19400   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19401   match(Set dst (AddReductionVD dst src));
19402   match(Set dst (MulReductionVD dst src));
19403   effect(TEMP dst, TEMP vtmp);
19404   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19405   ins_encode %{
19406     int opcode = this->ideal_Opcode();
19407     int vlen = Matcher::vector_length(this, $src);
19408     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
19410   ins_pipe( pipe_slow );
19411 %}
19412 
19413 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19414   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19415   match(Set dst (AddReductionVD dst src));
19416   match(Set dst (MulReductionVD dst src));
19417   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19418   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19419   ins_encode %{
19420     int opcode = this->ideal_Opcode();
19421     int vlen = Matcher::vector_length(this, $src);
19422     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19423   %}
19424   ins_pipe( pipe_slow );
19425 %}
19426 
19427 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19428   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19429   match(Set dst (AddReductionVD dst src));
19430   match(Set dst (MulReductionVD dst src));
19431   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19432   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19433   ins_encode %{
19434     int opcode = this->ideal_Opcode();
19435     int vlen = Matcher::vector_length(this, $src);
19436     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19437   %}
19438   ins_pipe( pipe_slow );
19439 %}
19440 
19441 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19442   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19443   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19444   // src1 contains reduction identity
19445   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19446   match(Set dst (AddReductionVD src1 src2));
19447   match(Set dst (MulReductionVD src1 src2));
19448   effect(TEMP dst);
19449   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19450   ins_encode %{
19451     int opcode = this->ideal_Opcode();
19452     int vlen = Matcher::vector_length(this, $src2);
19453     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
19455   ins_pipe( pipe_slow );
19456 %}
19457 
19458 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19459   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19460   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19461   // src1 contains reduction identity
19462   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19463   match(Set dst (AddReductionVD src1 src2));
19464   match(Set dst (MulReductionVD src1 src2));
19465   effect(TEMP dst, TEMP vtmp);
19466   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19467   ins_encode %{
19468     int opcode = this->ideal_Opcode();
19469     int vlen = Matcher::vector_length(this, $src2);
19470     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19471   %}
19472   ins_pipe( pipe_slow );
19473 %}
19474 
19475 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19476   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19477   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19478   // src1 contains reduction identity
19479   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19480   match(Set dst (AddReductionVD src1 src2));
19481   match(Set dst (MulReductionVD src1 src2));
19482   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19483   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19484   ins_encode %{
19485     int opcode = this->ideal_Opcode();
19486     int vlen = Matcher::vector_length(this, $src2);
19487     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19488   %}
19489   ins_pipe( pipe_slow );
19490 %}
19491 
19492 // =======================Byte Reduction==========================================
19493 
19494 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19495   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19496   match(Set dst (AddReductionVI src1 src2));
19497   match(Set dst (AndReductionV  src1 src2));
19498   match(Set dst ( OrReductionV  src1 src2));
19499   match(Set dst (XorReductionV  src1 src2));
19500   match(Set dst (MinReductionV  src1 src2));
19501   match(Set dst (MaxReductionV  src1 src2));
19502   effect(TEMP vtmp1, TEMP vtmp2);
19503   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19504   ins_encode %{
19505     int opcode = this->ideal_Opcode();
19506     int vlen = Matcher::vector_length(this, $src2);
19507     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19508   %}
19509   ins_pipe( pipe_slow );
19510 %}
19511 
19512 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19513   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19514   match(Set dst (AddReductionVI src1 src2));
19515   match(Set dst (AndReductionV  src1 src2));
19516   match(Set dst ( OrReductionV  src1 src2));
19517   match(Set dst (XorReductionV  src1 src2));
19518   match(Set dst (MinReductionV  src1 src2));
19519   match(Set dst (MaxReductionV  src1 src2));
19520   effect(TEMP vtmp1, TEMP vtmp2);
19521   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19522   ins_encode %{
19523     int opcode = this->ideal_Opcode();
19524     int vlen = Matcher::vector_length(this, $src2);
19525     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19526   %}
19527   ins_pipe( pipe_slow );
19528 %}
19529 
19530 // =======================Short Reduction==========================================
19531 
19532 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19533   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19534   match(Set dst (AddReductionVI src1 src2));
19535   match(Set dst (MulReductionVI src1 src2));
19536   match(Set dst (AndReductionV  src1 src2));
19537   match(Set dst ( OrReductionV  src1 src2));
19538   match(Set dst (XorReductionV  src1 src2));
19539   match(Set dst (MinReductionV  src1 src2));
19540   match(Set dst (MaxReductionV  src1 src2));
19541   effect(TEMP vtmp1, TEMP vtmp2);
19542   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19543   ins_encode %{
19544     int opcode = this->ideal_Opcode();
19545     int vlen = Matcher::vector_length(this, $src2);
19546     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19547   %}
19548   ins_pipe( pipe_slow );
19549 %}
19550 
19551 // =======================Mul Reduction==========================================
19552 
19553 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19554   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19555             Matcher::vector_length(n->in(2)) <= 32); // src2
19556   match(Set dst (MulReductionVI src1 src2));
19557   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19558   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19559   ins_encode %{
19560     int opcode = this->ideal_Opcode();
19561     int vlen = Matcher::vector_length(this, $src2);
19562     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19563   %}
19564   ins_pipe( pipe_slow );
19565 %}
19566 
19567 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19568   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19569             Matcher::vector_length(n->in(2)) == 64); // src2
19570   match(Set dst (MulReductionVI src1 src2));
19571   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19572   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19573   ins_encode %{
19574     int opcode = this->ideal_Opcode();
19575     int vlen = Matcher::vector_length(this, $src2);
19576     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19577   %}
19578   ins_pipe( pipe_slow );
19579 %}
19580 
19581 //--------------------Min/Max Float Reduction --------------------
19582 // Float Min Reduction
19583 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19584                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19585   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19586             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19587              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19588             Matcher::vector_length(n->in(2)) == 2);
19589   match(Set dst (MinReductionV src1 src2));
19590   match(Set dst (MaxReductionV src1 src2));
19591   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19592   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19593   ins_encode %{
19594     assert(UseAVX > 0, "sanity");
19595 
19596     int opcode = this->ideal_Opcode();
19597     int vlen = Matcher::vector_length(this, $src2);
19598     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19599                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19600   %}
19601   ins_pipe( pipe_slow );
19602 %}
19603 
19604 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19605                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19606   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19607             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19608              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19609             Matcher::vector_length(n->in(2)) >= 4);
19610   match(Set dst (MinReductionV src1 src2));
19611   match(Set dst (MaxReductionV src1 src2));
19612   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19613   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19614   ins_encode %{
19615     assert(UseAVX > 0, "sanity");
19616 
19617     int opcode = this->ideal_Opcode();
19618     int vlen = Matcher::vector_length(this, $src2);
19619     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19620                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19621   %}
19622   ins_pipe( pipe_slow );
19623 %}
19624 
19625 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19626                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19627   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19628             Matcher::vector_length(n->in(2)) == 2);
19629   match(Set dst (MinReductionV dst src));
19630   match(Set dst (MaxReductionV dst src));
19631   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19632   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19633   ins_encode %{
19634     assert(UseAVX > 0, "sanity");
19635 
19636     int opcode = this->ideal_Opcode();
19637     int vlen = Matcher::vector_length(this, $src);
19638     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19639                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19640   %}
19641   ins_pipe( pipe_slow );
19642 %}
19643 
19645 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19646                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19647   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19648             Matcher::vector_length(n->in(2)) >= 4);
19649   match(Set dst (MinReductionV dst src));
19650   match(Set dst (MaxReductionV dst src));
19651   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19652   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19653   ins_encode %{
19654     assert(UseAVX > 0, "sanity");
19655 
19656     int opcode = this->ideal_Opcode();
19657     int vlen = Matcher::vector_length(this, $src);
19658     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19659                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19660   %}
19661   ins_pipe( pipe_slow );
19662 %}
19663 
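// The *_avx10_2 rules below rely on the AVX10.2 min/max instructions, which
// implement IEEE 754-2019 minimum/maximum semantics (NaN propagation and
// -0.0 < +0.0 ordering) directly. The compare-and-blend temporaries of the
// legacy rules above are therefore unnecessary, and xnoreg is passed for the
// unused scratch operands of reduceFloatMinMax.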
19664 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19665   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19666             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19667              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19668             Matcher::vector_length(n->in(2)) == 2);
19669   match(Set dst (MinReductionV src1 src2));
19670   match(Set dst (MaxReductionV src1 src2));
19671   effect(TEMP dst, TEMP xtmp1);
19672   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19673   ins_encode %{
19674     int opcode = this->ideal_Opcode();
19675     int vlen = Matcher::vector_length(this, $src2);
19676     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19677                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19678   %}
19679   ins_pipe( pipe_slow );
19680 %}
19681 
19682 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19683   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19684             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19685              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19686             Matcher::vector_length(n->in(2)) >= 4);
19687   match(Set dst (MinReductionV src1 src2));
19688   match(Set dst (MaxReductionV src1 src2));
19689   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19690   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19691   ins_encode %{
19692     int opcode = this->ideal_Opcode();
19693     int vlen = Matcher::vector_length(this, $src2);
19694     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19695                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19696   %}
19697   ins_pipe( pipe_slow );
19698 %}
19699 
19700 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19701   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19702             Matcher::vector_length(n->in(2)) == 2);
19703   match(Set dst (MinReductionV dst src));
19704   match(Set dst (MaxReductionV dst src));
19705   effect(TEMP dst, TEMP xtmp1);
19706   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19707   ins_encode %{
19708     int opcode = this->ideal_Opcode();
19709     int vlen = Matcher::vector_length(this, $src);
19710     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19711                          $xtmp1$$XMMRegister);
19712   %}
19713   ins_pipe( pipe_slow );
19714 %}
19715 
19716 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19717   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19718             Matcher::vector_length(n->in(2)) >= 4);
19719   match(Set dst (MinReductionV dst src));
19720   match(Set dst (MaxReductionV dst src));
19721   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19722   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19723   ins_encode %{
19724     int opcode = this->ideal_Opcode();
19725     int vlen = Matcher::vector_length(this, $src);
19726     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19727                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19728   %}
19729   ins_pipe( pipe_slow );
19730 %}
19731 
19732 //--------------------Min Double Reduction --------------------
19733 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19734                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19735   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19736             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19737              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19738             Matcher::vector_length(n->in(2)) == 2);
19739   match(Set dst (MinReductionV src1 src2));
19740   match(Set dst (MaxReductionV src1 src2));
19741   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19742   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19743   ins_encode %{
19744     assert(UseAVX > 0, "sanity");
19745 
19746     int opcode = this->ideal_Opcode();
19747     int vlen = Matcher::vector_length(this, $src2);
19748     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19749                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19750   %}
19751   ins_pipe( pipe_slow );
19752 %}
19753 
19754 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19755                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19756   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19757             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19758              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19759             Matcher::vector_length(n->in(2)) >= 4);
19760   match(Set dst (MinReductionV src1 src2));
19761   match(Set dst (MaxReductionV src1 src2));
19762   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19763   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19764   ins_encode %{
19765     assert(UseAVX > 0, "sanity");
19766 
19767     int opcode = this->ideal_Opcode();
19768     int vlen = Matcher::vector_length(this, $src2);
19769     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19770                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19771   %}
19772   ins_pipe( pipe_slow );
19773 %}
19774 
19776 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19777                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19778   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19779             Matcher::vector_length(n->in(2)) == 2);
19780   match(Set dst (MinReductionV dst src));
19781   match(Set dst (MaxReductionV dst src));
19782   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19783   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19784   ins_encode %{
19785     assert(UseAVX > 0, "sanity");
19786 
19787     int opcode = this->ideal_Opcode();
19788     int vlen = Matcher::vector_length(this, $src);
19789     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19790                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19791   %}
19792   ins_pipe( pipe_slow );
19793 %}
19794 
19795 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19796                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19797   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19798             Matcher::vector_length(n->in(2)) >= 4);
19799   match(Set dst (MinReductionV dst src));
19800   match(Set dst (MaxReductionV dst src));
19801   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19802   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19803   ins_encode %{
19804     assert(UseAVX > 0, "sanity");
19805 
19806     int opcode = this->ideal_Opcode();
19807     int vlen = Matcher::vector_length(this, $src);
19808     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19809                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19810   %}
19811   ins_pipe( pipe_slow );
19812 %}
19813 
19814 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19815   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19816             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19817              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19818             Matcher::vector_length(n->in(2)) == 2);
19819   match(Set dst (MinReductionV src1 src2));
19820   match(Set dst (MaxReductionV src1 src2));
19821   effect(TEMP dst, TEMP xtmp1);
19822   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19823   ins_encode %{
19824     int opcode = this->ideal_Opcode();
19825     int vlen = Matcher::vector_length(this, $src2);
19826     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19827                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
19828   %}
19829   ins_pipe( pipe_slow );
19830 %}
19831 
19832 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19833   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19834             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19835              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19836             Matcher::vector_length(n->in(2)) >= 4);
19837   match(Set dst (MinReductionV src1 src2));
19838   match(Set dst (MaxReductionV src1 src2));
19839   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19840   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19841   ins_encode %{
19842     int opcode = this->ideal_Opcode();
19843     int vlen = Matcher::vector_length(this, $src2);
19844     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19845                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19846   %}
19847   ins_pipe( pipe_slow );
19848 %}
19849 
19851 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
19852   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19853             Matcher::vector_length(n->in(2)) == 2);
19854   match(Set dst (MinReductionV dst src));
19855   match(Set dst (MaxReductionV dst src));
19856   effect(TEMP dst, TEMP xtmp1);
19857   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19858   ins_encode %{
19859     int opcode = this->ideal_Opcode();
19860     int vlen = Matcher::vector_length(this, $src);
19861     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19862                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19863   %}
19864   ins_pipe( pipe_slow );
19865 %}
19866 
19867 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19868   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19869             Matcher::vector_length(n->in(2)) >= 4);
19870   match(Set dst (MinReductionV dst src));
19871   match(Set dst (MaxReductionV dst src));
19872   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19873   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19874   ins_encode %{
19875     int opcode = this->ideal_Opcode();
19876     int vlen = Matcher::vector_length(this, $src);
19877     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19878                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19879   %}
19880   ins_pipe( pipe_slow );
19881 %}
19882 
19883 // ====================VECTOR ARITHMETIC=======================================
19884 
19885 // --------------------------------- ADD --------------------------------------
19886 
19887 // Bytes vector add
19888 instruct vaddB(vec dst, vec src) %{
19889   predicate(UseAVX == 0);
19890   match(Set dst (AddVB dst src));
19891   format %{ "paddb   $dst,$src\t! add packedB" %}
19892   ins_encode %{
19893     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19894   %}
19895   ins_pipe( pipe_slow );
19896 %}
19897 
19898 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19899   predicate(UseAVX > 0);
19900   match(Set dst (AddVB src1 src2));
19901   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
19902   ins_encode %{
19903     int vlen_enc = vector_length_encoding(this);
19904     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19905   %}
19906   ins_pipe( pipe_slow );
19907 %}
19908 
19909 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19910   predicate((UseAVX > 0) &&
19911             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19912   match(Set dst (AddVB src (LoadVector mem)));
19913   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
19914   ins_encode %{
19915     int vlen_enc = vector_length_encoding(this);
19916     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19917   %}
19918   ins_pipe( pipe_slow );
19919 %}
19920 
19921 // Shorts/Chars vector add
19922 instruct vaddS(vec dst, vec src) %{
19923   predicate(UseAVX == 0);
19924   match(Set dst (AddVS dst src));
19925   format %{ "paddw   $dst,$src\t! add packedS" %}
19926   ins_encode %{
19927     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19928   %}
19929   ins_pipe( pipe_slow );
19930 %}
19931 
19932 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19933   predicate(UseAVX > 0);
19934   match(Set dst (AddVS src1 src2));
19935   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
19936   ins_encode %{
19937     int vlen_enc = vector_length_encoding(this);
19938     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19939   %}
19940   ins_pipe( pipe_slow );
19941 %}
19942 
19943 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19944   predicate((UseAVX > 0) &&
19945             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19946   match(Set dst (AddVS src (LoadVector mem)));
19947   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
19948   ins_encode %{
19949     int vlen_enc = vector_length_encoding(this);
19950     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19951   %}
19952   ins_pipe( pipe_slow );
19953 %}
19954 
19955 // Integers vector add
19956 instruct vaddI(vec dst, vec src) %{
19957   predicate(UseAVX == 0);
19958   match(Set dst (AddVI dst src));
19959   format %{ "paddd   $dst,$src\t! add packedI" %}
19960   ins_encode %{
19961     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19962   %}
19963   ins_pipe( pipe_slow );
19964 %}
19965 
19966 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19967   predicate(UseAVX > 0);
19968   match(Set dst (AddVI src1 src2));
19969   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
19970   ins_encode %{
19971     int vlen_enc = vector_length_encoding(this);
19972     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19973   %}
19974   ins_pipe( pipe_slow );
19975 %}
19976 
19978 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19979   predicate((UseAVX > 0) &&
19980             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19981   match(Set dst (AddVI src (LoadVector mem)));
19982   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
19983   ins_encode %{
19984     int vlen_enc = vector_length_encoding(this);
19985     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19986   %}
19987   ins_pipe( pipe_slow );
19988 %}
19989 
19990 // Longs vector add
19991 instruct vaddL(vec dst, vec src) %{
19992   predicate(UseAVX == 0);
19993   match(Set dst (AddVL dst src));
19994   format %{ "paddq   $dst,$src\t! add packedL" %}
19995   ins_encode %{
19996     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19997   %}
19998   ins_pipe( pipe_slow );
19999 %}
20000 
20001 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20002   predicate(UseAVX > 0);
20003   match(Set dst (AddVL src1 src2));
20004   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20005   ins_encode %{
20006     int vlen_enc = vector_length_encoding(this);
20007     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20008   %}
20009   ins_pipe( pipe_slow );
20010 %}
20011 
20012 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20013   predicate((UseAVX > 0) &&
20014             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20015   match(Set dst (AddVL src (LoadVector mem)));
20016   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20017   ins_encode %{
20018     int vlen_enc = vector_length_encoding(this);
20019     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20020   %}
20021   ins_pipe( pipe_slow );
20022 %}
20023 
20024 // Floats vector add
20025 instruct vaddF(vec dst, vec src) %{
20026   predicate(UseAVX == 0);
20027   match(Set dst (AddVF dst src));
20028   format %{ "addps   $dst,$src\t! add packedF" %}
20029   ins_encode %{
20030     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20031   %}
20032   ins_pipe( pipe_slow );
20033 %}
20034 
20035 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20036   predicate(UseAVX > 0);
20037   match(Set dst (AddVF src1 src2));
20038   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20039   ins_encode %{
20040     int vlen_enc = vector_length_encoding(this);
20041     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20042   %}
20043   ins_pipe( pipe_slow );
20044 %}
20045 
20046 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20047   predicate((UseAVX > 0) &&
20048             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20049   match(Set dst (AddVF src (LoadVector mem)));
20050   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20051   ins_encode %{
20052     int vlen_enc = vector_length_encoding(this);
20053     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20054   %}
20055   ins_pipe( pipe_slow );
20056 %}
20057 
20058 // Doubles vector add
20059 instruct vaddD(vec dst, vec src) %{
20060   predicate(UseAVX == 0);
20061   match(Set dst (AddVD dst src));
20062   format %{ "addpd   $dst,$src\t! add packedD" %}
20063   ins_encode %{
20064     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20065   %}
20066   ins_pipe( pipe_slow );
20067 %}
20068 
20069 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20070   predicate(UseAVX > 0);
20071   match(Set dst (AddVD src1 src2));
20072   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20073   ins_encode %{
20074     int vlen_enc = vector_length_encoding(this);
20075     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20076   %}
20077   ins_pipe( pipe_slow );
20078 %}
20079 
20080 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20081   predicate((UseAVX > 0) &&
20082             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20083   match(Set dst (AddVD src (LoadVector mem)));
20084   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20085   ins_encode %{
20086     int vlen_enc = vector_length_encoding(this);
20087     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20088   %}
20089   ins_pipe( pipe_slow );
20090 %}
20091 
20092 // --------------------------------- SUB --------------------------------------
20093 
20094 // Bytes vector sub
20095 instruct vsubB(vec dst, vec src) %{
20096   predicate(UseAVX == 0);
20097   match(Set dst (SubVB dst src));
20098   format %{ "psubb   $dst,$src\t! sub packedB" %}
20099   ins_encode %{
20100     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20101   %}
20102   ins_pipe( pipe_slow );
20103 %}
20104 
20105 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20106   predicate(UseAVX > 0);
20107   match(Set dst (SubVB src1 src2));
20108   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20109   ins_encode %{
20110     int vlen_enc = vector_length_encoding(this);
20111     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20112   %}
20113   ins_pipe( pipe_slow );
20114 %}
20115 
20116 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20117   predicate((UseAVX > 0) &&
20118             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20119   match(Set dst (SubVB src (LoadVector mem)));
20120   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20121   ins_encode %{
20122     int vlen_enc = vector_length_encoding(this);
20123     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20124   %}
20125   ins_pipe( pipe_slow );
20126 %}
20127 
20128 // Shorts/Chars vector sub
20129 instruct vsubS(vec dst, vec src) %{
20130   predicate(UseAVX == 0);
20131   match(Set dst (SubVS dst src));
20132   format %{ "psubw   $dst,$src\t! sub packedS" %}
20133   ins_encode %{
20134     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20135   %}
20136   ins_pipe( pipe_slow );
20137 %}
20138 
20140 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20141   predicate(UseAVX > 0);
20142   match(Set dst (SubVS src1 src2));
20143   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20144   ins_encode %{
20145     int vlen_enc = vector_length_encoding(this);
20146     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20147   %}
20148   ins_pipe( pipe_slow );
20149 %}
20150 
20151 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20152   predicate((UseAVX > 0) &&
20153             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20154   match(Set dst (SubVS src (LoadVector mem)));
20155   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20156   ins_encode %{
20157     int vlen_enc = vector_length_encoding(this);
20158     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20159   %}
20160   ins_pipe( pipe_slow );
20161 %}
20162 
20163 // Integers vector sub
20164 instruct vsubI(vec dst, vec src) %{
20165   predicate(UseAVX == 0);
20166   match(Set dst (SubVI dst src));
20167   format %{ "psubd   $dst,$src\t! sub packedI" %}
20168   ins_encode %{
20169     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20170   %}
20171   ins_pipe( pipe_slow );
20172 %}
20173 
20174 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20175   predicate(UseAVX > 0);
20176   match(Set dst (SubVI src1 src2));
20177   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20178   ins_encode %{
20179     int vlen_enc = vector_length_encoding(this);
20180     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20181   %}
20182   ins_pipe( pipe_slow );
20183 %}
20184 
20185 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20186   predicate((UseAVX > 0) &&
20187             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20188   match(Set dst (SubVI src (LoadVector mem)));
20189   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20190   ins_encode %{
20191     int vlen_enc = vector_length_encoding(this);
20192     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20193   %}
20194   ins_pipe( pipe_slow );
20195 %}
20196 
20197 // Longs vector sub
20198 instruct vsubL(vec dst, vec src) %{
20199   predicate(UseAVX == 0);
20200   match(Set dst (SubVL dst src));
20201   format %{ "psubq   $dst,$src\t! sub packedL" %}
20202   ins_encode %{
20203     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20204   %}
20205   ins_pipe( pipe_slow );
20206 %}
20207 
20208 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20209   predicate(UseAVX > 0);
20210   match(Set dst (SubVL src1 src2));
20211   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20212   ins_encode %{
20213     int vlen_enc = vector_length_encoding(this);
20214     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20215   %}
20216   ins_pipe( pipe_slow );
20217 %}
20218 
20220 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20221   predicate((UseAVX > 0) &&
20222             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20223   match(Set dst (SubVL src (LoadVector mem)));
20224   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20225   ins_encode %{
20226     int vlen_enc = vector_length_encoding(this);
20227     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20228   %}
20229   ins_pipe( pipe_slow );
20230 %}
20231 
20232 // Floats vector sub
20233 instruct vsubF(vec dst, vec src) %{
20234   predicate(UseAVX == 0);
20235   match(Set dst (SubVF dst src));
20236   format %{ "subps   $dst,$src\t! sub packedF" %}
20237   ins_encode %{
20238     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20239   %}
20240   ins_pipe( pipe_slow );
20241 %}
20242 
20243 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20244   predicate(UseAVX > 0);
20245   match(Set dst (SubVF src1 src2));
20246   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20247   ins_encode %{
20248     int vlen_enc = vector_length_encoding(this);
20249     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20250   %}
20251   ins_pipe( pipe_slow );
20252 %}
20253 
20254 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20255   predicate((UseAVX > 0) &&
20256             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20257   match(Set dst (SubVF src (LoadVector mem)));
20258   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20259   ins_encode %{
20260     int vlen_enc = vector_length_encoding(this);
20261     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20262   %}
20263   ins_pipe( pipe_slow );
20264 %}
20265 
20266 // Doubles vector sub
20267 instruct vsubD(vec dst, vec src) %{
20268   predicate(UseAVX == 0);
20269   match(Set dst (SubVD dst src));
20270   format %{ "subpd   $dst,$src\t! sub packedD" %}
20271   ins_encode %{
20272     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20273   %}
20274   ins_pipe( pipe_slow );
20275 %}
20276 
20277 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20278   predicate(UseAVX > 0);
20279   match(Set dst (SubVD src1 src2));
20280   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20281   ins_encode %{
20282     int vlen_enc = vector_length_encoding(this);
20283     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20284   %}
20285   ins_pipe( pipe_slow );
20286 %}
20287 
20288 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20289   predicate((UseAVX > 0) &&
20290             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20291   match(Set dst (SubVD src (LoadVector mem)));
20292   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20293   ins_encode %{
20294     int vlen_enc = vector_length_encoding(this);
20295     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20296   %}
20297   ins_pipe( pipe_slow );
20298 %}
20299 
20300 // --------------------------------- MUL --------------------------------------
20301 
20302 // Byte vector mul
20303 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20304   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20305   match(Set dst (MulVB src1 src2));
20306   effect(TEMP dst, TEMP xtmp);
20307   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20308   ins_encode %{
20309     assert(UseSSE > 3, "required");
20310     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20311     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20312     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20313     __ psllw($dst$$XMMRegister, 8);
20314     __ psrlw($dst$$XMMRegister, 8);
20315     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20316   %}
20317   ins_pipe( pipe_slow );
20318 %}
20319 
20320 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20321   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20322   match(Set dst (MulVB src1 src2));
20323   effect(TEMP dst, TEMP xtmp);
20324   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20325   ins_encode %{
20326     assert(UseSSE > 3, "required");
20327     // Odd-index elements
20328     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20329     __ psrlw($dst$$XMMRegister, 8);
20330     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20331     __ psrlw($xtmp$$XMMRegister, 8);
20332     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20333     __ psllw($dst$$XMMRegister, 8);
20334     // Even-index elements
20335     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20336     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20337     __ psllw($xtmp$$XMMRegister, 8);
20338     __ psrlw($xtmp$$XMMRegister, 8);
20339     // Combine
20340     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20341   %}
20342   ins_pipe( pipe_slow );
20343 %}
20344 
20345 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20346   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20347   match(Set dst (MulVB src1 src2));
20348   effect(TEMP xtmp1, TEMP xtmp2);
20349   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20350   ins_encode %{
20351     int vlen_enc = vector_length_encoding(this);
20352     // Odd-index elements
20353     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20354     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20355     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20356     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20357     // Even-index elements
20358     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20359     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20360     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20361     // Combine
20362     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20363   %}
20364   ins_pipe( pipe_slow );
20365 %}
20366 
20367 // Shorts/Chars vector mul
20368 instruct vmulS(vec dst, vec src) %{
20369   predicate(UseAVX == 0);
20370   match(Set dst (MulVS dst src));
20371   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20372   ins_encode %{
20373     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20374   %}
20375   ins_pipe( pipe_slow );
20376 %}
20377 
20378 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20379   predicate(UseAVX > 0);
20380   match(Set dst (MulVS src1 src2));
20381   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20382   ins_encode %{
20383     int vlen_enc = vector_length_encoding(this);
20384     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20385   %}
20386   ins_pipe( pipe_slow );
20387 %}
20388 
20389 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20390   predicate((UseAVX > 0) &&
20391             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20392   match(Set dst (MulVS src (LoadVector mem)));
20393   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20394   ins_encode %{
20395     int vlen_enc = vector_length_encoding(this);
20396     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20397   %}
20398   ins_pipe( pipe_slow );
20399 %}
20400 
20401 // Integers vector mul
20402 instruct vmulI(vec dst, vec src) %{
20403   predicate(UseAVX == 0);
20404   match(Set dst (MulVI dst src));
20405   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20406   ins_encode %{
20407     assert(UseSSE > 3, "required");
20408     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20409   %}
20410   ins_pipe( pipe_slow );
20411 %}
20412 
20413 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20414   predicate(UseAVX > 0);
20415   match(Set dst (MulVI src1 src2));
20416   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20417   ins_encode %{
20418     int vlen_enc = vector_length_encoding(this);
20419     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20420   %}
20421   ins_pipe( pipe_slow );
20422 %}
20423 
20424 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20425   predicate((UseAVX > 0) &&
20426             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20427   match(Set dst (MulVI src (LoadVector mem)));
20428   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20429   ins_encode %{
20430     int vlen_enc = vector_length_encoding(this);
20431     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20432   %}
20433   ins_pipe( pipe_slow );
20434 %}
20435 
20436 // Longs vector mul
20437 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20438   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20439              VM_Version::supports_avx512dq()) ||
20440             VM_Version::supports_avx512vldq());
20441   match(Set dst (MulVL src1 src2));
20442   ins_cost(500);
20443   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20444   ins_encode %{
20445     assert(UseAVX > 2, "required");
20446     int vlen_enc = vector_length_encoding(this);
20447     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20448   %}
20449   ins_pipe( pipe_slow );
20450 %}
20451 
20452 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20453   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20454              VM_Version::supports_avx512dq()) ||
20455             (Matcher::vector_length_in_bytes(n) > 8 &&
20456              VM_Version::supports_avx512vldq()));
20457   match(Set dst (MulVL src (LoadVector mem)));
20458   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20459   ins_cost(500);
20460   ins_encode %{
20461     assert(UseAVX > 2, "required");
20462     int vlen_enc = vector_length_encoding(this);
20463     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20464   %}
20465   ins_pipe( pipe_slow );
20466 %}
20467 
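// The two rules below synthesize a 64-bit multiply from 32-bit ones. Per
// 64-bit lane, with lo(x) = x & 0xFFFFFFFF and hi(x) = x >> 32 -- a sketch,
// not the emitted code:
//   a * b mod 2^64 = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)
// pshufd swaps the 32-bit halves so pmulld can form both cross products at
// once; pmuludq supplies the widening lo*lo term.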
20468 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20469   predicate(UseAVX == 0);
20470   match(Set dst (MulVL src1 src2));
20471   ins_cost(500);
20472   effect(TEMP dst, TEMP xtmp);
20473   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20474   ins_encode %{
20475     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are needed
20477     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20478     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20479     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20480     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20481     __ psllq($dst$$XMMRegister, 32);
20482     // Get the lo-lo products
20483     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20484     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20485     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20486   %}
20487   ins_pipe( pipe_slow );
20488 %}
20489 
20490 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20491   predicate(UseAVX > 0 &&
20492             ((Matcher::vector_length_in_bytes(n) == 64 &&
20493               !VM_Version::supports_avx512dq()) ||
20494              (Matcher::vector_length_in_bytes(n) < 64 &&
20495               !VM_Version::supports_avx512vldq())));
20496   match(Set dst (MulVL src1 src2));
20497   effect(TEMP xtmp1, TEMP xtmp2);
20498   ins_cost(500);
20499   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20500   ins_encode %{
20501     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are needed
20503     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20504     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20505     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20506     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20507     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20508     // Get the lo-lo products
20509     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20510     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20511   %}
20512   ins_pipe( pipe_slow );
20513 %}
20514 
20515 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20516   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20517   match(Set dst (MulVL src1 src2));
20518   ins_cost(100);
20519   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20520   ins_encode %{
20521     int vlen_enc = vector_length_encoding(this);
20522     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20523   %}
20524   ins_pipe( pipe_slow );
20525 %}
20526 
20527 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20528   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20529   match(Set dst (MulVL src1 src2));
20530   ins_cost(100);
20531   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20532   ins_encode %{
20533     int vlen_enc = vector_length_encoding(this);
20534     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20535   %}
20536   ins_pipe( pipe_slow );
20537 %}
20538 
20539 // Floats vector mul
20540 instruct vmulF(vec dst, vec src) %{
20541   predicate(UseAVX == 0);
20542   match(Set dst (MulVF dst src));
20543   format %{ "mulps   $dst,$src\t! mul packedF" %}
20544   ins_encode %{
20545     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20546   %}
20547   ins_pipe( pipe_slow );
20548 %}
20549 
20550 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20551   predicate(UseAVX > 0);
20552   match(Set dst (MulVF src1 src2));
20553   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20554   ins_encode %{
20555     int vlen_enc = vector_length_encoding(this);
20556     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20557   %}
20558   ins_pipe( pipe_slow );
20559 %}
20560 
20561 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20562   predicate((UseAVX > 0) &&
20563             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20564   match(Set dst (MulVF src (LoadVector mem)));
20565   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20566   ins_encode %{
20567     int vlen_enc = vector_length_encoding(this);
20568     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20569   %}
20570   ins_pipe( pipe_slow );
20571 %}
20572 
20573 // Doubles vector mul
20574 instruct vmulD(vec dst, vec src) %{
20575   predicate(UseAVX == 0);
20576   match(Set dst (MulVD dst src));
20577   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20578   ins_encode %{
20579     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20580   %}
20581   ins_pipe( pipe_slow );
20582 %}
20583 
20584 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20585   predicate(UseAVX > 0);
20586   match(Set dst (MulVD src1 src2));
20587   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20588   ins_encode %{
20589     int vlen_enc = vector_length_encoding(this);
20590     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20591   %}
20592   ins_pipe( pipe_slow );
20593 %}
20594 
20595 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20596   predicate((UseAVX > 0) &&
20597             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20598   match(Set dst (MulVD src (LoadVector mem)));
20599   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20600   ins_encode %{
20601     int vlen_enc = vector_length_encoding(this);
20602     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20603   %}
20604   ins_pipe( pipe_slow );
20605 %}
20606 
20607 // --------------------------------- DIV --------------------------------------
20608 
20609 // Floats vector div
20610 instruct vdivF(vec dst, vec src) %{
20611   predicate(UseAVX == 0);
20612   match(Set dst (DivVF dst src));
20613   format %{ "divps   $dst,$src\t! div packedF" %}
20614   ins_encode %{
20615     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20616   %}
20617   ins_pipe( pipe_slow );
20618 %}
20619 
20620 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20621   predicate(UseAVX > 0);
20622   match(Set dst (DivVF src1 src2));
20623   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20624   ins_encode %{
20625     int vlen_enc = vector_length_encoding(this);
20626     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20627   %}
20628   ins_pipe( pipe_slow );
20629 %}
20630 
20631 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20632   predicate((UseAVX > 0) &&
20633             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20634   match(Set dst (DivVF src (LoadVector mem)));
20635   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20636   ins_encode %{
20637     int vlen_enc = vector_length_encoding(this);
20638     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20639   %}
20640   ins_pipe( pipe_slow );
20641 %}
20642 
20643 // Doubles vector div
20644 instruct vdivD(vec dst, vec src) %{
20645   predicate(UseAVX == 0);
20646   match(Set dst (DivVD dst src));
20647   format %{ "divpd   $dst,$src\t! div packedD" %}
20648   ins_encode %{
20649     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20650   %}
20651   ins_pipe( pipe_slow );
20652 %}
20653 
20654 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20655   predicate(UseAVX > 0);
20656   match(Set dst (DivVD src1 src2));
20657   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20658   ins_encode %{
20659     int vlen_enc = vector_length_encoding(this);
20660     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20661   %}
20662   ins_pipe( pipe_slow );
20663 %}
20664 
20665 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20666   predicate((UseAVX > 0) &&
20667             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20668   match(Set dst (DivVD src (LoadVector mem)));
20669   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20670   ins_encode %{
20671     int vlen_enc = vector_length_encoding(this);
20672     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20673   %}
20674   ins_pipe( pipe_slow );
20675 %}
20676 
20677 // ------------------------------ MinMax ---------------------------------------
20678 
20679 // Byte, Short, Int vector Min/Max
20680 instruct minmax_reg_sse(vec dst, vec src) %{
20681   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20682             UseAVX == 0);
20683   match(Set dst (MinV dst src));
20684   match(Set dst (MaxV dst src));
20685   format %{ "vector_minmax  $dst,$src\t!  " %}
20686   ins_encode %{
20687     assert(UseSSE >= 4, "required");
20688 
20689     int opcode = this->ideal_Opcode();
20690     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20691     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20692   %}
20693   ins_pipe( pipe_slow );
20694 %}
20695 
20696 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20697   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20698             UseAVX > 0);
20699   match(Set dst (MinV src1 src2));
20700   match(Set dst (MaxV src1 src2));
20701   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20702   ins_encode %{
20703     int opcode = this->ideal_Opcode();
20704     int vlen_enc = vector_length_encoding(this);
20705     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20706 
20707     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20708   %}
20709   ins_pipe( pipe_slow );
20710 %}
20711 
20712 // Long vector Min/Max
20713 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20714   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20715             UseAVX == 0);
20716   match(Set dst (MinV dst src));
20717   match(Set dst (MaxV src dst));
20718   effect(TEMP dst, TEMP tmp);
20719   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20720   ins_encode %{
20721     assert(UseSSE >= 4, "required");
20722 
20723     int opcode = this->ideal_Opcode();
20724     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20725     assert(elem_bt == T_LONG, "sanity");
20726 
20727     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20728   %}
20729   ins_pipe( pipe_slow );
20730 %}
20731 
20732 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20733   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20734             UseAVX > 0 && !VM_Version::supports_avx512vl());
20735   match(Set dst (MinV src1 src2));
20736   match(Set dst (MaxV src1 src2));
20737   effect(TEMP dst);
20738   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20739   ins_encode %{
20740     int vlen_enc = vector_length_encoding(this);
20741     int opcode = this->ideal_Opcode();
20742     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20743     assert(elem_bt == T_LONG, "sanity");
20744 
20745     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20746   %}
20747   ins_pipe( pipe_slow );
20748 %}
20749 
20750 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20751   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20752             Matcher::vector_element_basic_type(n) == T_LONG);
20753   match(Set dst (MinV src1 src2));
20754   match(Set dst (MaxV src1 src2));
20755   format %{ "vector_minmaxL  $dst,$src1,src2\t! " %}
20756   ins_encode %{
20757     assert(UseAVX > 2, "required");
20758 
20759     int vlen_enc = vector_length_encoding(this);
20760     int opcode = this->ideal_Opcode();
20761     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20762     assert(elem_bt == T_LONG, "sanity");
20763 
20764     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20765   %}
20766   ins_pipe( pipe_slow );
20767 %}
20768 
20769 // Float/Double vector Min/Max
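// Java's Math.min/max semantics differ from the legacy minps/maxps family:
// a NaN in either input must produce NaN, and -0.0 must order below +0.0,
// whereas the hardware instructions simply return the second source on NaN
// and are operand-order dependent for signed zeros. The pre-AVX10.2 rules
// below therefore fix up the result with compare-and-blend sequences, while
// the AVX10.2 VMINMAX* instructions provide NaN- and signed-zero-correct
// selection directly.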
20770 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20771   predicate(VM_Version::supports_avx10_2() &&
20772             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20773   match(Set dst (MinV a b));
20774   match(Set dst (MaxV a b));
20775   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20776   ins_encode %{
20777     int vlen_enc = vector_length_encoding(this);
20778     int opcode = this->ideal_Opcode();
20779     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20780     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20781   %}
20782   ins_pipe( pipe_slow );
20783 %}
20784 
20785 // Float/Double vector Min/Max
20786 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20787   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20788             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20789             UseAVX > 0);
20790   match(Set dst (MinV a b));
20791   match(Set dst (MaxV a b));
20792   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20793   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20794   ins_encode %{
20795     assert(UseAVX > 0, "required");
20796 
20797     int opcode = this->ideal_Opcode();
20798     int vlen_enc = vector_length_encoding(this);
20799     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20800 
20801     __ vminmax_fp(opcode, elem_bt,
20802                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20804   %}
20805   ins_pipe( pipe_slow );
20806 %}
20807 
20808 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20809   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20810             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20811   match(Set dst (MinV a b));
20812   match(Set dst (MaxV a b));
20813   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20814   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20815   ins_encode %{
20816     assert(UseAVX > 2, "required");
20817 
20818     int opcode = this->ideal_Opcode();
20819     int vlen_enc = vector_length_encoding(this);
20820     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20821 
20822     __ evminmax_fp(opcode, elem_bt,
20823                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20825   %}
20826   ins_pipe( pipe_slow );
20827 %}
20828 
20829 // ------------------------------ Unsigned vector Min/Max ----------------------
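// Unsigned min/max maps onto vpminu*/vpmaxu* where they exist; the 64-bit
// forms (vpminuq/vpmaxuq) require AVX-512VL. The vpuminmaxq() fallback below
// presumably uses the standard emulation: bias both operands by
// 0x8000000000000000 so a signed pcmpgtq orders them as unsigned values,
// then blend the originals on the compare mask (hence the two xtmp vectors).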
20830 
20831 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20832   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20833   match(Set dst (UMinV a b));
20834   match(Set dst (UMaxV a b));
20835   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20836   ins_encode %{
20837     int opcode = this->ideal_Opcode();
20838     int vlen_enc = vector_length_encoding(this);
20839     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20840     assert(is_integral_type(elem_bt), "");
20841     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20842   %}
20843   ins_pipe( pipe_slow );
20844 %}
20845 
20846 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20847   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20848   match(Set dst (UMinV a (LoadVector b)));
20849   match(Set dst (UMaxV a (LoadVector b)));
20850   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20851   ins_encode %{
20852     int opcode = this->ideal_Opcode();
20853     int vlen_enc = vector_length_encoding(this);
20854     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20855     assert(is_integral_type(elem_bt), "");
20856     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20857   %}
20858   ins_pipe( pipe_slow );
20859 %}
20860 
20861 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20862   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20863   match(Set dst (UMinV a b));
20864   match(Set dst (UMaxV a b));
20865   effect(TEMP xtmp1, TEMP xtmp2);
20866   format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
20867   ins_encode %{
20868     int opcode = this->ideal_Opcode();
20869     int vlen_enc = vector_length_encoding(this);
20870     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20871   %}
20872   ins_pipe( pipe_slow );
20873 %}
20874 
20875 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20876   match(Set dst (UMinV (Binary dst src2) mask));
20877   match(Set dst (UMaxV (Binary dst src2) mask));
20878   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20879   ins_encode %{
20880     int vlen_enc = vector_length_encoding(this);
20881     BasicType bt = Matcher::vector_element_basic_type(this);
20882     int opc = this->ideal_Opcode();
20883     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20884                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20885   %}
20886   ins_pipe( pipe_slow );
20887 %}
20888 
20889 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20890   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20891   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20892   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20893   ins_encode %{
20894     int vlen_enc = vector_length_encoding(this);
20895     BasicType bt = Matcher::vector_element_basic_type(this);
20896     int opc = this->ideal_Opcode();
20897     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20898                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20899   %}
20900   ins_pipe( pipe_slow );
20901 %}
20902 
20903 // --------------------------------- Signum/CopySign ---------------------------
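// Math.signum returns the argument itself for +/-0.0 and NaN, and +/-1.0
// otherwise (effectively copySign(1.0, x) for ordinary values). The $zero
// and $one operands carry those constants; the signum helpers select between
// them based on comparisons of the input against zero.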
20904 
20905 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20906   match(Set dst (SignumF dst (Binary zero one)));
20907   effect(KILL cr);
20908   format %{ "signumF $dst, $dst" %}
20909   ins_encode %{
20910     int opcode = this->ideal_Opcode();
20911     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20912   %}
20913   ins_pipe( pipe_slow );
20914 %}
20915 
20916 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20917   match(Set dst (SignumD dst (Binary zero one)));
20918   effect(KILL cr);
20919   format %{ "signumD $dst, $dst" %}
20920   ins_encode %{
20921     int opcode = this->ideal_Opcode();
20922     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20923   %}
20924   ins_pipe( pipe_slow );
20925 %}
20926 
20927 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20928   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20929   match(Set dst (SignumVF src (Binary zero one)));
20930   match(Set dst (SignumVD src (Binary zero one)));
20931   effect(TEMP dst, TEMP xtmp1);
20932   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20933   ins_encode %{
20934     int opcode = this->ideal_Opcode();
20935     int vec_enc = vector_length_encoding(this);
20936     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20937                          $xtmp1$$XMMRegister, vec_enc);
20938   %}
20939   ins_pipe( pipe_slow );
20940 %}
20941 
20942 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20943   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20944   match(Set dst (SignumVF src (Binary zero one)));
20945   match(Set dst (SignumVD src (Binary zero one)));
20946   effect(TEMP dst, TEMP ktmp1);
20947   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20948   ins_encode %{
20949     int opcode = this->ideal_Opcode();
20950     int vec_enc = vector_length_encoding(this);
20951     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20952                           $ktmp1$$KRegister, vec_enc);
20953   %}
20954   ins_pipe( pipe_slow );
20955 %}
20956 
20957 // ---------------------------------------
20958 // For copySign use 0xE4 as writemask for vpternlog
20959 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20960 // C (xmm2) is set to 0x7FFFFFFF
20961 // Wherever xmm2 is 0, we want to pick from B (sign)
20962 // Wherever xmm2 is 1, we want to pick from A (src)
20963 //
20964 // A B C Result
20965 // 0 0 0 0
20966 // 0 0 1 0
20967 // 0 1 0 1
20968 // 0 1 1 0
20969 // 1 0 0 0
20970 // 1 0 1 1
20971 // 1 1 0 1
20972 // 1 1 1 1
20973 //
// Result going from high bit to low bit is 0b11100100 = 0xe4
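//
// A minimal sketch (illustration only, not part of this file) of how such a
// vpternlog immediate can be derived for any three-input boolean function;
// bit i of the immediate is the function's value at A = bit2(i),
// B = bit1(i), C = bit0(i):
//
//   static uint8_t ternlog_imm(bool (*f)(bool a, bool b, bool c)) {
//     uint8_t imm = 0;
//     for (int i = 7; i >= 0; i--) {  // fill from the high bit downwards
//       imm = (uint8_t)((imm << 1) | f((i >> 2) & 1, (i >> 1) & 1, i & 1));
//     }
//     return imm;
//   }
//   // ternlog_imm([](bool a, bool b, bool c) { return c ? a : b; }) == 0xE4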
20975 // ---------------------------------------
20976 
20977 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20978   match(Set dst (CopySignF dst src));
20979   effect(TEMP tmp1, TEMP tmp2);
20980   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20981   ins_encode %{
20982     __ movl($tmp2$$Register, 0x7FFFFFFF);
20983     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20984     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20985   %}
20986   ins_pipe( pipe_slow );
20987 %}
20988 
20989 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20990   match(Set dst (CopySignD dst (Binary src zero)));
20991   ins_cost(100);
20992   effect(TEMP tmp1, TEMP tmp2);
20993   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20994   ins_encode %{
20995     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20996     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20997     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20998   %}
20999   ins_pipe( pipe_slow );
21000 %}
21001 
21002 //----------------------------- CompressBits/ExpandBits ------------------------
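// CompressBits/ExpandBits map directly onto the BMI2 pext/pdep instructions:
// pext gathers the src bits selected by mask into the low-order bits of the
// result, and pdep scatters the low-order src bits out to the mask's set-bit
// positions. For example, with 8-bit values for brevity:
//   pext(src = 0b10110100, mask = 0b01100101) = 0b00000110
//   pdep(src = 0b00000110, mask = 0b01100101) = 0b00100100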
21003 
21004 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21005   predicate(n->bottom_type()->isa_int());
21006   match(Set dst (CompressBits src mask));
21007   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21008   ins_encode %{
21009     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21010   %}
21011   ins_pipe( pipe_slow );
21012 %}
21013 
21014 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21015   predicate(n->bottom_type()->isa_int());
21016   match(Set dst (ExpandBits src mask));
21017   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21018   ins_encode %{
21019     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21020   %}
21021   ins_pipe( pipe_slow );
21022 %}
21023 
21024 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21025   predicate(n->bottom_type()->isa_int());
21026   match(Set dst (CompressBits src (LoadI mask)));
21027   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21028   ins_encode %{
21029     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21030   %}
21031   ins_pipe( pipe_slow );
21032 %}
21033 
21034 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21035   predicate(n->bottom_type()->isa_int());
21036   match(Set dst (ExpandBits src (LoadI mask)));
21037   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21038   ins_encode %{
21039     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21040   %}
21041   ins_pipe( pipe_slow );
21042 %}
21043 
21044 // --------------------------------- Sqrt --------------------------------------
21045 
21046 instruct vsqrtF_reg(vec dst, vec src) %{
21047   match(Set dst (SqrtVF src));
21048   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21049   ins_encode %{
21050     assert(UseAVX > 0, "required");
21051     int vlen_enc = vector_length_encoding(this);
21052     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21053   %}
21054   ins_pipe( pipe_slow );
21055 %}
21056 
21057 instruct vsqrtF_mem(vec dst, memory mem) %{
21058   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21059   match(Set dst (SqrtVF (LoadVector mem)));
21060   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21061   ins_encode %{
21062     assert(UseAVX > 0, "required");
21063     int vlen_enc = vector_length_encoding(this);
21064     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21065   %}
21066   ins_pipe( pipe_slow );
21067 %}
21068 
21069 // Floating point vector sqrt
21070 instruct vsqrtD_reg(vec dst, vec src) %{
21071   match(Set dst (SqrtVD src));
21072   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21073   ins_encode %{
21074     assert(UseAVX > 0, "required");
21075     int vlen_enc = vector_length_encoding(this);
21076     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21077   %}
21078   ins_pipe( pipe_slow );
21079 %}
21080 
21081 instruct vsqrtD_mem(vec dst, memory mem) %{
21082   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21083   match(Set dst (SqrtVD (LoadVector mem)));
21084   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21085   ins_encode %{
21086     assert(UseAVX > 0, "required");
21087     int vlen_enc = vector_length_encoding(this);
21088     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21089   %}
21090   ins_pipe( pipe_slow );
21091 %}
21092 
21093 // ------------------------------ Shift ---------------------------------------
21094 
21095 // Left and right shift count vectors are the same on x86
21096 // (only lowest bits of xmm reg are used for count).
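// The shift-by-xmm forms (psllw/pslld/psllq and friends) read the count as
// an unsigned value from the low 64 bits of the count operand; counts at or
// above the lane width flush the lanes to zero (or to copies of the sign bit
// for arithmetic shifts).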
21097 instruct vshiftcnt(vec dst, rRegI cnt) %{
21098   match(Set dst (LShiftCntV cnt));
21099   match(Set dst (RShiftCntV cnt));
21100   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21101   ins_encode %{
21102     __ movdl($dst$$XMMRegister, $cnt$$Register);
21103   %}
21104   ins_pipe( pipe_slow );
21105 %}
21106 
21107 // Byte vector shift
21108 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21109   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21110   match(Set dst ( LShiftVB src shift));
21111   match(Set dst ( RShiftVB src shift));
21112   match(Set dst (URShiftVB src shift));
21113   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21114   format %{"vector_byte_shift $dst,$src,$shift" %}
21115   ins_encode %{
21116     assert(UseSSE > 3, "required");
21117     int opcode = this->ideal_Opcode();
21118     bool sign = (opcode != Op_URShiftVB);
21119     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21120     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21121     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21122     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21123     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21124   %}
21125   ins_pipe( pipe_slow );
21126 %}
21127 
21128 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21129   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21130             UseAVX <= 1);
21131   match(Set dst ( LShiftVB src shift));
21132   match(Set dst ( RShiftVB src shift));
21133   match(Set dst (URShiftVB src shift));
21134   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21135   format %{"vector_byte_shift $dst,$src,$shift" %}
21136   ins_encode %{
21137     assert(UseSSE > 3, "required");
21138     int opcode = this->ideal_Opcode();
21139     bool sign = (opcode != Op_URShiftVB);
21140     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21141     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21142     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21143     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21144     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21145     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21146     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21147     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21148     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21149   %}
21150   ins_pipe( pipe_slow );
21151 %}
21152 
21153 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21154   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21155             UseAVX > 1);
21156   match(Set dst ( LShiftVB src shift));
21157   match(Set dst ( RShiftVB src shift));
21158   match(Set dst (URShiftVB src shift));
21159   effect(TEMP dst, TEMP tmp);
21160   format %{"vector_byte_shift $dst,$src,$shift" %}
21161   ins_encode %{
21162     int opcode = this->ideal_Opcode();
21163     bool sign = (opcode != Op_URShiftVB);
21164     int vlen_enc = Assembler::AVX_256bit;
21165     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21166     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21167     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21168     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21169     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21170   %}
21171   ins_pipe( pipe_slow );
21172 %}
21173 
21174 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21175   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21176   match(Set dst ( LShiftVB src shift));
21177   match(Set dst ( RShiftVB src shift));
21178   match(Set dst (URShiftVB src shift));
21179   effect(TEMP dst, TEMP tmp);
21180   format %{"vector_byte_shift $dst,$src,$shift" %}
21181   ins_encode %{
21182     assert(UseAVX > 1, "required");
21183     int opcode = this->ideal_Opcode();
21184     bool sign = (opcode != Op_URShiftVB);
21185     int vlen_enc = Assembler::AVX_256bit;
21186     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21187     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21188     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21189     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21190     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21191     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21192     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21193     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21194     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21195   %}
21196   ins_pipe( pipe_slow );
21197 %}
21198 
21199 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21200   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21201   match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
21203   match(Set dst (URShiftVB src shift));
21204   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21205   format %{"vector_byte_shift $dst,$src,$shift" %}
21206   ins_encode %{
21207     assert(UseAVX > 2, "required");
21208     int opcode = this->ideal_Opcode();
21209     bool sign = (opcode != Op_URShiftVB);
21210     int vlen_enc = Assembler::AVX_512bit;
21211     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21212     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21213     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21214     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21215     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21216     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21217     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21218     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21219     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21220     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21221     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21222     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21223   %}
21224   ins_pipe( pipe_slow );
21225 %}
21226 
// A logical right shift of a short vector would produce an incorrect Java
// result for negative data, because Java code converts short values to ints
// with sign extension before shifting. Char vectors are fine, since chars
// are unsigned values.
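// For example, with short s = (short)-1, Java evaluates s >>> 2 as
// 0xFFFFFFFF >>> 2 = 0x3FFFFFFF (low 16 bits 0xFFFF), whereas a 16-bit
// psrlw would compute 0xFFFF >> 2 = 0x3FFF.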
// Shorts/Chars vector shift
21232 instruct vshiftS(vec dst, vec src, vec shift) %{
21233   predicate(!n->as_ShiftV()->is_var_shift());
21234   match(Set dst ( LShiftVS src shift));
21235   match(Set dst ( RShiftVS src shift));
21236   match(Set dst (URShiftVS src shift));
21237   effect(TEMP dst, USE src, USE shift);
21238   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21239   ins_encode %{
21240     int opcode = this->ideal_Opcode();
21241     if (UseAVX > 0) {
21242       int vlen_enc = vector_length_encoding(this);
21243       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21244     } else {
21245       int vlen = Matcher::vector_length(this);
21246       if (vlen == 2) {
21247         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21248         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21249       } else if (vlen == 4) {
21250         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21251         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21252       } else {
21253         assert (vlen == 8, "sanity");
21254         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21255         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21256       }
21257     }
21258   %}
21259   ins_pipe( pipe_slow );
21260 %}
21261 
// Integer vector shift
21263 instruct vshiftI(vec dst, vec src, vec shift) %{
21264   predicate(!n->as_ShiftV()->is_var_shift());
21265   match(Set dst ( LShiftVI src shift));
21266   match(Set dst ( RShiftVI src shift));
21267   match(Set dst (URShiftVI src shift));
21268   effect(TEMP dst, USE src, USE shift);
21269   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21270   ins_encode %{
21271     int opcode = this->ideal_Opcode();
21272     if (UseAVX > 0) {
21273       int vlen_enc = vector_length_encoding(this);
21274       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21275     } else {
21276       int vlen = Matcher::vector_length(this);
21277       if (vlen == 2) {
21278         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21279         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21280       } else {
21281         assert(vlen == 4, "sanity");
21282         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21283         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21284       }
21285     }
21286   %}
21287   ins_pipe( pipe_slow );
21288 %}
21289 
// Integer vector constant shift
21291 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21292   match(Set dst (LShiftVI src (LShiftCntV shift)));
21293   match(Set dst (RShiftVI src (RShiftCntV shift)));
21294   match(Set dst (URShiftVI src (RShiftCntV shift)));
21295   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21296   ins_encode %{
21297     int opcode = this->ideal_Opcode();
21298     if (UseAVX > 0) {
21299       int vector_len = vector_length_encoding(this);
21300       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21301     } else {
21302       int vlen = Matcher::vector_length(this);
21303       if (vlen == 2) {
21304         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21305         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21306       } else {
21307         assert(vlen == 4, "sanity");
21308         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21309         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21310       }
21311     }
21312   %}
21313   ins_pipe( pipe_slow );
21314 %}
21315 
// Long vector shift
21317 instruct vshiftL(vec dst, vec src, vec shift) %{
21318   predicate(!n->as_ShiftV()->is_var_shift());
21319   match(Set dst ( LShiftVL src shift));
21320   match(Set dst (URShiftVL src shift));
21321   effect(TEMP dst, USE src, USE shift);
21322   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21323   ins_encode %{
21324     int opcode = this->ideal_Opcode();
21325     if (UseAVX > 0) {
21326       int vlen_enc = vector_length_encoding(this);
21327       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21328     } else {
21329       assert(Matcher::vector_length(this) == 2, "");
21330       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21331       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21332     }
21333   %}
21334   ins_pipe( pipe_slow );
21335 %}
21336 
// Long vector constant shift
21338 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21339   match(Set dst (LShiftVL src (LShiftCntV shift)));
21340   match(Set dst (URShiftVL src (RShiftCntV shift)));
21341   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21342   ins_encode %{
21343     int opcode = this->ideal_Opcode();
21344     if (UseAVX > 0) {
21345       int vector_len = vector_length_encoding(this);
21346       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21347     } else {
21348       assert(Matcher::vector_length(this) == 2, "");
21349       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21350       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21351     }
21352   %}
21353   ins_pipe( pipe_slow );
21354 %}
21355 
// ------------------- Arithmetic Right Shift ---------------------------------
21357 // Long vector arithmetic right shift
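// Without AVX-512's evpsraq there is no 64-bit arithmetic shift instruction,
// so it is emulated from logical shifts via the identity
//   sra(x, s) == ((x >>> s) ^ m) - m,  where m == 0x8000000000000000 >>> s:
// the xor flips the shifted-in bit at the old sign position, and the
// subtraction then propagates the sign through the upper bits.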
21358 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21359   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21360   match(Set dst (RShiftVL src shift));
21361   effect(TEMP dst, TEMP tmp);
21362   format %{ "vshiftq $dst,$src,$shift" %}
21363   ins_encode %{
21364     uint vlen = Matcher::vector_length(this);
21365     if (vlen == 2) {
21366       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21367       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21368       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21369       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21370       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21371       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21372     } else {
21373       assert(vlen == 4, "sanity");
21374       assert(UseAVX > 1, "required");
21375       int vlen_enc = Assembler::AVX_256bit;
21376       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21377       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21378       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21379       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21380       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21381     }
21382   %}
21383   ins_pipe( pipe_slow );
21384 %}
21385 
21386 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21387   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21388   match(Set dst (RShiftVL src shift));
21389   format %{ "vshiftq $dst,$src,$shift" %}
21390   ins_encode %{
21391     int vlen_enc = vector_length_encoding(this);
21392     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21393   %}
21394   ins_pipe( pipe_slow );
21395 %}
21396 
21397 // ------------------- Variable Shift -----------------------------
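// Variable shifts take a per-lane count vector rather than a single scalar
// count. AVX2 only supplies the 32/64-bit lane forms (vpsllvd/vpsrlvd/
// vpsravd, vpsllvq/vpsrlvq); the 16-bit forms need AVX-512BW and there are
// no byte forms at all, so byte and (pre-BW) short lanes are widened,
// shifted in wider lanes, and narrowed back.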
21398 // Byte variable shift
21399 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21400   predicate(Matcher::vector_length(n) <= 8 &&
21401             n->as_ShiftV()->is_var_shift() &&
21402             !VM_Version::supports_avx512bw());
21403   match(Set dst ( LShiftVB src shift));
21404   match(Set dst ( RShiftVB src shift));
21405   match(Set dst (URShiftVB src shift));
21406   effect(TEMP dst, TEMP vtmp);
21407   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21408   ins_encode %{
21409     assert(UseAVX >= 2, "required");
21410 
21411     int opcode = this->ideal_Opcode();
21412     int vlen_enc = Assembler::AVX_128bit;
21413     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21414     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21415   %}
21416   ins_pipe( pipe_slow );
21417 %}
21418 
21419 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21420   predicate(Matcher::vector_length(n) == 16 &&
21421             n->as_ShiftV()->is_var_shift() &&
21422             !VM_Version::supports_avx512bw());
21423   match(Set dst ( LShiftVB src shift));
21424   match(Set dst ( RShiftVB src shift));
21425   match(Set dst (URShiftVB src shift));
21426   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21427   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21428   ins_encode %{
21429     assert(UseAVX >= 2, "required");
21430 
21431     int opcode = this->ideal_Opcode();
21432     int vlen_enc = Assembler::AVX_128bit;
21433     // Shift lower half and get word result in dst
21434     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21435 
21436     // Shift upper half and get word result in vtmp1
21437     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21438     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21439     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21440 
21441     // Merge and down convert the two word results to byte in dst
21442     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21443   %}
21444   ins_pipe( pipe_slow );
21445 %}
21446 
21447 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21448   predicate(Matcher::vector_length(n) == 32 &&
21449             n->as_ShiftV()->is_var_shift() &&
21450             !VM_Version::supports_avx512bw());
21451   match(Set dst ( LShiftVB src shift));
21452   match(Set dst ( RShiftVB src shift));
21453   match(Set dst (URShiftVB src shift));
21454   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21455   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21456   ins_encode %{
21457     assert(UseAVX >= 2, "required");
21458 
21459     int opcode = this->ideal_Opcode();
21460     int vlen_enc = Assembler::AVX_128bit;
21461     // Process lower 128 bits and get result in dst
21462     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21463     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21464     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21465     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21466     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21467 
21468     // Process higher 128 bits and get result in vtmp3
21469     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21470     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21471     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21472     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21473     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21474     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21475     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21476 
21477     // Merge the two results in dst
21478     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21479   %}
21480   ins_pipe( pipe_slow );
21481 %}
21482 
21483 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21484   predicate(Matcher::vector_length(n) <= 32 &&
21485             n->as_ShiftV()->is_var_shift() &&
21486             VM_Version::supports_avx512bw());
21487   match(Set dst ( LShiftVB src shift));
21488   match(Set dst ( RShiftVB src shift));
21489   match(Set dst (URShiftVB src shift));
21490   effect(TEMP dst, TEMP vtmp);
21491   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21492   ins_encode %{
21493     assert(UseAVX > 2, "required");
21494 
21495     int opcode = this->ideal_Opcode();
21496     int vlen_enc = vector_length_encoding(this);
21497     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21498   %}
21499   ins_pipe( pipe_slow );
21500 %}
21501 
21502 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21503   predicate(Matcher::vector_length(n) == 64 &&
21504             n->as_ShiftV()->is_var_shift() &&
21505             VM_Version::supports_avx512bw());
21506   match(Set dst ( LShiftVB src shift));
21507   match(Set dst ( RShiftVB src shift));
21508   match(Set dst (URShiftVB src shift));
21509   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21510   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21511   ins_encode %{
21512     assert(UseAVX > 2, "required");
21513 
21514     int opcode = this->ideal_Opcode();
21515     int vlen_enc = Assembler::AVX_256bit;
21516     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21517     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21518     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21519     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21520     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21521   %}
21522   ins_pipe( pipe_slow );
21523 %}
21524 
21525 // Short variable shift
21526 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21527   predicate(Matcher::vector_length(n) <= 8 &&
21528             n->as_ShiftV()->is_var_shift() &&
21529             !VM_Version::supports_avx512bw());
21530   match(Set dst ( LShiftVS src shift));
21531   match(Set dst ( RShiftVS src shift));
21532   match(Set dst (URShiftVS src shift));
21533   effect(TEMP dst, TEMP vtmp);
21534   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21535   ins_encode %{
21536     assert(UseAVX >= 2, "required");
21537 
21538     int opcode = this->ideal_Opcode();
21539     bool sign = (opcode != Op_URShiftVS);
21540     int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21543     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21544     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21545     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21546     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21547   %}
21548   ins_pipe( pipe_slow );
21549 %}
21550 
21551 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21552   predicate(Matcher::vector_length(n) == 16 &&
21553             n->as_ShiftV()->is_var_shift() &&
21554             !VM_Version::supports_avx512bw());
21555   match(Set dst ( LShiftVS src shift));
21556   match(Set dst ( RShiftVS src shift));
21557   match(Set dst (URShiftVS src shift));
21558   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21559   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21560   ins_encode %{
21561     assert(UseAVX >= 2, "required");
21562 
21563     int opcode = this->ideal_Opcode();
21564     bool sign = (opcode != Op_URShiftVS);
21565     int vlen_enc = Assembler::AVX_256bit;
21566     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21567     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21568     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21569     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21570     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21571 
21572     // Shift upper half, with result in dst using vtmp1 as TEMP
21573     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21574     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21575     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21576     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21577     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21578     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21579 
21580     // Merge lower and upper half result into dst
21581     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21582     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21583   %}
21584   ins_pipe( pipe_slow );
21585 %}
21586 
21587 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21588   predicate(n->as_ShiftV()->is_var_shift() &&
21589             VM_Version::supports_avx512bw());
21590   match(Set dst ( LShiftVS src shift));
21591   match(Set dst ( RShiftVS src shift));
21592   match(Set dst (URShiftVS src shift));
21593   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21594   ins_encode %{
21595     assert(UseAVX > 2, "required");
21596 
21597     int opcode = this->ideal_Opcode();
21598     int vlen_enc = vector_length_encoding(this);
21599     if (!VM_Version::supports_avx512vl()) {
21600       vlen_enc = Assembler::AVX_512bit;
21601     }
21602     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21603   %}
21604   ins_pipe( pipe_slow );
21605 %}
21606 
// Integer variable shift
21608 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21609   predicate(n->as_ShiftV()->is_var_shift());
21610   match(Set dst ( LShiftVI src shift));
21611   match(Set dst ( RShiftVI src shift));
21612   match(Set dst (URShiftVI src shift));
21613   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21614   ins_encode %{
21615     assert(UseAVX >= 2, "required");
21616 
21617     int opcode = this->ideal_Opcode();
21618     int vlen_enc = vector_length_encoding(this);
21619     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21620   %}
21621   ins_pipe( pipe_slow );
21622 %}
21623 
// Long variable shift
21625 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21626   predicate(n->as_ShiftV()->is_var_shift());
21627   match(Set dst ( LShiftVL src shift));
21628   match(Set dst (URShiftVL src shift));
21629   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21630   ins_encode %{
21631     assert(UseAVX >= 2, "required");
21632 
21633     int opcode = this->ideal_Opcode();
21634     int vlen_enc = vector_length_encoding(this);
21635     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21636   %}
21637   ins_pipe( pipe_slow );
21638 %}
21639 
// Long variable arithmetic right shift
21641 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21642   predicate(Matcher::vector_length(n) <= 4 &&
21643             n->as_ShiftV()->is_var_shift() &&
21644             UseAVX == 2);
21645   match(Set dst (RShiftVL src shift));
21646   effect(TEMP dst, TEMP vtmp);
21647   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21648   ins_encode %{
21649     int opcode = this->ideal_Opcode();
21650     int vlen_enc = vector_length_encoding(this);
21651     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21652                  $vtmp$$XMMRegister);
21653   %}
21654   ins_pipe( pipe_slow );
21655 %}
21656 
21657 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21658   predicate(n->as_ShiftV()->is_var_shift() &&
21659             UseAVX > 2);
21660   match(Set dst (RShiftVL src shift));
21661   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21662   ins_encode %{
21663     int opcode = this->ideal_Opcode();
21664     int vlen_enc = vector_length_encoding(this);
21665     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21666   %}
21667   ins_pipe( pipe_slow );
21668 %}
21669 
21670 // --------------------------------- AND --------------------------------------
21671 
21672 instruct vand(vec dst, vec src) %{
21673   predicate(UseAVX == 0);
21674   match(Set dst (AndV dst src));
21675   format %{ "pand    $dst,$src\t! and vectors" %}
21676   ins_encode %{
21677     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21678   %}
21679   ins_pipe( pipe_slow );
21680 %}
21681 
21682 instruct vand_reg(vec dst, vec src1, vec src2) %{
21683   predicate(UseAVX > 0);
21684   match(Set dst (AndV src1 src2));
21685   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21686   ins_encode %{
21687     int vlen_enc = vector_length_encoding(this);
21688     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21689   %}
21690   ins_pipe( pipe_slow );
21691 %}
21692 
21693 instruct vand_mem(vec dst, vec src, memory mem) %{
21694   predicate((UseAVX > 0) &&
21695             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21696   match(Set dst (AndV src (LoadVector mem)));
21697   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21698   ins_encode %{
21699     int vlen_enc = vector_length_encoding(this);
21700     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21701   %}
21702   ins_pipe( pipe_slow );
21703 %}
21704 
21705 // --------------------------------- OR ---------------------------------------
21706 
21707 instruct vor(vec dst, vec src) %{
21708   predicate(UseAVX == 0);
21709   match(Set dst (OrV dst src));
21710   format %{ "por     $dst,$src\t! or vectors" %}
21711   ins_encode %{
21712     __ por($dst$$XMMRegister, $src$$XMMRegister);
21713   %}
21714   ins_pipe( pipe_slow );
21715 %}
21716 
21717 instruct vor_reg(vec dst, vec src1, vec src2) %{
21718   predicate(UseAVX > 0);
21719   match(Set dst (OrV src1 src2));
21720   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21721   ins_encode %{
21722     int vlen_enc = vector_length_encoding(this);
21723     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21724   %}
21725   ins_pipe( pipe_slow );
21726 %}
21727 
21728 instruct vor_mem(vec dst, vec src, memory mem) %{
21729   predicate((UseAVX > 0) &&
21730             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21731   match(Set dst (OrV src (LoadVector mem)));
21732   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21733   ins_encode %{
21734     int vlen_enc = vector_length_encoding(this);
21735     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21736   %}
21737   ins_pipe( pipe_slow );
21738 %}
21739 
21740 // --------------------------------- XOR --------------------------------------
21741 
21742 instruct vxor(vec dst, vec src) %{
21743   predicate(UseAVX == 0);
21744   match(Set dst (XorV dst src));
21745   format %{ "pxor    $dst,$src\t! xor vectors" %}
21746   ins_encode %{
21747     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21748   %}
21749   ins_pipe( pipe_slow );
21750 %}
21751 
21752 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21753   predicate(UseAVX > 0);
21754   match(Set dst (XorV src1 src2));
21755   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21756   ins_encode %{
21757     int vlen_enc = vector_length_encoding(this);
21758     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21759   %}
21760   ins_pipe( pipe_slow );
21761 %}
21762 
21763 instruct vxor_mem(vec dst, vec src, memory mem) %{
21764   predicate((UseAVX > 0) &&
21765             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21766   match(Set dst (XorV src (LoadVector mem)));
21767   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21768   ins_encode %{
21769     int vlen_enc = vector_length_encoding(this);
21770     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21771   %}
21772   ins_pipe( pipe_slow );
21773 %}
21774 
21775 // --------------------------------- VectorCast --------------------------------------
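// VectorCastX2Y nodes perform lane-wise element casts (Vector API
// conversions). Widening integral casts sign-extend (vpmovsx*), narrowing
// casts truncate (evpmov* down-converts or pack-based sequences), and casts
// to or from floating point use the SIMD convert instructions.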
21776 
21777 instruct vcastBtoX(vec dst, vec src) %{
21778   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21779   match(Set dst (VectorCastB2X src));
21780   format %{ "vector_cast_b2x $dst,$src\t!" %}
21781   ins_encode %{
21782     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21783     int vlen_enc = vector_length_encoding(this);
21784     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21785   %}
21786   ins_pipe( pipe_slow );
21787 %}
21788 
21789 instruct vcastBtoD(legVec dst, legVec src) %{
21790   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21791   match(Set dst (VectorCastB2X src));
21792   format %{ "vector_cast_b2x $dst,$src\t!" %}
21793   ins_encode %{
21794     int vlen_enc = vector_length_encoding(this);
21795     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21796   %}
21797   ins_pipe( pipe_slow );
21798 %}
21799 
21800 instruct castStoX(vec dst, vec src) %{
21801   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21802             Matcher::vector_length(n->in(1)) <= 8 && // src
21803             Matcher::vector_element_basic_type(n) == T_BYTE);
21804   match(Set dst (VectorCastS2X src));
21805   format %{ "vector_cast_s2x $dst,$src" %}
21806   ins_encode %{
21807     assert(UseAVX > 0, "required");
21808 
21809     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21810     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21811   %}
21812   ins_pipe( pipe_slow );
21813 %}
21814 
21815 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21816   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21817             Matcher::vector_length(n->in(1)) == 16 && // src
21818             Matcher::vector_element_basic_type(n) == T_BYTE);
21819   effect(TEMP dst, TEMP vtmp);
21820   match(Set dst (VectorCastS2X src));
21821   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21822   ins_encode %{
21823     assert(UseAVX > 0, "required");
21824 
21825     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21826     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21827     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21828     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21829   %}
21830   ins_pipe( pipe_slow );
21831 %}
21832 
21833 instruct vcastStoX_evex(vec dst, vec src) %{
21834   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21835             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21836   match(Set dst (VectorCastS2X src));
21837   format %{ "vector_cast_s2x $dst,$src\t!" %}
21838   ins_encode %{
21839     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21840     int src_vlen_enc = vector_length_encoding(this, $src);
21841     int vlen_enc = vector_length_encoding(this);
21842     switch (to_elem_bt) {
21843       case T_BYTE:
21844         if (!VM_Version::supports_avx512vl()) {
21845           vlen_enc = Assembler::AVX_512bit;
21846         }
21847         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21848         break;
21849       case T_INT:
21850         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21851         break;
21852       case T_FLOAT:
21853         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21854         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21855         break;
21856       case T_LONG:
21857         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21858         break;
21859       case T_DOUBLE: {
21860         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21861         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21862         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21863         break;
21864       }
21865       default:
21866         ShouldNotReachHere();
21867     }
21868   %}
21869   ins_pipe( pipe_slow );
21870 %}
21871 
21872 instruct castItoX(vec dst, vec src) %{
21873   predicate(UseAVX <= 2 &&
21874             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21875             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21876   match(Set dst (VectorCastI2X src));
21877   format %{ "vector_cast_i2x $dst,$src" %}
21878   ins_encode %{
21879     assert(UseAVX > 0, "required");
21880 
21881     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21882     int vlen_enc = vector_length_encoding(this, $src);
21883 
21884     if (to_elem_bt == T_BYTE) {
21885       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21886       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21887       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21888     } else {
21889       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21890       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21891       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21892     }
21893   %}
21894   ins_pipe( pipe_slow );
21895 %}
21896 
21897 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21898   predicate(UseAVX <= 2 &&
21899             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21900             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21901   match(Set dst (VectorCastI2X src));
21902   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21903   effect(TEMP dst, TEMP vtmp);
21904   ins_encode %{
21905     assert(UseAVX > 0, "required");
21906 
21907     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21908     int vlen_enc = vector_length_encoding(this, $src);
21909 
21910     if (to_elem_bt == T_BYTE) {
21911       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21912       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21913       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21914       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21915     } else {
21916       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21917       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21918       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21919       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21920     }
21921   %}
21922   ins_pipe( pipe_slow );
21923 %}
21924 
21925 instruct vcastItoX_evex(vec dst, vec src) %{
21926   predicate(UseAVX > 2 ||
21927             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21928   match(Set dst (VectorCastI2X src));
21929   format %{ "vector_cast_i2x $dst,$src\t!" %}
21930   ins_encode %{
21931     assert(UseAVX > 0, "required");
21932 
21933     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21934     int src_vlen_enc = vector_length_encoding(this, $src);
21935     int dst_vlen_enc = vector_length_encoding(this);
21936     switch (dst_elem_bt) {
21937       case T_BYTE:
21938         if (!VM_Version::supports_avx512vl()) {
21939           src_vlen_enc = Assembler::AVX_512bit;
21940         }
21941         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21942         break;
21943       case T_SHORT:
21944         if (!VM_Version::supports_avx512vl()) {
21945           src_vlen_enc = Assembler::AVX_512bit;
21946         }
21947         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21948         break;
21949       case T_FLOAT:
21950         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21951         break;
21952       case T_LONG:
21953         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21954         break;
21955       case T_DOUBLE:
21956         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21957         break;
21958       default:
21959         ShouldNotReachHere();
21960     }
21961   %}
21962   ins_pipe( pipe_slow );
21963 %}
21964 
21965 instruct vcastLtoBS(vec dst, vec src) %{
21966   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21967             UseAVX <= 2);
21968   match(Set dst (VectorCastL2X src));
21969   format %{ "vector_cast_l2x  $dst,$src" %}
21970   ins_encode %{
21971     assert(UseAVX > 0, "required");
21972 
21973     int vlen = Matcher::vector_length_in_bytes(this, $src);
21974     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
21975     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21976                                                       : ExternalAddress(vector_int_to_short_mask());
21977     if (vlen <= 16) {
21978       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21979       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21980       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21981     } else {
21982       assert(vlen <= 32, "required");
21983       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21984       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21985       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21986       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21987     }
21988     if (to_elem_bt == T_BYTE) {
21989       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21990     }
21991   %}
21992   ins_pipe( pipe_slow );
21993 %}
21994 
21995 instruct vcastLtoX_evex(vec dst, vec src) %{
21996   predicate(UseAVX > 2 ||
21997             (Matcher::vector_element_basic_type(n) == T_INT ||
21998              Matcher::vector_element_basic_type(n) == T_FLOAT ||
21999              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22000   match(Set dst (VectorCastL2X src));
22001   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22002   ins_encode %{
22003     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22004     int vlen = Matcher::vector_length_in_bytes(this, $src);
22005     int vlen_enc = vector_length_encoding(this, $src);
22006     switch (to_elem_bt) {
22007       case T_BYTE:
22008         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22009           vlen_enc = Assembler::AVX_512bit;
22010         }
22011         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22012         break;
22013       case T_SHORT:
22014         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22015           vlen_enc = Assembler::AVX_512bit;
22016         }
22017         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22018         break;
22019       case T_INT:
22020         if (vlen == 8) {
22021           if ($dst$$XMMRegister != $src$$XMMRegister) {
22022             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22023           }
22024         } else if (vlen == 16) {
22025           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22026         } else if (vlen == 32) {
22027           if (UseAVX > 2) {
22028             if (!VM_Version::supports_avx512vl()) {
22029               vlen_enc = Assembler::AVX_512bit;
22030             }
22031             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22032           } else {
22033             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22034             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22035           }
22036         } else { // vlen == 64
22037           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22038         }
22039         break;
22040       case T_FLOAT:
22041         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22042         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22043         break;
22044       case T_DOUBLE:
22045         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22046         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22047         break;
22049       default: assert(false, "%s", type2name(to_elem_bt));
22050     }
22051   %}
22052   ins_pipe( pipe_slow );
22053 %}
22054 
22055 instruct vcastFtoD_reg(vec dst, vec src) %{
22056   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22057   match(Set dst (VectorCastF2X src));
22058   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22059   ins_encode %{
22060     int vlen_enc = vector_length_encoding(this);
22061     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22062   %}
22063   ins_pipe( pipe_slow );
22064 %}
22065 
22067 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22068   predicate(!VM_Version::supports_avx10_2() &&
22069             !VM_Version::supports_avx512vl() &&
22070             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22071             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22072             is_integral_type(Matcher::vector_element_basic_type(n)));
22073   match(Set dst (VectorCastF2X src));
22074   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22075   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22076   ins_encode %{
22077     int vlen_enc = vector_length_encoding(this, $src);
22078     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when
    // loading addresses wider than 32 bits for register-indirect addressing,
    // since stub constants live in the code cache and ReservedCodeCacheSize
    // is currently capped at 2G. Targets are free to raise that cap, but a
    // code cache larger than 2G is unrealistic in practice; with the cap in
    // place we save a temporary register allocation, which in the limiting
    // case can prevent spilling in blocks with high register pressure.
22086     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22087                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22088                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22089   %}
22090   ins_pipe( pipe_slow );
22091 %}
22092 
22093 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22094   predicate(!VM_Version::supports_avx10_2() &&
22095             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22096             is_integral_type(Matcher::vector_element_basic_type(n)));
22097   match(Set dst (VectorCastF2X src));
22098   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22099   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22100   ins_encode %{
22101     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22102     if (to_elem_bt == T_LONG) {
22103       int vlen_enc = vector_length_encoding(this);
22104       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22105                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22106                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22107     } else {
22108       int vlen_enc = vector_length_encoding(this, $src);
22109       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22110                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22111                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22112     }
22113   %}
22114   ins_pipe( pipe_slow );
22115 %}
22116 
22117 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22118   predicate(VM_Version::supports_avx10_2() &&
22119             is_integral_type(Matcher::vector_element_basic_type(n)));
22120   match(Set dst (VectorCastF2X src));
22121   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22122   ins_encode %{
22123     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22124     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22125     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22126   %}
22127   ins_pipe( pipe_slow );
22128 %}
22129 
22130 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22131   predicate(VM_Version::supports_avx10_2() &&
22132             is_integral_type(Matcher::vector_element_basic_type(n)));
22133   match(Set dst (VectorCastF2X (LoadVector src)));
22134   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22135   ins_encode %{
22136     int vlen = Matcher::vector_length(this);
22137     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22138     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22139     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22140   %}
22141   ins_pipe( pipe_slow );
22142 %}
22143 
22144 instruct vcastDtoF_reg(vec dst, vec src) %{
22145   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22146   match(Set dst (VectorCastD2X src));
22147   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22148   ins_encode %{
22149     int vlen_enc = vector_length_encoding(this, $src);
22150     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22151   %}
22152   ins_pipe( pipe_slow );
22153 %}
22154 
22155 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22156   predicate(!VM_Version::supports_avx10_2() &&
22157             !VM_Version::supports_avx512vl() &&
22158             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22159             is_integral_type(Matcher::vector_element_basic_type(n)));
22160   match(Set dst (VectorCastD2X src));
22161   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22162   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22163   ins_encode %{
22164     int vlen_enc = vector_length_encoding(this, $src);
22165     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22166     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22167                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22168                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22169   %}
22170   ins_pipe( pipe_slow );
22171 %}
22172 
22173 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22174   predicate(!VM_Version::supports_avx10_2() &&
22175             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22176             is_integral_type(Matcher::vector_element_basic_type(n)));
22177   match(Set dst (VectorCastD2X src));
22178   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22179   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22180   ins_encode %{
22181     int vlen_enc = vector_length_encoding(this, $src);
22182     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22183     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22184                               ExternalAddress(vector_float_signflip());
22185     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22186                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22187   %}
22188   ins_pipe( pipe_slow );
22189 %}
22190 
22191 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22192   predicate(VM_Version::supports_avx10_2() &&
22193             is_integral_type(Matcher::vector_element_basic_type(n)));
22194   match(Set dst (VectorCastD2X src));
22195   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22196   ins_encode %{
22197     int vlen_enc = vector_length_encoding(this, $src);
22198     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22199     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22200   %}
22201   ins_pipe( pipe_slow );
22202 %}
22203 
22204 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22205   predicate(VM_Version::supports_avx10_2() &&
22206             is_integral_type(Matcher::vector_element_basic_type(n)));
22207   match(Set dst (VectorCastD2X (LoadVector src)));
22208   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22209   ins_encode %{
22210     int vlen = Matcher::vector_length(this);
22211     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22212     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22213     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22214   %}
22215   ins_pipe( pipe_slow );
22216 %}
22217 
22218 instruct vucast(vec dst, vec src) %{
22219   match(Set dst (VectorUCastB2X src));
22220   match(Set dst (VectorUCastS2X src));
22221   match(Set dst (VectorUCastI2X src));
22222   format %{ "vector_ucast $dst,$src\t!" %}
22223   ins_encode %{
22224     assert(UseAVX > 0, "required");
22225 
22226     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22227     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22228     int vlen_enc = vector_length_encoding(this);
22229     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22230   %}
22231   ins_pipe( pipe_slow );
22232 %}
22233 
22234 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22235   predicate(!VM_Version::supports_avx512vl() &&
22236             Matcher::vector_length_in_bytes(n) < 64 &&
22237             Matcher::vector_element_basic_type(n) == T_INT);
22238   match(Set dst (RoundVF src));
22239   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22240   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22241   ins_encode %{
22242     int vlen_enc = vector_length_encoding(this);
22243     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22244     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22245                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22246                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22247   %}
22248   ins_pipe( pipe_slow );
22249 %}
22250 
22251 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22252   predicate((VM_Version::supports_avx512vl() ||
22253              Matcher::vector_length_in_bytes(n) == 64) &&
22254              Matcher::vector_element_basic_type(n) == T_INT);
22255   match(Set dst (RoundVF src));
22256   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22257   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22258   ins_encode %{
22259     int vlen_enc = vector_length_encoding(this);
22260     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22261     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22262                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22263                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22264   %}
22265   ins_pipe( pipe_slow );
22266 %}
22267 
22268 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22269   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22270   match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22272   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22273   ins_encode %{
22274     int vlen_enc = vector_length_encoding(this);
22275     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22276     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22277                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22278                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22279   %}
22280   ins_pipe( pipe_slow );
22281 %}
22282 
22283 // --------------------------------- VectorMaskCmp --------------------------------------
22284 
22285 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22286   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22287             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22288             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22289             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22290   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22291   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22292   ins_encode %{
22293     int vlen_enc = vector_length_encoding(this, $src1);
22294     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22295     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22296       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22297     } else {
22298       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22299     }
22300   %}
22301   ins_pipe( pipe_slow );
22302 %}
22303 
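// With 64-byte vectors the comparison result is a k-register; when the user
// of the node expects a regular vector mask, the k-register is expanded by a
// masked move of the all-bits-set constant (merge == false zeroes the lanes
// whose mask bit is clear).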
22304 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22305   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22306             n->bottom_type()->isa_vectmask() == nullptr &&
22307             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22308   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22309   effect(TEMP ktmp);
22310   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22311   ins_encode %{
22312     int vlen_enc = Assembler::AVX_512bit;
22313     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22314     KRegister mask = k0; // The comparison itself is not being masked.
22315     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22316       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22317       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22318     } else {
22319       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22320       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22321     }
22322   %}
22323   ins_pipe( pipe_slow );
22324 %}
22325 
22326 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22327   predicate(n->bottom_type()->isa_vectmask() &&
22328             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22329   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22330   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22331   ins_encode %{
22332     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22333     int vlen_enc = vector_length_encoding(this, $src1);
22334     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22335     KRegister mask = k0; // The comparison itself is not being masked.
22336     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22337       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22338     } else {
22339       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22340     }
22341   %}
22342   ins_pipe( pipe_slow );
22343 %}
22344 
22345 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22346   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22347             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22348             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22349             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22350             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22351             (n->in(2)->get_int() == BoolTest::eq ||
22352              n->in(2)->get_int() == BoolTest::lt ||
22353              n->in(2)->get_int() == BoolTest::gt)); // cond
22354   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22355   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22356   ins_encode %{
22357     int vlen_enc = vector_length_encoding(this, $src1);
22358     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22359     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22360     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22361   %}
22362   ins_pipe( pipe_slow );
22363 %}
22364 
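// BoolTest::ne/le/ge have no direct pcmpeq/pcmpgt encoding, so vpcmpCCW
// computes the complementary predicate (eq/gt/lt) and, in effect, negates
// the result; that inversion is why this variant needs $xtmp while the
// direct eq/lt/gt variant above does not.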
22365 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22366   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22367             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22368             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22369             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22370             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22371             (n->in(2)->get_int() == BoolTest::ne ||
22372              n->in(2)->get_int() == BoolTest::le ||
22373              n->in(2)->get_int() == BoolTest::ge)); // cond
22374   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22375   effect(TEMP dst, TEMP xtmp);
22376   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22377   ins_encode %{
22378     int vlen_enc = vector_length_encoding(this, $src1);
22379     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22380     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22381     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22382   %}
22383   ins_pipe( pipe_slow );
22384 %}
22385 
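// AVX/AVX2 only provide signed integer compares, so unsigned predicates are
// handled by flipping the sign bit of both operands first: for example, for
// bytes (a ^ 0x80) <s (b ^ 0x80) holds exactly when a <u b. The constant
// loaded into $xtmp below is that per-element sign bit pattern.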
22386 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22387   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22388             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22389             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22390             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22391             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22392   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22393   effect(TEMP dst, TEMP xtmp);
22394   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22395   ins_encode %{
22396     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22397     int vlen_enc = vector_length_encoding(this, $src1);
22398     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22399     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22400 
22401     if (vlen_enc == Assembler::AVX_128bit) {
22402       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22403     } else {
22404       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22405     }
22406     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22407     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22408     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22409   %}
22410   ins_pipe( pipe_slow );
22411 %}
22412 
22413 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22414   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22415              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22416              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22417   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22418   effect(TEMP ktmp);
22419   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22420   ins_encode %{
22421     assert(UseAVX > 2, "required");
22422 
22423     int vlen_enc = vector_length_encoding(this, $src1);
22424     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22425     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22426     KRegister mask = k0; // The comparison itself is not being masked.
22427     bool merge = false;
22428     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22429 
22430     switch (src1_elem_bt) {
22431       case T_INT: {
22432         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22433         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22434         break;
22435       }
22436       case T_LONG: {
22437         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22438         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22439         break;
22440       }
22441       default: assert(false, "%s", type2name(src1_elem_bt));
22442     }
22443   %}
22444   ins_pipe( pipe_slow );
22445 %}
22446 
22448 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22449   predicate(n->bottom_type()->isa_vectmask() &&
22450             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22451   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22453   ins_encode %{
22454     assert(UseAVX > 2, "required");
22455     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22456 
22457     int vlen_enc = vector_length_encoding(this, $src1);
22458     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22459     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22460     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22461 
    // Compare into the destination k-register; passing k0 means the comparison itself is unmasked.
22463     switch (src1_elem_bt) {
22464       case T_BYTE: {
22465         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22466         break;
22467       }
22468       case T_SHORT: {
22469         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22470         break;
22471       }
22472       case T_INT: {
22473         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22474         break;
22475       }
22476       case T_LONG: {
22477         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22478         break;
22479       }
22480       default: assert(false, "%s", type2name(src1_elem_bt));
22481     }
22482   %}
22483   ins_pipe( pipe_slow );
22484 %}
22485 
22486 // Extract
22487 
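// Extract moves a single lane of a vector into a scalar GPR or FP register.
// When the source is wider than 128 bits, get_lane first isolates (into
// $vtmp) the 128-bit lane holding the requested element, and get_elem then
// picks the element out of that lane.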
22488 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22489   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22490   match(Set dst (ExtractI src idx));
22491   match(Set dst (ExtractS src idx));
22492   match(Set dst (ExtractB src idx));
22493   format %{ "extractI $dst,$src,$idx\t!" %}
22494   ins_encode %{
22495     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22496 
22497     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22498     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22499   %}
22500   ins_pipe( pipe_slow );
22501 %}
22502 
22503 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22504   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22505             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22506   match(Set dst (ExtractI src idx));
22507   match(Set dst (ExtractS src idx));
22508   match(Set dst (ExtractB src idx));
22509   effect(TEMP vtmp);
22510   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22511   ins_encode %{
22512     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22513 
22514     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22515     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22516     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22517   %}
22518   ins_pipe( pipe_slow );
22519 %}
22520 
22521 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22522   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22523   match(Set dst (ExtractL src idx));
22524   format %{ "extractL $dst,$src,$idx\t!" %}
22525   ins_encode %{
22526     assert(UseSSE >= 4, "required");
22527     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22528 
22529     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22530   %}
22531   ins_pipe( pipe_slow );
22532 %}
22533 
22534 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22535   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22536             Matcher::vector_length(n->in(1)) == 8);  // src
22537   match(Set dst (ExtractL src idx));
22538   effect(TEMP vtmp);
22539   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22540   ins_encode %{
22541     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22542 
22543     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22544     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22545   %}
22546   ins_pipe( pipe_slow );
22547 %}
22548 
22549 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22550   predicate(Matcher::vector_length(n->in(1)) <= 4);
22551   match(Set dst (ExtractF src idx));
22552   effect(TEMP dst, TEMP vtmp);
22553   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22554   ins_encode %{
22555     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22556 
22557     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22558   %}
22559   ins_pipe( pipe_slow );
22560 %}
22561 
22562 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 8 ||  // src
            Matcher::vector_length(n->in(1)) == 16); // src
22565   match(Set dst (ExtractF src idx));
22566   effect(TEMP vtmp);
22567   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22568   ins_encode %{
22569     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22570 
22571     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22572     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22573   %}
22574   ins_pipe( pipe_slow );
22575 %}
22576 
22577 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22578   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22579   match(Set dst (ExtractD src idx));
22580   format %{ "extractD $dst,$src,$idx\t!" %}
22581   ins_encode %{
22582     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22583 
22584     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22585   %}
22586   ins_pipe( pipe_slow );
22587 %}
22588 
22589 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22590   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22591             Matcher::vector_length(n->in(1)) == 8);  // src
22592   match(Set dst (ExtractD src idx));
22593   effect(TEMP vtmp);
22594   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22595   ins_encode %{
22596     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22597 
22598     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22599     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22600   %}
22601   ins_pipe( pipe_slow );
22602 %}
22603 
22604 // --------------------------------- Vector Blend --------------------------------------
22605 
22606 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22607   predicate(UseAVX == 0);
22608   match(Set dst (VectorBlend (Binary dst src) mask));
22609   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22610   effect(TEMP tmp);
22611   ins_encode %{
22612     assert(UseSSE >= 4, "required");
22613 
22614     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22615       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22616     }
22617     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22618   %}
22619   ins_pipe( pipe_slow );
22620 %}
22621 
22622 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22623   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22624             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22625             Matcher::vector_length_in_bytes(n) <= 32 &&
22626             is_integral_type(Matcher::vector_element_basic_type(n)));
22627   match(Set dst (VectorBlend (Binary src1 src2) mask));
22628   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22629   ins_encode %{
22630     int vlen_enc = vector_length_encoding(this);
22631     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22632   %}
22633   ins_pipe( pipe_slow );
22634 %}
22635 
22636 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22637   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22638             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22639             Matcher::vector_length_in_bytes(n) <= 32 &&
22640             !is_integral_type(Matcher::vector_element_basic_type(n)));
22641   match(Set dst (VectorBlend (Binary src1 src2) mask));
22642   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22643   ins_encode %{
22644     int vlen_enc = vector_length_encoding(this);
22645     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22646   %}
22647   ins_pipe( pipe_slow );
22648 %}
22649 
22650 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22651   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22652             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22653             Matcher::vector_length_in_bytes(n) <= 32);
22654   match(Set dst (VectorBlend (Binary src1 src2) mask));
22655   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22656   effect(TEMP vtmp, TEMP dst);
22657   ins_encode %{
22658     int vlen_enc = vector_length_encoding(this);
22659     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22660     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22661     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22662   %}
22663   ins_pipe( pipe_slow );
22664 %}
22665 
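// 64-byte blend with a vector (non-predicate) mask: rebuild a k-mask by
// comparing the mask against the all-bits-set constant, then let the masked
// blend select $src2 where the bit is set and $src1 where it is clear.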
22666 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22667   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22668             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22669   match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22671   effect(TEMP ktmp);
22672   ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
22675     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22676     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22677   %}
22678   ins_pipe( pipe_slow );
22679 %}
22680 
22682 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22683   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22684             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22685              VM_Version::supports_avx512bw()));
22686   match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22688   ins_encode %{
22689     int vlen_enc = vector_length_encoding(this);
22690     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22691     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22692   %}
22693   ins_pipe( pipe_slow );
22694 %}
22695 
22696 // --------------------------------- ABS --------------------------------------
22697 // a = |a|
22698 instruct vabsB_reg(vec dst, vec src) %{
22699   match(Set dst (AbsVB  src));
22700   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22701   ins_encode %{
22702     uint vlen = Matcher::vector_length(this);
22703     if (vlen <= 16) {
22704       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22705     } else {
22706       int vlen_enc = vector_length_encoding(this);
22707       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22708     }
22709   %}
22710   ins_pipe( pipe_slow );
22711 %}
22712 
22713 instruct vabsS_reg(vec dst, vec src) %{
22714   match(Set dst (AbsVS  src));
22715   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22716   ins_encode %{
22717     uint vlen = Matcher::vector_length(this);
22718     if (vlen <= 8) {
22719       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22720     } else {
22721       int vlen_enc = vector_length_encoding(this);
22722       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22723     }
22724   %}
22725   ins_pipe( pipe_slow );
22726 %}
22727 
22728 instruct vabsI_reg(vec dst, vec src) %{
22729   match(Set dst (AbsVI  src));
22730   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22731   ins_encode %{
22732     uint vlen = Matcher::vector_length(this);
22733     if (vlen <= 4) {
22734       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22735     } else {
22736       int vlen_enc = vector_length_encoding(this);
22737       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22738     }
22739   %}
22740   ins_pipe( pipe_slow );
22741 %}
22742 
22743 instruct vabsL_reg(vec dst, vec src) %{
22744   match(Set dst (AbsVL  src));
22745   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22746   ins_encode %{
22747     assert(UseAVX > 2, "required");
22748     int vlen_enc = vector_length_encoding(this);
22749     if (!VM_Version::supports_avx512vl()) {
22750       vlen_enc = Assembler::AVX_512bit;
22751     }
22752     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22753   %}
22754   ins_pipe( pipe_slow );
22755 %}
22756 
22757 // --------------------------------- ABSNEG --------------------------------------
22758 
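// Both Abs and Neg are implemented with a bitwise operation on the sign bit,
// which is what the [mask] in the formats below refers to: abs ANDs the sign
// bit away (e.g. 0x7fffffff per float lane) and neg XORs it (0x80000000).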
22759 instruct vabsnegF(vec dst, vec src) %{
22760   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22761   match(Set dst (AbsVF src));
22762   match(Set dst (NegVF src));
22763   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22764   ins_cost(150);
22765   ins_encode %{
22766     int opcode = this->ideal_Opcode();
22767     int vlen = Matcher::vector_length(this);
22768     if (vlen == 2) {
22769       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22770     } else {
22771       assert(vlen == 8 || vlen == 16, "required");
22772       int vlen_enc = vector_length_encoding(this);
22773       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22774     }
22775   %}
22776   ins_pipe( pipe_slow );
22777 %}
22778 
22779 instruct vabsneg4F(vec dst) %{
22780   predicate(Matcher::vector_length(n) == 4);
22781   match(Set dst (AbsVF dst));
22782   match(Set dst (NegVF dst));
22783   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22784   ins_cost(150);
22785   ins_encode %{
22786     int opcode = this->ideal_Opcode();
22787     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22788   %}
22789   ins_pipe( pipe_slow );
22790 %}
22791 
22792 instruct vabsnegD(vec dst, vec src) %{
22793   match(Set dst (AbsVD  src));
22794   match(Set dst (NegVD  src));
22795   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22796   ins_encode %{
22797     int opcode = this->ideal_Opcode();
22798     uint vlen = Matcher::vector_length(this);
22799     if (vlen == 2) {
22800       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22801     } else {
22802       int vlen_enc = vector_length_encoding(this);
22803       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22804     }
22805   %}
22806   ins_pipe( pipe_slow );
22807 %}
22808 
22809 //------------------------------------- VectorTest --------------------------------------------
22810 
22811 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22812   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22813   match(Set cr (VectorTest src1 src2));
22814   effect(TEMP vtmp);
22815   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22816   ins_encode %{
22817     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22818     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22819     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22820   %}
22821   ins_pipe( pipe_slow );
22822 %}
22823 
22824 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22825   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22826   match(Set cr (VectorTest src1 src2));
22827   format %{ "vptest_ge16  $src1, $src2\n\t" %}
22828   ins_encode %{
22829     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22830     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22831     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22832   %}
22833   ins_pipe( pipe_slow );
22834 %}
22835 
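// For mask lengths below what kortest can test natively, move the mask to a
// GPR, clear the bits beyond masklen, and test: all-true means the low
// masklen bits are all set (e.g. for masklen == 4, (k & 0xF) == 0xF), while
// any-true only needs the masked value to be non-zero.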
22836 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22837   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22838              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22839             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22840   match(Set cr (VectorTest src1 src2));
22841   effect(TEMP tmp);
22842   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22843   ins_encode %{
22844     uint masklen = Matcher::vector_length(this, $src1);
22845     __ kmovwl($tmp$$Register, $src1$$KRegister);
22846     __ andl($tmp$$Register, (1 << masklen) - 1);
22847     __ cmpl($tmp$$Register, (1 << masklen) - 1);
22848   %}
22849   ins_pipe( pipe_slow );
22850 %}
22851 
22852 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22853   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22854              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22855             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22856   match(Set cr (VectorTest src1 src2));
22857   effect(TEMP tmp);
22858   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22859   ins_encode %{
22860     uint masklen = Matcher::vector_length(this, $src1);
22861     __ kmovwl($tmp$$Register, $src1$$KRegister);
22862     __ andl($tmp$$Register, (1 << masklen) - 1);
22863   %}
22864   ins_pipe( pipe_slow );
22865 %}
22866 
22867 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
22868   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22869             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22870   match(Set cr (VectorTest src1 src2));
22871   format %{ "ktest_ge8  $src1, $src2\n\t" %}
22872   ins_encode %{
22873     uint masklen = Matcher::vector_length(this, $src1);
22874     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22875   %}
22876   ins_pipe( pipe_slow );
22877 %}
22878 
22879 //------------------------------------- LoadMask --------------------------------------------
22880 
22881 instruct loadMask(legVec dst, legVec src) %{
22882   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22883   match(Set dst (VectorLoadMask src));
22884   effect(TEMP dst);
22885   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
22886   ins_encode %{
22887     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22888     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22889     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22890   %}
22891   ins_pipe( pipe_slow );
22892 %}
22893 
22894 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
22895   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22896   match(Set dst (VectorLoadMask src));
22897   effect(TEMP xtmp);
22898   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22899   ins_encode %{
22900     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22901                         true, Assembler::AVX_512bit);
22902   %}
22903   ins_pipe( pipe_slow );
22904 %}
22905 
22906 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
22907   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22908   match(Set dst (VectorLoadMask src));
22909   effect(TEMP xtmp);
22910   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22911   ins_encode %{
22912     int vlen_enc = vector_length_encoding(in(1));
22913     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22914                         false, vlen_enc);
22915   %}
22916   ins_pipe( pipe_slow );
22917 %}
22918 
22919 //------------------------------------- StoreMask --------------------------------------------
22920 
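// VectorStoreMask normalizes a vector mask (all-ones in each true lane) into
// a boolean byte vector holding 0 or 1 per lane: the pabsb/pabsd steps map
// -1 to 1, and the pack instructions narrow wider mask elements to bytes.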
22921 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
22922   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22923   match(Set dst (VectorStoreMask src size));
22924   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22925   ins_encode %{
22926     int vlen = Matcher::vector_length(this);
22927     if (vlen <= 16 && UseAVX <= 2) {
22928       assert(UseSSE >= 3, "required");
22929       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22930     } else {
22931       assert(UseAVX > 0, "required");
22932       int src_vlen_enc = vector_length_encoding(this, $src);
22933       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22934     }
22935   %}
22936   ins_pipe( pipe_slow );
22937 %}
22938 
22939 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
22940   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22941   match(Set dst (VectorStoreMask src size));
22942   effect(TEMP_DEF dst, TEMP xtmp);
22943   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22944   ins_encode %{
22945     int vlen_enc = Assembler::AVX_128bit;
22946     int vlen = Matcher::vector_length(this);
22947     if (vlen <= 8) {
22948       assert(UseSSE >= 3, "required");
22949       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22950       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22951       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22952     } else {
22953       assert(UseAVX > 0, "required");
22954       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22955       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22956       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22957     }
22958   %}
22959   ins_pipe( pipe_slow );
22960 %}
22961 
22962 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
22963   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22964   match(Set dst (VectorStoreMask src size));
22965   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22966   effect(TEMP_DEF dst, TEMP xtmp);
22967   ins_encode %{
22968     int vlen_enc = Assembler::AVX_128bit;
22969     int vlen = Matcher::vector_length(this);
22970     if (vlen <= 4) {
22971       assert(UseSSE >= 3, "required");
22972       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22973       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22974       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22975       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22976     } else {
22977       assert(UseAVX > 0, "required");
22978       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22979       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22980       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22981       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22982       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22983     }
22984   %}
22985   ins_pipe( pipe_slow );
22986 %}
22987 
22988 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
22989   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
22990   match(Set dst (VectorStoreMask src size));
22991   effect(TEMP_DEF dst, TEMP xtmp);
22992   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22993   ins_encode %{
22994     assert(UseSSE >= 3, "required");
22995     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22996     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
22997     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
22998     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22999     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23000   %}
23001   ins_pipe( pipe_slow );
23002 %}
23003 
23004 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23005   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23006   match(Set dst (VectorStoreMask src size));
23007   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23008   effect(TEMP_DEF dst, TEMP vtmp);
23009   ins_encode %{
23010     int vlen_enc = Assembler::AVX_128bit;
23011     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23012     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23013     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23014     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23015     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23016     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23017     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23018   %}
23019   ins_pipe( pipe_slow );
23020 %}
23021 
23022 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23023   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23024   match(Set dst (VectorStoreMask src size));
23025   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23026   ins_encode %{
23027     int src_vlen_enc = vector_length_encoding(this, $src);
23028     int dst_vlen_enc = vector_length_encoding(this);
23029     if (!VM_Version::supports_avx512vl()) {
23030       src_vlen_enc = Assembler::AVX_512bit;
23031     }
23032     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23033     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23034   %}
23035   ins_pipe( pipe_slow );
23036 %}
23037 
23038 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23039   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23040   match(Set dst (VectorStoreMask src size));
23041   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23042   ins_encode %{
23043     int src_vlen_enc = vector_length_encoding(this, $src);
23044     int dst_vlen_enc = vector_length_encoding(this);
23045     if (!VM_Version::supports_avx512vl()) {
23046       src_vlen_enc = Assembler::AVX_512bit;
23047     }
23048     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23049     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23050   %}
23051   ins_pipe( pipe_slow );
23052 %}
23053 
23054 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23055   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23056   match(Set dst (VectorStoreMask mask size));
23057   effect(TEMP_DEF dst);
23058   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23059   ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "required");
23061     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23062                  false, Assembler::AVX_512bit, noreg);
23063     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23064   %}
23065   ins_pipe( pipe_slow );
23066 %}
23067 
23068 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23069   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23070   match(Set dst (VectorStoreMask mask size));
23071   effect(TEMP_DEF dst);
23072   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23073   ins_encode %{
23074     int dst_vlen_enc = vector_length_encoding(this);
23075     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23076     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23077   %}
23078   ins_pipe( pipe_slow );
23079 %}
23080 
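// Mask casts are free when no data movement is needed: a kReg cast is a pure
// reinterpretation, as is a vector mask cast that keeps the total length in
// bytes unchanged, so the two instructs below emit no code. Only a cast that
// changes the element size (vmaskcast_avx) has to do real work.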
23081 instruct vmaskcast_evex(kReg dst) %{
23082   match(Set dst (VectorMaskCast dst));
23083   ins_cost(0);
23084   format %{ "vector_mask_cast $dst" %}
23085   ins_encode %{
23086     // empty
23087   %}
23088   ins_pipe(empty);
23089 %}
23090 
23091 instruct vmaskcast(vec dst) %{
23092   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23093   match(Set dst (VectorMaskCast dst));
23094   ins_cost(0);
23095   format %{ "vector_mask_cast $dst" %}
23096   ins_encode %{
23097     // empty
23098   %}
23099   ins_pipe(empty);
23100 %}
23101 
23102 instruct vmaskcast_avx(vec dst, vec src) %{
23103   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23104   match(Set dst (VectorMaskCast src));
23105   format %{ "vector_mask_cast $dst, $src" %}
23106   ins_encode %{
23107     int vlen = Matcher::vector_length(this);
23108     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23109     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23110     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23111   %}
23112   ins_pipe(pipe_slow);
23113 %}
23114 
23115 //-------------------------------- Load Iota Indices ----------------------------------
23116 
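// The iota table is a constant-memory array {0, 1, 2, ...} per element type;
// loading it gives every lane its own index, which PopulateIndex below adds
// a starting value to.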
23117 instruct loadIotaIndices(vec dst, immI_0 src) %{
23118   match(Set dst (VectorLoadConst src));
23119   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23120   ins_encode %{
23121      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23122      BasicType bt = Matcher::vector_element_basic_type(this);
23123      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23124   %}
23125   ins_pipe( pipe_slow );
23126 %}
23127 
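// PopulateIndex computes dst[i] = src1 + i (the stride $src2 is constrained
// to the constant 1): broadcast the start value, load the iota indices, and
// add them element-wise.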
23128 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23129   match(Set dst (PopulateIndex src1 src2));
23130   effect(TEMP dst, TEMP vtmp);
23131   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23132   ins_encode %{
23133      assert($src2$$constant == 1, "required");
23134      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23135      int vlen_enc = vector_length_encoding(this);
23136      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23137      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23138      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23139      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23140   %}
23141   ins_pipe( pipe_slow );
23142 %}
23143 
23144 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23145   match(Set dst (PopulateIndex src1 src2));
23146   effect(TEMP dst, TEMP vtmp);
23147   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23148   ins_encode %{
23149      assert($src2$$constant == 1, "required");
23150      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23151      int vlen_enc = vector_length_encoding(this);
23152      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23153      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23154      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23155      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23156   %}
23157   ins_pipe( pipe_slow );
23158 %}
23159 
23160 //-------------------------------- Rearrange ----------------------------------
23161 
23162 // LoadShuffle/Rearrange for Byte
23163 instruct rearrangeB(vec dst, vec shuffle) %{
23164   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23165             Matcher::vector_length(n) < 32);
23166   match(Set dst (VectorRearrange dst shuffle));
23167   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23168   ins_encode %{
23169     assert(UseSSE >= 4, "required");
23170     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23171   %}
23172   ins_pipe( pipe_slow );
23173 %}
23174 
23175 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23176   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23177             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23178   match(Set dst (VectorRearrange src shuffle));
23179   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23180   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23181   ins_encode %{
23182     assert(UseAVX >= 2, "required");
    // Swap the two 128-bit halves of src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries that live in the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit of every entry that comes from the other lane
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23193   %}
23194   ins_pipe( pipe_slow );
23195 %}
23196 
23197 
23198 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23199   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23200             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23201   match(Set dst (VectorRearrange src shuffle));
23202   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23204   ins_encode %{
23205     int vlen_enc = vector_length_encoding(this);
23206     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23207                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23208                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23209   %}
23210   ins_pipe( pipe_slow );
23211 %}
23212 
23213 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23214   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23215             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23216   match(Set dst (VectorRearrange src shuffle));
23217   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23218   ins_encode %{
23219     int vlen_enc = vector_length_encoding(this);
23220     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23221   %}
23222   ins_pipe( pipe_slow );
23223 %}
23224 
23225 // LoadShuffle/Rearrange for Short
23226 
23227 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23228   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23229             !VM_Version::supports_avx512bw());
23230   match(Set dst (VectorLoadShuffle src));
23231   effect(TEMP dst, TEMP vtmp);
23232   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23233   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
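    // E.g. a short lane index of 3 becomes the byte index pair {6, 7}.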
23236     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23237     if (UseAVX == 0) {
23238       assert(vlen_in_bytes <= 16, "required");
23239       // Multiply each shuffle by two to get byte index
23240       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23241       __ psllw($vtmp$$XMMRegister, 1);
23242 
23243       // Duplicate to create 2 copies of byte index
23244       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23245       __ psllw($dst$$XMMRegister, 8);
23246       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23247 
23248       // Add one to get alternate byte index
23249       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23250       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23251     } else {
23252       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23253       int vlen_enc = vector_length_encoding(this);
23254       // Multiply each shuffle by two to get byte index
23255       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23256 
23257       // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23259       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23260 
23261       // Add one to get alternate byte index
23262       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23263     }
23264   %}
23265   ins_pipe( pipe_slow );
23266 %}
23267 
23268 instruct rearrangeS(vec dst, vec shuffle) %{
23269   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23270             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23271   match(Set dst (VectorRearrange dst shuffle));
23272   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23273   ins_encode %{
23274     assert(UseSSE >= 4, "required");
23275     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23276   %}
23277   ins_pipe( pipe_slow );
23278 %}
23279 
23280 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23281   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23282             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23283   match(Set dst (VectorRearrange src shuffle));
23284   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23285   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23286   ins_encode %{
23287     assert(UseAVX >= 2, "required");
    // Swap the two 128-bit halves of src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries that live in the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit of every entry that comes from the other lane
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23298   %}
23299   ins_pipe( pipe_slow );
23300 %}
23301 
23302 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23303   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23304             VM_Version::supports_avx512bw());
23305   match(Set dst (VectorRearrange src shuffle));
23306   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23307   ins_encode %{
23308     int vlen_enc = vector_length_encoding(this);
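    // Without AVX512VL, the EVEX vpermw encoding is only available at 512-bit
    // width.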
23309     if (!VM_Version::supports_avx512vl()) {
23310       vlen_enc = Assembler::AVX_512bit;
23311     }
23312     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23313   %}
23314   ins_pipe( pipe_slow );
23315 %}
23316 
23317 // LoadShuffle/Rearrange for Integer and Float
23318 
23319 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23320   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23321             Matcher::vector_length(n) == 4 && UseAVX == 0);
23322   match(Set dst (VectorLoadShuffle src));
23323   effect(TEMP dst, TEMP vtmp);
23324   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23325   ins_encode %{
23326     assert(UseSSE >= 4, "required");
23327 
    // Create a byte shuffle mask from the int shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
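    // E.g. an int lane index of 2 becomes the byte indices {8, 9, 10, 11}.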
23330 
23331     // Duplicate and multiply each shuffle by 4
23332     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23333     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23334     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23335     __ psllw($vtmp$$XMMRegister, 2);
23336 
23337     // Duplicate again to create 4 copies of byte index
23338     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23339     __ psllw($dst$$XMMRegister, 8);
23340     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23341 
23342     // Add 3,2,1,0 to get alternate byte index
23343     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23344     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23345   %}
23346   ins_pipe( pipe_slow );
23347 %}
23348 
23349 instruct rearrangeI(vec dst, vec shuffle) %{
23350   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23351             UseAVX == 0);
23352   match(Set dst (VectorRearrange dst shuffle));
23353   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23354   ins_encode %{
23355     assert(UseSSE >= 4, "required");
23356     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23357   %}
23358   ins_pipe( pipe_slow );
23359 %}
23360 
23361 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23362   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23363             UseAVX > 0);
23364   match(Set dst (VectorRearrange src shuffle));
23365   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23366   ins_encode %{
23367     int vlen_enc = vector_length_encoding(this);
23368     BasicType bt = Matcher::vector_element_basic_type(this);
23369     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23370   %}
23371   ins_pipe( pipe_slow );
23372 %}
23373 
23374 // LoadShuffle/Rearrange for Long and Double
23375 
23376 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23377   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23378             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23379   match(Set dst (VectorLoadShuffle src));
23380   effect(TEMP dst, TEMP vtmp);
23381   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23382   ins_encode %{
23383     assert(UseAVX >= 2, "required");
23384 
23385     int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask, since only
    // a double word shuffle instruction is available on these platforms.
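    // E.g. a long lane index of 1 becomes the double word index pair {2, 3}.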
23388 
23389     // Multiply each shuffle by two to get double word index
23390     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23391 
23392     // Duplicate each double word shuffle
23393     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23394     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23395 
23396     // Add one to get alternate double word index
23397     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23398   %}
23399   ins_pipe( pipe_slow );
23400 %}
23401 
23402 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23403   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23404             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23405   match(Set dst (VectorRearrange src shuffle));
23406   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23407   ins_encode %{
23408     assert(UseAVX >= 2, "required");
23409 
23410     int vlen_enc = vector_length_encoding(this);
23411     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23412   %}
23413   ins_pipe( pipe_slow );
23414 %}
23415 
23416 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23417   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23418             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23419   match(Set dst (VectorRearrange src shuffle));
23420   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23421   ins_encode %{
23422     assert(UseAVX > 2, "required");
23423 
23424     int vlen_enc = vector_length_encoding(this);
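    // vpermq has no 128-bit form, so widen the encoding to 256 bits.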
23425     if (vlen_enc == Assembler::AVX_128bit) {
23426       vlen_enc = Assembler::AVX_256bit;
23427     }
23428     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23429   %}
23430   ins_pipe( pipe_slow );
23431 %}
23432 
23433 // --------------------------------- FMA --------------------------------------
23434 // a * b + c
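// The vfma* forms contract the multiply and the add into a single instruction
// with one rounding step, which is why they are gated on UseFMA.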
23435 
23436 instruct vfmaF_reg(vec a, vec b, vec c) %{
23437   match(Set c (FmaVF  c (Binary a b)));
23438   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23439   ins_cost(150);
23440   ins_encode %{
23441     assert(UseFMA, "not enabled");
23442     int vlen_enc = vector_length_encoding(this);
23443     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23444   %}
23445   ins_pipe( pipe_slow );
23446 %}
23447 
23448 instruct vfmaF_mem(vec a, memory b, vec c) %{
23449   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23450   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23451   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23452   ins_cost(150);
23453   ins_encode %{
23454     assert(UseFMA, "not enabled");
23455     int vlen_enc = vector_length_encoding(this);
23456     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23457   %}
23458   ins_pipe( pipe_slow );
23459 %}
23460 
23461 instruct vfmaD_reg(vec a, vec b, vec c) %{
23462   match(Set c (FmaVD  c (Binary a b)));
23463   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23464   ins_cost(150);
23465   ins_encode %{
23466     assert(UseFMA, "not enabled");
23467     int vlen_enc = vector_length_encoding(this);
23468     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23469   %}
23470   ins_pipe( pipe_slow );
23471 %}
23472 
23473 instruct vfmaD_mem(vec a, memory b, vec c) %{
23474   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23475   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23476   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23477   ins_cost(150);
23478   ins_encode %{
23479     assert(UseFMA, "not enabled");
23480     int vlen_enc = vector_length_encoding(this);
23481     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23482   %}
23483   ins_pipe( pipe_slow );
23484 %}
23485 
23486 // --------------------------------- Vector Multiply Add --------------------------------------
23487 
23488 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23489   predicate(UseAVX == 0);
23490   match(Set dst (MulAddVS2VI dst src1));
23491   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23492   ins_encode %{
23493     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23494   %}
23495   ins_pipe( pipe_slow );
23496 %}
23497 
23498 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23499   predicate(UseAVX > 0);
23500   match(Set dst (MulAddVS2VI src1 src2));
23501   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23502   ins_encode %{
23503     int vlen_enc = vector_length_encoding(this);
23504     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23505   %}
23506   ins_pipe( pipe_slow );
23507 %}
23508 
23509 // --------------------------------- Vector Multiply Add Add ----------------------------------
23510 
23511 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23512   predicate(VM_Version::supports_avx512_vnni());
23513   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23514   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23515   ins_encode %{
23516     assert(UseAVX > 2, "required");
23517     int vlen_enc = vector_length_encoding(this);
23518     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23519   %}
  ins_cost(10);
  ins_pipe( pipe_slow );
23522 %}
23523 
23524 // --------------------------------- PopCount --------------------------------------
23525 
23526 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23527   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23528   match(Set dst (PopCountVI src));
23529   match(Set dst (PopCountVL src));
23530   format %{ "vector_popcount_integral $dst, $src" %}
23531   ins_encode %{
23532     int opcode = this->ideal_Opcode();
23533     int vlen_enc = vector_length_encoding(this, $src);
23534     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23535     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23536   %}
23537   ins_pipe( pipe_slow );
23538 %}
23539 
23540 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23541   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23542   match(Set dst (PopCountVI src mask));
23543   match(Set dst (PopCountVL src mask));
23544   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23545   ins_encode %{
23546     int vlen_enc = vector_length_encoding(this, $src);
23547     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23548     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23549     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23550   %}
23551   ins_pipe( pipe_slow );
23552 %}
23553 
23554 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23555   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23556   match(Set dst (PopCountVI src));
23557   match(Set dst (PopCountVL src));
23558   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23559   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23560   ins_encode %{
23561     int opcode = this->ideal_Opcode();
23562     int vlen_enc = vector_length_encoding(this, $src);
23563     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23564     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23565                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23566   %}
23567   ins_pipe( pipe_slow );
23568 %}
23569 
23570 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23571 
23572 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23573   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23574                                               Matcher::vector_length_in_bytes(n->in(1))));
23575   match(Set dst (CountTrailingZerosV src));
23576   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23577   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23579   ins_encode %{
23580     int vlen_enc = vector_length_encoding(this, $src);
23581     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23582     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23583                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23584   %}
23585   ins_pipe( pipe_slow );
23586 %}
23587 
23588 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23589   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23590             VM_Version::supports_avx512cd() &&
23591             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23592   match(Set dst (CountTrailingZerosV src));
23593   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23594   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23596   ins_encode %{
23597     int vlen_enc = vector_length_encoding(this, $src);
23598     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23599     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23600                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23601   %}
23602   ins_pipe( pipe_slow );
23603 %}
23604 
23605 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23606   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23607   match(Set dst (CountTrailingZerosV src));
23608   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23609   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23611   ins_encode %{
23612     int vlen_enc = vector_length_encoding(this, $src);
23613     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23614     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23615                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23616                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23617   %}
23618   ins_pipe( pipe_slow );
23619 %}
23620 
23621 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23622   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23623   match(Set dst (CountTrailingZerosV src));
23624   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23625   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23626   ins_encode %{
23627     int vlen_enc = vector_length_encoding(this, $src);
23628     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23629     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23630                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23631   %}
23632   ins_pipe( pipe_slow );
23633 %}
23634 
23635 
23636 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23637 
23638 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23639   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23640   effect(TEMP dst);
23641   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23642   ins_encode %{
23643     int vector_len = vector_length_encoding(this);
23644     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23645   %}
23646   ins_pipe( pipe_slow );
23647 %}
23648 
23649 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23650   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23651   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23652   effect(TEMP dst);
23653   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23654   ins_encode %{
23655     int vector_len = vector_length_encoding(this);
23656     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23657   %}
23658   ins_pipe( pipe_slow );
23659 %}
23660 
23661 // --------------------------------- Rotation Operations ----------------------------------
23662 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23663   match(Set dst (RotateLeftV src shift));
23664   match(Set dst (RotateRightV src shift));
23665   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23666   ins_encode %{
23667     int opcode      = this->ideal_Opcode();
23668     int vector_len  = vector_length_encoding(this);
23669     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23670     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23671   %}
23672   ins_pipe( pipe_slow );
23673 %}
23674 
23675 instruct vprorate(vec dst, vec src, vec shift) %{
23676   match(Set dst (RotateLeftV src shift));
23677   match(Set dst (RotateRightV src shift));
23678   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23679   ins_encode %{
23680     int opcode      = this->ideal_Opcode();
23681     int vector_len  = vector_length_encoding(this);
23682     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23683     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23684   %}
23685   ins_pipe( pipe_slow );
23686 %}
23687 
23688 // ---------------------------------- Masked Operations ------------------------------------
23689 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23690   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23691   match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
23693   ins_encode %{
23694     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23695     int vlen_enc = vector_length_encoding(this);
23696     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23697   %}
23698   ins_pipe( pipe_slow );
23699 %}
23700 
23701 
23702 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23703   predicate(n->in(3)->bottom_type()->isa_vectmask());
23704   match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
23706   ins_encode %{
23707     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
23708     int vector_len = vector_length_encoding(this);
23709     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23710   %}
23711   ins_pipe( pipe_slow );
23712 %}
23713 
23714 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23715   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23716   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23717   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23718   ins_encode %{
23719     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23720     int vlen_enc = vector_length_encoding(src_node);
23721     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23722     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23723   %}
23724   ins_pipe( pipe_slow );
23725 %}
23726 
23727 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23728   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23729   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23730   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23731   ins_encode %{
23732     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23733     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23734     int vlen_enc = vector_length_encoding(src_node);
23735     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23736   %}
23737   ins_pipe( pipe_slow );
23738 %}
23739 
23740 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23741   match(Set addr (VerifyVectorAlignment addr mask));
23742   effect(KILL cr);
23743   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23744   ins_encode %{
23745     Label Lskip;
23746     // check if masked bits of addr are zero
23747     __ testq($addr$$Register, $mask$$constant);
23748     __ jccb(Assembler::equal, Lskip);
23749     __ stop("verify_vector_alignment found a misaligned vector memory access");
23750     __ bind(Lskip);
23751   %}
23752   ins_pipe(pipe_slow);
23753 %}
23754 
23755 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23756   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23757   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23758   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23759   ins_encode %{
23760     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23761     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
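    // $dst ends up -1 when every lane selected by $mask compares equal;
    // otherwise it holds the index of the first differing lane ($mask is
    // expected to be a prefix mask here, as for the arrays-mismatch intrinsic).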
23762 
23763     Label DONE;
23764     int vlen_enc = vector_length_encoding(this, $src1);
23765     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23766 
23767     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23768     __ mov64($dst$$Register, -1L);
23769     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23770     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23771     __ jccb(Assembler::carrySet, DONE);
23772     __ kmovql($dst$$Register, $ktmp1$$KRegister);
23773     __ notq($dst$$Register);
23774     __ tzcntq($dst$$Register, $dst$$Register);
23775     __ bind(DONE);
23776   %}
23777   ins_pipe( pipe_slow );
23778 %}
23779 
23780 
23781 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23782   match(Set dst (VectorMaskGen len));
23783   effect(TEMP temp, KILL cr);
23784   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23785   ins_encode %{
23786     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23787   %}
23788   ins_pipe( pipe_slow );
23789 %}
23790 
23791 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23792   match(Set dst (VectorMaskGen len));
23793   format %{ "vector_mask_gen $len \t! vector mask generator" %}
23794   effect(TEMP temp);
23795   ins_encode %{
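    // All-ones shifted right by (64 - $len) leaves exactly the low $len bits set.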
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23797     __ kmovql($dst$$KRegister, $temp$$Register);
23798   %}
23799   ins_pipe( pipe_slow );
23800 %}
23801 
23802 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23803   predicate(n->in(1)->bottom_type()->isa_vectmask());
23804   match(Set dst (VectorMaskToLong mask));
23805   effect(TEMP dst, KILL cr);
23806   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23807   ins_encode %{
23808     int opcode = this->ideal_Opcode();
23809     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23810     int mask_len = Matcher::vector_length(this, $mask);
23811     int mask_size = mask_len * type2aelembytes(mbt);
23812     int vlen_enc = vector_length_encoding(this, $mask);
23813     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23814                              $dst$$Register, mask_len, mask_size, vlen_enc);
23815   %}
23816   ins_pipe( pipe_slow );
23817 %}
23818 
23819 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23820   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23821   match(Set dst (VectorMaskToLong mask));
23822   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23823   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23824   ins_encode %{
23825     int opcode = this->ideal_Opcode();
23826     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23827     int mask_len = Matcher::vector_length(this, $mask);
23828     int vlen_enc = vector_length_encoding(this, $mask);
23829     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23830                              $dst$$Register, mask_len, mbt, vlen_enc);
23831   %}
23832   ins_pipe( pipe_slow );
23833 %}
23834 
23835 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23836   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23837   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23838   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23839   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23840   ins_encode %{
23841     int opcode = this->ideal_Opcode();
23842     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23843     int mask_len = Matcher::vector_length(this, $mask);
23844     int vlen_enc = vector_length_encoding(this, $mask);
23845     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23846                              $dst$$Register, mask_len, mbt, vlen_enc);
23847   %}
23848   ins_pipe( pipe_slow );
23849 %}
23850 
23851 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23852   predicate(n->in(1)->bottom_type()->isa_vectmask());
23853   match(Set dst (VectorMaskTrueCount mask));
23854   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23855   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23856   ins_encode %{
23857     int opcode = this->ideal_Opcode();
23858     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23859     int mask_len = Matcher::vector_length(this, $mask);
23860     int mask_size = mask_len * type2aelembytes(mbt);
23861     int vlen_enc = vector_length_encoding(this, $mask);
23862     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23863                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23864   %}
23865   ins_pipe( pipe_slow );
23866 %}
23867 
23868 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23869   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23870   match(Set dst (VectorMaskTrueCount mask));
23871   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23872   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23873   ins_encode %{
23874     int opcode = this->ideal_Opcode();
23875     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23876     int mask_len = Matcher::vector_length(this, $mask);
23877     int vlen_enc = vector_length_encoding(this, $mask);
23878     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23879                              $tmp$$Register, mask_len, mbt, vlen_enc);
23880   %}
23881   ins_pipe( pipe_slow );
23882 %}
23883 
23884 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23885   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23886   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23887   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23888   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23889   ins_encode %{
23890     int opcode = this->ideal_Opcode();
23891     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23892     int mask_len = Matcher::vector_length(this, $mask);
23893     int vlen_enc = vector_length_encoding(this, $mask);
23894     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23895                              $tmp$$Register, mask_len, mbt, vlen_enc);
23896   %}
23897   ins_pipe( pipe_slow );
23898 %}
23899 
23900 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23901   predicate(n->in(1)->bottom_type()->isa_vectmask());
23902   match(Set dst (VectorMaskFirstTrue mask));
23903   match(Set dst (VectorMaskLastTrue mask));
23904   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23905   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23906   ins_encode %{
23907     int opcode = this->ideal_Opcode();
23908     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23909     int mask_len = Matcher::vector_length(this, $mask);
23910     int mask_size = mask_len * type2aelembytes(mbt);
23911     int vlen_enc = vector_length_encoding(this, $mask);
23912     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23913                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23914   %}
23915   ins_pipe( pipe_slow );
23916 %}
23917 
23918 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23919   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23920   match(Set dst (VectorMaskFirstTrue mask));
23921   match(Set dst (VectorMaskLastTrue mask));
23922   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23923   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23924   ins_encode %{
23925     int opcode = this->ideal_Opcode();
23926     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23927     int mask_len = Matcher::vector_length(this, $mask);
23928     int vlen_enc = vector_length_encoding(this, $mask);
23929     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23930                              $tmp$$Register, mask_len, mbt, vlen_enc);
23931   %}
23932   ins_pipe( pipe_slow );
23933 %}
23934 
23935 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23936   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23937   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23938   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23939   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23940   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23941   ins_encode %{
23942     int opcode = this->ideal_Opcode();
23943     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23944     int mask_len = Matcher::vector_length(this, $mask);
23945     int vlen_enc = vector_length_encoding(this, $mask);
23946     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23947                              $tmp$$Register, mask_len, mbt, vlen_enc);
23948   %}
23949   ins_pipe( pipe_slow );
23950 %}
23951 
23952 // --------------------------------- Compress/Expand Operations ---------------------------
23953 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23954   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23955   match(Set dst (CompressV src mask));
23956   match(Set dst (ExpandV src mask));
23957   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23959   ins_encode %{
23960     int opcode = this->ideal_Opcode();
23961     int vlen_enc = vector_length_encoding(this);
23962     BasicType bt  = Matcher::vector_element_basic_type(this);
23963     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23964                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23965   %}
23966   ins_pipe( pipe_slow );
23967 %}
23968 
23969 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23970   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23971   match(Set dst (CompressV src mask));
23972   match(Set dst (ExpandV src mask));
23973   format %{ "vector_compress_expand $dst, $src, $mask" %}
23974   ins_encode %{
23975     int opcode = this->ideal_Opcode();
23976     int vector_len = vector_length_encoding(this);
23977     BasicType bt  = Matcher::vector_element_basic_type(this);
23978     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
23979   %}
23980   ins_pipe( pipe_slow );
23981 %}
23982 
23983 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
23984   match(Set dst (CompressM mask));
23985   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
23986   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
23987   ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "required");
23989     int mask_len = Matcher::vector_length(this);
23990     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
23991   %}
23992   ins_pipe( pipe_slow );
23993 %}
23994 
23995 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23996 
23997 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23998   predicate(!VM_Version::supports_gfni());
23999   match(Set dst (ReverseV src));
24000   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24002   ins_encode %{
24003     int vec_enc = vector_length_encoding(this);
24004     BasicType bt = Matcher::vector_element_basic_type(this);
24005     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24006                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24007   %}
24008   ins_pipe( pipe_slow );
24009 %}
24010 
24011 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24012   predicate(VM_Version::supports_gfni());
24013   match(Set dst (ReverseV src));
24014   effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24016   ins_encode %{
24017     int vec_enc = vector_length_encoding(this);
24018     BasicType bt  = Matcher::vector_element_basic_type(this);
24019     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24020     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24021                                $xtmp$$XMMRegister);
24022   %}
24023   ins_pipe( pipe_slow );
24024 %}
24025 
24026 instruct vreverse_byte_reg(vec dst, vec src) %{
24027   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24028   match(Set dst (ReverseBytesV src));
24029   effect(TEMP dst);
24030   format %{ "vector_reverse_byte $dst, $src" %}
24031   ins_encode %{
24032     int vec_enc = vector_length_encoding(this);
24033     BasicType bt = Matcher::vector_element_basic_type(this);
24034     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24035   %}
24036   ins_pipe( pipe_slow );
24037 %}
24038 
24039 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24040   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24041   match(Set dst (ReverseBytesV src));
24042   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24044   ins_encode %{
24045     int vec_enc = vector_length_encoding(this);
24046     BasicType bt = Matcher::vector_element_basic_type(this);
24047     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24048                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24049   %}
24050   ins_pipe( pipe_slow );
24051 %}
24052 
24053 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24054 
24055 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24056   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24057                                               Matcher::vector_length_in_bytes(n->in(1))));
24058   match(Set dst (CountLeadingZerosV src));
24059   format %{ "vector_count_leading_zeros $dst, $src" %}
24060   ins_encode %{
24061      int vlen_enc = vector_length_encoding(this, $src);
24062      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24063      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24064                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24065   %}
24066   ins_pipe( pipe_slow );
24067 %}
24068 
24069 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24070   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24071                                               Matcher::vector_length_in_bytes(n->in(1))));
24072   match(Set dst (CountLeadingZerosV src mask));
24073   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24074   ins_encode %{
24075     int vlen_enc = vector_length_encoding(this, $src);
24076     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24077     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24078     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24079                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24080   %}
24081   ins_pipe( pipe_slow );
24082 %}
24083 
24084 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24085   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24086             VM_Version::supports_avx512cd() &&
24087             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24088   match(Set dst (CountLeadingZerosV src));
24089   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24091   ins_encode %{
24092     int vlen_enc = vector_length_encoding(this, $src);
24093     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24094     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24095                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24096   %}
24097   ins_pipe( pipe_slow );
24098 %}
24099 
24100 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24101   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24102   match(Set dst (CountLeadingZerosV src));
24103   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24105   ins_encode %{
24106     int vlen_enc = vector_length_encoding(this, $src);
24107     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24108     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24109                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24110                                        $rtmp$$Register, true, vlen_enc);
24111   %}
24112   ins_pipe( pipe_slow );
24113 %}
24114 
24115 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24116   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24117             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24118   match(Set dst (CountLeadingZerosV src));
24119   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24120   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24121   ins_encode %{
24122     int vlen_enc = vector_length_encoding(this, $src);
24123     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24124     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24125                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24126   %}
24127   ins_pipe( pipe_slow );
24128 %}
24129 
24130 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24131   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24132             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24133   match(Set dst (CountLeadingZerosV src));
24134   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24135   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24136   ins_encode %{
24137     int vlen_enc = vector_length_encoding(this, $src);
24138     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24139     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24140                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24141   %}
24142   ins_pipe( pipe_slow );
24143 %}
24144 
24145 // ---------------------------------- Vector Masked Operations ------------------------------------
24146 
24147 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24148   match(Set dst (AddVB (Binary dst src2) mask));
24149   match(Set dst (AddVS (Binary dst src2) mask));
24150   match(Set dst (AddVI (Binary dst src2) mask));
24151   match(Set dst (AddVL (Binary dst src2) mask));
24152   match(Set dst (AddVF (Binary dst src2) mask));
24153   match(Set dst (AddVD (Binary dst src2) mask));
24154   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24155   ins_encode %{
24156     int vlen_enc = vector_length_encoding(this);
24157     BasicType bt = Matcher::vector_element_basic_type(this);
24158     int opc = this->ideal_Opcode();
24159     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24160                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24161   %}
24162   ins_pipe( pipe_slow );
24163 %}
24164 
24165 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24166   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24167   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24168   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24169   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24170   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24171   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24172   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24173   ins_encode %{
24174     int vlen_enc = vector_length_encoding(this);
24175     BasicType bt = Matcher::vector_element_basic_type(this);
24176     int opc = this->ideal_Opcode();
24177     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24178                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24179   %}
24180   ins_pipe( pipe_slow );
24181 %}
24182 
24183 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24184   match(Set dst (XorV (Binary dst src2) mask));
24185   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24186   ins_encode %{
24187     int vlen_enc = vector_length_encoding(this);
24188     BasicType bt = Matcher::vector_element_basic_type(this);
24189     int opc = this->ideal_Opcode();
24190     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24191                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24192   %}
24193   ins_pipe( pipe_slow );
24194 %}
24195 
24196 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24197   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24198   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24199   ins_encode %{
24200     int vlen_enc = vector_length_encoding(this);
24201     BasicType bt = Matcher::vector_element_basic_type(this);
24202     int opc = this->ideal_Opcode();
24203     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24204                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24205   %}
24206   ins_pipe( pipe_slow );
24207 %}
24208 
24209 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24210   match(Set dst (OrV (Binary dst src2) mask));
24211   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24212   ins_encode %{
24213     int vlen_enc = vector_length_encoding(this);
24214     BasicType bt = Matcher::vector_element_basic_type(this);
24215     int opc = this->ideal_Opcode();
24216     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24217                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24218   %}
24219   ins_pipe( pipe_slow );
24220 %}
24221 
24222 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24223   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24224   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24225   ins_encode %{
24226     int vlen_enc = vector_length_encoding(this);
24227     BasicType bt = Matcher::vector_element_basic_type(this);
24228     int opc = this->ideal_Opcode();
24229     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24230                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24231   %}
24232   ins_pipe( pipe_slow );
24233 %}
24234 
24235 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24236   match(Set dst (AndV (Binary dst src2) mask));
24237   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24238   ins_encode %{
24239     int vlen_enc = vector_length_encoding(this);
24240     BasicType bt = Matcher::vector_element_basic_type(this);
24241     int opc = this->ideal_Opcode();
24242     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24243                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24244   %}
24245   ins_pipe( pipe_slow );
24246 %}
24247 
24248 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24249   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24250   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24251   ins_encode %{
24252     int vlen_enc = vector_length_encoding(this);
24253     BasicType bt = Matcher::vector_element_basic_type(this);
24254     int opc = this->ideal_Opcode();
24255     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24256                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24257   %}
24258   ins_pipe( pipe_slow );
24259 %}
24260 
24261 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24262   match(Set dst (SubVB (Binary dst src2) mask));
24263   match(Set dst (SubVS (Binary dst src2) mask));
24264   match(Set dst (SubVI (Binary dst src2) mask));
24265   match(Set dst (SubVL (Binary dst src2) mask));
24266   match(Set dst (SubVF (Binary dst src2) mask));
24267   match(Set dst (SubVD (Binary dst src2) mask));
24268   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24269   ins_encode %{
24270     int vlen_enc = vector_length_encoding(this);
24271     BasicType bt = Matcher::vector_element_basic_type(this);
24272     int opc = this->ideal_Opcode();
24273     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24274                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24275   %}
24276   ins_pipe( pipe_slow );
24277 %}
24278 
24279 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24280   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24281   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24282   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24283   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24284   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24285   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24286   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24287   ins_encode %{
24288     int vlen_enc = vector_length_encoding(this);
24289     BasicType bt = Matcher::vector_element_basic_type(this);
24290     int opc = this->ideal_Opcode();
24291     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24292                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24293   %}
24294   ins_pipe( pipe_slow );
24295 %}
24296 
24297 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24298   match(Set dst (MulVS (Binary dst src2) mask));
24299   match(Set dst (MulVI (Binary dst src2) mask));
24300   match(Set dst (MulVL (Binary dst src2) mask));
24301   match(Set dst (MulVF (Binary dst src2) mask));
24302   match(Set dst (MulVD (Binary dst src2) mask));
24303   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24304   ins_encode %{
24305     int vlen_enc = vector_length_encoding(this);
24306     BasicType bt = Matcher::vector_element_basic_type(this);
24307     int opc = this->ideal_Opcode();
24308     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24309                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24310   %}
24311   ins_pipe( pipe_slow );
24312 %}
24313 
24314 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24315   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24316   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24317   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24318   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24319   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24320   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24321   ins_encode %{
24322     int vlen_enc = vector_length_encoding(this);
24323     BasicType bt = Matcher::vector_element_basic_type(this);
24324     int opc = this->ideal_Opcode();
24325     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24326                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24327   %}
24328   ins_pipe( pipe_slow );
24329 %}
24330 
24331 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24332   match(Set dst (SqrtVF dst mask));
24333   match(Set dst (SqrtVD dst mask));
24334   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24335   ins_encode %{
24336     int vlen_enc = vector_length_encoding(this);
24337     BasicType bt = Matcher::vector_element_basic_type(this);
24338     int opc = this->ideal_Opcode();
24339     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24340                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24341   %}
24342   ins_pipe( pipe_slow );
24343 %}
24344 
24345 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24346   match(Set dst (DivVF (Binary dst src2) mask));
24347   match(Set dst (DivVD (Binary dst src2) mask));
24348   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24349   ins_encode %{
24350     int vlen_enc = vector_length_encoding(this);
24351     BasicType bt = Matcher::vector_element_basic_type(this);
24352     int opc = this->ideal_Opcode();
24353     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24354                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24355   %}
24356   ins_pipe( pipe_slow );
24357 %}
24358 
24359 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24360   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24361   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24362   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24363   ins_encode %{
24364     int vlen_enc = vector_length_encoding(this);
24365     BasicType bt = Matcher::vector_element_basic_type(this);
24366     int opc = this->ideal_Opcode();
24367     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24368                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24369   %}
24370   ins_pipe( pipe_slow );
24371 %}
24372 
24373 
24374 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24375   match(Set dst (RotateLeftV (Binary dst shift) mask));
24376   match(Set dst (RotateRightV (Binary dst shift) mask));
24377   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24378   ins_encode %{
24379     int vlen_enc = vector_length_encoding(this);
24380     BasicType bt = Matcher::vector_element_basic_type(this);
24381     int opc = this->ideal_Opcode();
24382     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24383                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24384   %}
24385   ins_pipe( pipe_slow );
24386 %}
24387 
24388 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24389   match(Set dst (RotateLeftV (Binary dst src2) mask));
24390   match(Set dst (RotateRightV (Binary dst src2) mask));
24391   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24392   ins_encode %{
24393     int vlen_enc = vector_length_encoding(this);
24394     BasicType bt = Matcher::vector_element_basic_type(this);
24395     int opc = this->ideal_Opcode();
24396     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24397                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24398   %}
24399   ins_pipe( pipe_slow );
24400 %}
24401 
24402 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24403   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24404   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24405   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24406   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24407   ins_encode %{
24408     int vlen_enc = vector_length_encoding(this);
24409     BasicType bt = Matcher::vector_element_basic_type(this);
24410     int opc = this->ideal_Opcode();
24411     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24412                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24413   %}
24414   ins_pipe( pipe_slow );
24415 %}
24416 
24417 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24418   predicate(!n->as_ShiftV()->is_var_shift());
24419   match(Set dst (LShiftVS (Binary dst src2) mask));
24420   match(Set dst (LShiftVI (Binary dst src2) mask));
24421   match(Set dst (LShiftVL (Binary dst src2) mask));
24422   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24423   ins_encode %{
24424     int vlen_enc = vector_length_encoding(this);
24425     BasicType bt = Matcher::vector_element_basic_type(this);
24426     int opc = this->ideal_Opcode();
24427     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24428                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24429   %}
24430   ins_pipe( pipe_slow );
24431 %}
24432 
24433 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24434   predicate(n->as_ShiftV()->is_var_shift());
24435   match(Set dst (LShiftVS (Binary dst src2) mask));
24436   match(Set dst (LShiftVI (Binary dst src2) mask));
24437   match(Set dst (LShiftVL (Binary dst src2) mask));
24438   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24439   ins_encode %{
24440     int vlen_enc = vector_length_encoding(this);
24441     BasicType bt = Matcher::vector_element_basic_type(this);
24442     int opc = this->ideal_Opcode();
24443     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24444                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24445   %}
24446   ins_pipe( pipe_slow );
24447 %}
24448 
24449 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24450   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24451   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24452   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24453   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24454   ins_encode %{
24455     int vlen_enc = vector_length_encoding(this);
24456     BasicType bt = Matcher::vector_element_basic_type(this);
24457     int opc = this->ideal_Opcode();
24458     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24459                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24460   %}
24461   ins_pipe( pipe_slow );
24462 %}
24463 
24464 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24465   predicate(!n->as_ShiftV()->is_var_shift());
24466   match(Set dst (RShiftVS (Binary dst src2) mask));
24467   match(Set dst (RShiftVI (Binary dst src2) mask));
24468   match(Set dst (RShiftVL (Binary dst src2) mask));
24469   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24470   ins_encode %{
24471     int vlen_enc = vector_length_encoding(this);
24472     BasicType bt = Matcher::vector_element_basic_type(this);
24473     int opc = this->ideal_Opcode();
24474     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24475                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24476   %}
24477   ins_pipe( pipe_slow );
24478 %}
24479 
24480 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24481   predicate(n->as_ShiftV()->is_var_shift());
24482   match(Set dst (RShiftVS (Binary dst src2) mask));
24483   match(Set dst (RShiftVI (Binary dst src2) mask));
24484   match(Set dst (RShiftVL (Binary dst src2) mask));
24485   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24486   ins_encode %{
24487     int vlen_enc = vector_length_encoding(this);
24488     BasicType bt = Matcher::vector_element_basic_type(this);
24489     int opc = this->ideal_Opcode();
24490     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24491                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24492   %}
24493   ins_pipe( pipe_slow );
24494 %}
24495 
24496 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24497   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24498   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24499   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24500   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24501   ins_encode %{
24502     int vlen_enc = vector_length_encoding(this);
24503     BasicType bt = Matcher::vector_element_basic_type(this);
24504     int opc = this->ideal_Opcode();
24505     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24506                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24507   %}
24508   ins_pipe( pipe_slow );
24509 %}
24510 
24511 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24512   predicate(!n->as_ShiftV()->is_var_shift());
24513   match(Set dst (URShiftVS (Binary dst src2) mask));
24514   match(Set dst (URShiftVI (Binary dst src2) mask));
24515   match(Set dst (URShiftVL (Binary dst src2) mask));
24516   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24517   ins_encode %{
24518     int vlen_enc = vector_length_encoding(this);
24519     BasicType bt = Matcher::vector_element_basic_type(this);
24520     int opc = this->ideal_Opcode();
24521     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24522                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24523   %}
24524   ins_pipe( pipe_slow );
24525 %}
24526 
24527 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24528   predicate(n->as_ShiftV()->is_var_shift());
24529   match(Set dst (URShiftVS (Binary dst src2) mask));
24530   match(Set dst (URShiftVI (Binary dst src2) mask));
24531   match(Set dst (URShiftVL (Binary dst src2) mask));
24532   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24533   ins_encode %{
24534     int vlen_enc = vector_length_encoding(this);
24535     BasicType bt = Matcher::vector_element_basic_type(this);
24536     int opc = this->ideal_Opcode();
24537     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24538                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24539   %}
24540   ins_pipe( pipe_slow );
24541 %}
24542 
24543 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24544   match(Set dst (MaxV (Binary dst src2) mask));
24545   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24546   ins_encode %{
24547     int vlen_enc = vector_length_encoding(this);
24548     BasicType bt = Matcher::vector_element_basic_type(this);
24549     int opc = this->ideal_Opcode();
24550     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24551                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24552   %}
24553   ins_pipe( pipe_slow );
24554 %}
24555 
24556 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24557   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24558   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24559   ins_encode %{
24560     int vlen_enc = vector_length_encoding(this);
24561     BasicType bt = Matcher::vector_element_basic_type(this);
24562     int opc = this->ideal_Opcode();
24563     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24564                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24565   %}
24566   ins_pipe( pipe_slow );
24567 %}
24568 
24569 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24570   match(Set dst (MinV (Binary dst src2) mask));
24571   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24572   ins_encode %{
24573     int vlen_enc = vector_length_encoding(this);
24574     BasicType bt = Matcher::vector_element_basic_type(this);
24575     int opc = this->ideal_Opcode();
24576     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24577                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24578   %}
24579   ins_pipe( pipe_slow );
24580 %}
24581 
24582 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24583   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24584   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24585   ins_encode %{
24586     int vlen_enc = vector_length_encoding(this);
24587     BasicType bt = Matcher::vector_element_basic_type(this);
24588     int opc = this->ideal_Opcode();
24589     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24590                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24591   %}
24592   ins_pipe( pipe_slow );
24593 %}
24594 
24595 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24596   match(Set dst (VectorRearrange (Binary dst src2) mask));
24597   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24598   ins_encode %{
24599     int vlen_enc = vector_length_encoding(this);
24600     BasicType bt = Matcher::vector_element_basic_type(this);
24601     int opc = this->ideal_Opcode();
24602     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24603                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24604   %}
24605   ins_pipe( pipe_slow );
24606 %}
24607 
24608 instruct vabs_masked(vec dst, kReg mask) %{
24609   match(Set dst (AbsVB dst mask));
24610   match(Set dst (AbsVS dst mask));
24611   match(Set dst (AbsVI dst mask));
24612   match(Set dst (AbsVL dst mask));
24613   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24614   ins_encode %{
24615     int vlen_enc = vector_length_encoding(this);
24616     BasicType bt = Matcher::vector_element_basic_type(this);
24617     int opc = this->ideal_Opcode();
24618     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24619                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24620   %}
24621   ins_pipe( pipe_slow );
24622 %}
24623 
24624 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24625   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24626   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24627   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24628   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24630     int vlen_enc = vector_length_encoding(this);
24631     BasicType bt = Matcher::vector_element_basic_type(this);
24632     int opc = this->ideal_Opcode();
24633     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24634                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24635   %}
24636   ins_pipe( pipe_slow );
24637 %}
24638 
24639 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24640   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24641   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24642   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24643   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24645     int vlen_enc = vector_length_encoding(this);
24646     BasicType bt = Matcher::vector_element_basic_type(this);
24647     int opc = this->ideal_Opcode();
24648     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24649                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24650   %}
24651   ins_pipe( pipe_slow );
24652 %}
24653 
24654 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24655   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24656   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24657   ins_encode %{
24658     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24659     int vlen_enc = vector_length_encoding(this, $src1);
24660     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24661 
    // Perform the masked comparison according to the element basic type.
24663     switch (src1_elem_bt) {
24664       case T_BYTE: {
24665         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24666         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
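        // The macro-assembler helper's boolean argument selects a signed
        // comparison, hence the negated is_unsigned below.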
24667         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24668         break;
24669       }
24670       case T_SHORT: {
24671         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24672         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24673         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24674         break;
24675       }
24676       case T_INT: {
24677         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24678         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24679         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24680         break;
24681       }
24682       case T_LONG: {
24683         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24684         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24685         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24686         break;
24687       }
24688       case T_FLOAT: {
24689         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24690         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24691         break;
24692       }
24693       case T_DOUBLE: {
24694         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24695         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24696         break;
24697       }
24698       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24699     }
24700   %}
24701   ins_pipe( pipe_slow );
24702 %}
24703 
24704 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24705   predicate(Matcher::vector_length(n) <= 32);
24706   match(Set dst (MaskAll src));
24707   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24708   ins_encode %{
24709     int mask_len = Matcher::vector_length(this);
24710     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24711   %}
24712   ins_pipe( pipe_slow );
24713 %}
24714 
24715 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24716   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24717   match(Set dst (XorVMask src (MaskAll cnt)));
24718   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24720   ins_encode %{
24721     uint masklen = Matcher::vector_length(this);
24722     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24723   %}
24724   ins_pipe( pipe_slow );
24725 %}
24726 
24727 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24728   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24729             (Matcher::vector_length(n) == 16) ||
24730             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24731   match(Set dst (XorVMask src (MaskAll cnt)));
24732   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24733   ins_encode %{
24734     uint masklen = Matcher::vector_length(this);
24735     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24736   %}
24737   ins_pipe( pipe_slow );
24738 %}
24739 
24740 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24741   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24742   match(Set dst (VectorLongToMask src));
24743   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24744   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24745   ins_encode %{
24746     int mask_len = Matcher::vector_length(this);
24747     int vec_enc  = vector_length_encoding(mask_len);
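    // For mask_len <= 8 the helper does not use the XMM temporary, so xnoreg
    // is passed in place of $xtmp.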
24748     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24749                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24750   %}
24751   ins_pipe( pipe_slow );
24752 %}
24753 
24754 
24755 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24756   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24757   match(Set dst (VectorLongToMask src));
24758   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
24760   ins_encode %{
24761     int mask_len = Matcher::vector_length(this);
24762     assert(mask_len <= 32, "invalid mask length");
24763     int vec_enc  = vector_length_encoding(mask_len);
24764     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24765                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24766   %}
24767   ins_pipe( pipe_slow );
24768 %}
24769 
24770 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24771   predicate(n->bottom_type()->isa_vectmask());
24772   match(Set dst (VectorLongToMask src));
24773   format %{ "long_to_mask_evex $dst, $src\t!" %}
24774   ins_encode %{
24775     __ kmov($dst$$KRegister, $src$$Register);
24776   %}
24777   ins_pipe( pipe_slow );
24778 %}
24779 
24780 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24781   match(Set dst (AndVMask src1 src2));
24782   match(Set dst (OrVMask src1 src2));
24783   match(Set dst (XorVMask src1 src2));
24784   effect(TEMP kscratch);
24785   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24786   ins_encode %{
24787     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24788     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24789     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24790     uint masklen = Matcher::vector_length(this);
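    // Byte-granular mask instructions (e.g. kandb) require AVX512DQ; without
    // it, mask lengths below 16 are widened to use the 16-bit (word) forms.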
24791     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24792     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24793   %}
24794   ins_pipe( pipe_slow );
24795 %}
24796 
24797 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24798   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24799   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24800   ins_encode %{
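    // $func is the 8-bit truth-table immediate consumed by vpternlog.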
24801     int vlen_enc = vector_length_encoding(this);
24802     BasicType bt = Matcher::vector_element_basic_type(this);
24803     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24804                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24805   %}
24806   ins_pipe( pipe_slow );
24807 %}
24808 
24809 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24810   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24811   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24812   ins_encode %{
24813     int vlen_enc = vector_length_encoding(this);
24814     BasicType bt = Matcher::vector_element_basic_type(this);
24815     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24816                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24817   %}
24818   ins_pipe( pipe_slow );
24819 %}
24820 
24821 instruct castMM(kReg dst)
24822 %{
24823   match(Set dst (CastVV dst));
24824 
24825   size(0);
24826   format %{ "# castVV of $dst" %}
24827   ins_encode(/* empty encoding */);
24828   ins_cost(0);
24829   ins_pipe(empty);
24830 %}
24831 
24832 instruct castVV(vec dst)
24833 %{
24834   match(Set dst (CastVV dst));
24835 
24836   size(0);
24837   format %{ "# castVV of $dst" %}
24838   ins_encode(/* empty encoding */);
24839   ins_cost(0);
24840   ins_pipe(empty);
24841 %}
24842 
24843 instruct castVVLeg(legVec dst)
24844 %{
24845   match(Set dst (CastVV dst));
24846 
24847   size(0);
24848   format %{ "# castVV of $dst" %}
24849   ins_encode(/* empty encoding */);
24850   ins_cost(0);
24851   ins_pipe(empty);
24852 %}
24853 
24854 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24855 %{
24856   match(Set dst (IsInfiniteF src));
24857   effect(TEMP ktmp, KILL cr);
24858   format %{ "float_class_check $dst, $src" %}
24859   ins_encode %{
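    // vfpclass imm8 0x18 = 0x08 | 0x10 selects the +Infinity and -Infinity classes.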
24860     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24861     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24862   %}
24863   ins_pipe(pipe_slow);
24864 %}
24865 
24866 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24867 %{
24868   match(Set dst (IsInfiniteD src));
24869   effect(TEMP ktmp, KILL cr);
24870   format %{ "double_class_check $dst, $src" %}
24871   ins_encode %{
24872     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24873     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24874   %}
24875   ins_pipe(pipe_slow);
24876 %}
24877 
24878 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24879 %{
24880   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24881             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24882   match(Set dst (SaturatingAddV src1 src2));
24883   match(Set dst (SaturatingSubV src1 src2));
24884   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24885   ins_encode %{
24886     int vlen_enc = vector_length_encoding(this);
24887     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24888     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24889                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24890   %}
24891   ins_pipe(pipe_slow);
24892 %}
24893 
24894 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24895 %{
24896   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24897             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24898   match(Set dst (SaturatingAddV src1 src2));
24899   match(Set dst (SaturatingSubV src1 src2));
24900   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24901   ins_encode %{
24902     int vlen_enc = vector_length_encoding(this);
24903     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24904     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24905                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24906   %}
24907   ins_pipe(pipe_slow);
24908 %}
24909 
24910 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24911 %{
24912   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24913             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24914             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24915   match(Set dst (SaturatingAddV src1 src2));
24916   match(Set dst (SaturatingSubV src1 src2));
24917   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24918   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24919   ins_encode %{
24920     int vlen_enc = vector_length_encoding(this);
24921     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24922     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24923                                         $src1$$XMMRegister, $src2$$XMMRegister,
24924                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24925                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24926   %}
24927   ins_pipe(pipe_slow);
24928 %}
24929 
24930 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24931 %{
24932   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24933             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24934             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24935   match(Set dst (SaturatingAddV src1 src2));
24936   match(Set dst (SaturatingSubV src1 src2));
24937   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24938   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24939   ins_encode %{
24940     int vlen_enc = vector_length_encoding(this);
24941     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24942     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24943                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24944                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24945   %}
24946   ins_pipe(pipe_slow);
24947 %}
24948 
24949 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24950 %{
24951   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24952             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24953             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24954   match(Set dst (SaturatingAddV src1 src2));
24955   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24956   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24957   ins_encode %{
24958     int vlen_enc = vector_length_encoding(this);
24959     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24960     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24961                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24962   %}
24963   ins_pipe(pipe_slow);
24964 %}
24965 
24966 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24967 %{
24968   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24969             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24970             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24971   match(Set dst (SaturatingAddV src1 src2));
24972   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24973   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24974   ins_encode %{
24975     int vlen_enc = vector_length_encoding(this);
24976     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24977     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24978                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24979   %}
24980   ins_pipe(pipe_slow);
24981 %}
24982 
24983 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24984 %{
24985   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24986             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24987             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24988   match(Set dst (SaturatingSubV src1 src2));
24989   effect(TEMP ktmp);
24990   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24991   ins_encode %{
24992     int vlen_enc = vector_length_encoding(this);
24993     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24994     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24995                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24996   %}
24997   ins_pipe(pipe_slow);
24998 %}
24999 
25000 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25001 %{
25002   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25003             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25004             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25005   match(Set dst (SaturatingSubV src1 src2));
25006   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25007   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25008   ins_encode %{
25009     int vlen_enc = vector_length_encoding(this);
25010     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25011     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25012                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25013   %}
25014   ins_pipe(pipe_slow);
25015 %}
25016 
25017 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25018 %{
25019   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25020             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25021   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25022   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25023   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25024   ins_encode %{
25025     int vlen_enc = vector_length_encoding(this);
25026     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25027     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25028                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25029   %}
25030   ins_pipe(pipe_slow);
25031 %}
25032 
25033 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25034 %{
25035   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25036             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25037   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25038   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25039   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25040   ins_encode %{
25041     int vlen_enc = vector_length_encoding(this);
25042     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25043     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25044                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25045   %}
25046   ins_pipe(pipe_slow);
25047 %}
25048 
25049 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25050   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25051             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25052   match(Set dst (SaturatingAddV (Binary dst src) mask));
25053   match(Set dst (SaturatingSubV (Binary dst src) mask));
25054   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25055   ins_encode %{
25056     int vlen_enc = vector_length_encoding(this);
25057     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25058     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25059                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25060   %}
25061   ins_pipe( pipe_slow );
25062 %}
25063 
25064 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25065   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25066             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25067   match(Set dst (SaturatingAddV (Binary dst src) mask));
25068   match(Set dst (SaturatingSubV (Binary dst src) mask));
25069   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25070   ins_encode %{
25071     int vlen_enc = vector_length_encoding(this);
25072     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25073     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25074                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25075   %}
25076   ins_pipe( pipe_slow );
25077 %}
25078 
25079 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25080   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25081             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25082   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25083   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25084   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25085   ins_encode %{
25086     int vlen_enc = vector_length_encoding(this);
25087     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25088     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25089                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25090   %}
25091   ins_pipe( pipe_slow );
25092 %}
25093 
25094 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25095   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25096             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25097   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25098   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25099   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25100   ins_encode %{
25101     int vlen_enc = vector_length_encoding(this);
25102     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25103     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25104                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25105   %}
25106   ins_pipe( pipe_slow );
25107 %}
25108 
25109 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25110 %{
25111   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25112   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25113   ins_encode %{
25114     int vlen_enc = vector_length_encoding(this);
25115     BasicType bt = Matcher::vector_element_basic_type(this);
25116     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25117   %}
25118   ins_pipe(pipe_slow);
25119 %}
25120 
25121 instruct reinterpretS2HF(regF dst, rRegI src)
25122 %{
25123   match(Set dst (ReinterpretS2HF src));
25124   format %{ "vmovw $dst, $src" %}
25125   ins_encode %{
25126     __ vmovw($dst$$XMMRegister, $src$$Register);
25127   %}
25128   ins_pipe(pipe_slow);
25129 %}
25130 
25131 instruct reinterpretHF2S(rRegI dst, regF src)
25132 %{
25133   match(Set dst (ReinterpretHF2S src));
25134   format %{ "vmovw $dst, $src" %}
25135   ins_encode %{
25136     __ vmovw($dst$$Register, $src$$XMMRegister);
25137   %}
25138   ins_pipe(pipe_slow);
25139 %}
25140 
25141 instruct convF2HFAndS2HF(regF dst, regF src)
25142 %{
25143   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25144   format %{ "convF2HFAndS2HF $dst, $src" %}
25145   ins_encode %{
25146     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25147   %}
25148   ins_pipe(pipe_slow);
25149 %}
25150 
25151 instruct convHF2SAndHF2F(regF dst, regF src)
25152 %{
25153   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25154   format %{ "convHF2SAndHF2F $dst, $src" %}
25155   ins_encode %{
25156     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25157   %}
25158   ins_pipe(pipe_slow);
25159 %}
25160 
25161 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25162 %{
25163   match(Set dst (SqrtHF src));
25164   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25165   ins_encode %{
25166     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25167   %}
25168   ins_pipe(pipe_slow);
25169 %}
25170 
25171 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25172 %{
25173   match(Set dst (AddHF src1 src2));
25174   match(Set dst (DivHF src1 src2));
25175   match(Set dst (MulHF src1 src2));
25176   match(Set dst (SubHF src1 src2));
25177   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25178   ins_encode %{
25179     int opcode = this->ideal_Opcode();
25180     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25181   %}
25182   ins_pipe(pipe_slow);
25183 %}
25184 
25185 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25186 %{
25187   predicate(VM_Version::supports_avx10_2());
25188   match(Set dst (MaxHF src1 src2));
25189   match(Set dst (MinHF src1 src2));
25190   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25191   ins_encode %{
25192     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25193     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25194   %}
25195   ins_pipe( pipe_slow );
25196 %}
25197 
25198 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25199 %{
25200   predicate(!VM_Version::supports_avx10_2());
25201   match(Set dst (MaxHF src1 src2));
25202   match(Set dst (MinHF src1 src2));
25203   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25204   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25205   ins_encode %{
25206     int opcode = this->ideal_Opcode();
25207     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25208                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25209   %}
25210   ins_pipe( pipe_slow );
25211 %}
25212 
25213 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25214 %{
25215   match(Set dst (FmaHF  src2 (Binary dst src1)));
25216   effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma scalarH" %}
25218   ins_encode %{
25219     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25220   %}
25221   ins_pipe( pipe_slow );
25222 %}
25223 
25224 
25225 instruct vector_sqrt_HF_reg(vec dst, vec src)
25226 %{
25227   match(Set dst (SqrtVHF src));
25228   format %{ "vector_sqrt_fp16 $dst, $src" %}
25229   ins_encode %{
25230     int vlen_enc = vector_length_encoding(this);
25231     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25232   %}
25233   ins_pipe(pipe_slow);
25234 %}
25235 
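// In the FP16 memory forms below, the VectorReinterpret of a loaded short
// vector is the half-float payload; the load is folded into the instruction's
// memory operand.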
25236 instruct vector_sqrt_HF_mem(vec dst, memory src)
25237 %{
25238   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25239   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25240   ins_encode %{
25241     int vlen_enc = vector_length_encoding(this);
25242     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25243   %}
25244   ins_pipe(pipe_slow);
25245 %}
25246 
25247 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25248 %{
25249   match(Set dst (AddVHF src1 src2));
25250   match(Set dst (DivVHF src1 src2));
25251   match(Set dst (MulVHF src1 src2));
25252   match(Set dst (SubVHF src1 src2));
25253   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25254   ins_encode %{
25255     int vlen_enc = vector_length_encoding(this);
25256     int opcode = this->ideal_Opcode();
25257     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25258   %}
25259   ins_pipe(pipe_slow);
25260 %}
25261 
25262 
25263 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25264 %{
25265   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25266   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25267   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25268   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25269   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25270   ins_encode %{
25271     int vlen_enc = vector_length_encoding(this);
25272     int opcode = this->ideal_Opcode();
25273     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25274   %}
25275   ins_pipe(pipe_slow);
25276 %}
25277 
25278 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25279 %{
25280   match(Set dst (FmaVHF src2 (Binary dst src1)));
25281   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25282   ins_encode %{
25283     int vlen_enc = vector_length_encoding(this);
25284     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25285   %}
25286   ins_pipe( pipe_slow );
25287 %}
25288 
25289 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25290 %{
25291   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25292   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25293   ins_encode %{
25294     int vlen_enc = vector_length_encoding(this);
25295     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25296   %}
25297   ins_pipe( pipe_slow );
25298 %}
25299 
25300 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25301 %{
25302   predicate(VM_Version::supports_avx10_2());
25303   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25304   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25305   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25306   ins_encode %{
25307     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25309     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25310   %}
25311   ins_pipe( pipe_slow );
25312 %}
25313 
25314 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25315 %{
25316   predicate(VM_Version::supports_avx10_2());
25317   match(Set dst (MinVHF src1 src2));
25318   match(Set dst (MaxVHF src1 src2));
25319   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25320   ins_encode %{
25321     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25323     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25324   %}
25325   ins_pipe( pipe_slow );
25326 %}
25327 
25328 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25329 %{
25330   predicate(!VM_Version::supports_avx10_2());
25331   match(Set dst (MinVHF src1 src2));
25332   match(Set dst (MaxVHF src1 src2));
25333   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25334   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25335   ins_encode %{
25336     int vlen_enc = vector_length_encoding(this);
25337     int opcode = this->ideal_Opcode();
25338     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25339                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25340   %}
25341   ins_pipe( pipe_slow );
25342 %}
25343 
25344 //----------PEEPHOLE RULES-----------------------------------------------------
25345 // These must follow all instruction definitions as they use the names
25346 // defined in the instructions definitions.
25347 //
25348 // peeppredicate ( rule_predicate );
// // the predicate; unless it holds, the peephole rule is ignored
25350 //
25351 // peepmatch ( root_instr_name [preceding_instruction]* );
25352 //
25353 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and have the signature
// // bool (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...), the
// // arguments being the basic block, the current node index inside the block,
// // the register allocator, a function that when invoked returns a new node as
// // defined in peepreplace, and the rule numbers of the nodes appearing in the
// // corresponding peepmatch; the procedure returns true if successful, else
// // false
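// //
// // For illustration only, a procedure matching this description could be
// // declared as below (a sketch; the name and rule parameters mirror the
// // inc_mov_to_lea used in the procedural example further down):
// //   bool inc_mov_to_lea(Block* block, int index, PhaseRegAlloc* ra_,
// //                       MachNode* (*new_root)(), int inc_rule, int mov_rule);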
25362 //
// peepconstraint ( instruction_number.operand_name relational_op
//                  instruction_number.operand_name [, ...] );
25366 // // instruction numbers are zero-based using left to right order in peepmatch
25367 //
25368 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25369 // // provide an instruction_number.operand_name for each operand that appears
25370 // // in the replacement instruction's match rule
25371 //
25372 // ---------VM FLAGS---------------------------------------------------------
25373 //
25374 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25375 //
25376 // Each peephole rule is given an identifying number starting with zero and
25377 // increasing by one in the order seen by the parser.  An individual peephole
25378 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25379 // on the command-line.
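//
// Illustrative command-line usage (the rule number is only an example):
//   -XX:-OptoPeephole          disable all peephole rules
//   -XX:OptoPeepholeAt=2       enable only the peephole rule numbered 2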
25380 //
25381 // ---------CURRENT LIMITATIONS----------------------------------------------
25382 //
// Only transformations inside a basic block (do we need more for peephole?)
25384 //
25385 // ---------EXAMPLE----------------------------------------------------------
25386 //
25387 // // pertinent parts of existing instructions in architecture description
25388 // instruct movI(rRegI dst, rRegI src)
25389 // %{
25390 //   match(Set dst (CopyI src));
25391 // %}
25392 //
25393 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25394 // %{
25395 //   match(Set dst (AddI dst src));
25396 //   effect(KILL cr);
25397 // %}
25398 //
25399 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25400 // %{
25401 //   match(Set dst (AddI dst src));
25402 // %}
25403 //
25404 // 1. Simple replacement
25405 // - Only match adjacent instructions in same basic block
25406 // - Only equality constraints
25407 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25408 // - Only one replacement instruction
25409 //
25410 // // Change (inc mov) to lea
25411 // peephole %{
25412 //   // lea should only be emitted when beneficial
25413 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25414 //   // increment preceded by register-register move
25415 //   peepmatch ( incI_rReg movI );
25416 //   // require that the destination register of the increment
25417 //   // match the destination register of the move
25418 //   peepconstraint ( 0.dst == 1.dst );
25419 //   // construct a replacement instruction that sets
25420 //   // the destination to ( move's source register + one )
25421 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25422 // %}
25423 //
25424 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25426 // - More flexible constraints
25427 // - More flexible transformations
25428 // - May utilise architecture-dependent API more effectively
25429 // - Currently only one replacement instruction due to adlc parsing capabilities
25430 //
25431 // // Change (inc mov) to lea
25432 // peephole %{
25433 //   // lea should only be emitted when beneficial
25434 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the procedure below
25436 //   peepmatch ( incI_rReg movI );
25437 //   // the method that takes the responsibility of transformation
25438 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that when invoked creates
//   // this node is passed into the procedure above
25441 //   peepreplace ( leaI_rReg_immI() );
25442 // %}
25443 
// These instructions are not matched by the matcher; they are only used by the peephole optimizer
25445 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25446 %{
25447   predicate(false);
25448   match(Set dst (AddI src1 src2));
25449   format %{ "leal    $dst, [$src1 + $src2]" %}
25450   ins_encode %{
25451     Register dst = $dst$$Register;
25452     Register src1 = $src1$$Register;
25453     Register src2 = $src2$$Register;
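    // rbp and r13 cannot be encoded as a base register without a displacement
    // byte (their base encoding doubles as the no-base form), so when src1 is
    // one of them, swap the operands and use src2 as the base instead.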
25454     if (src1 != rbp && src1 != r13) {
25455       __ leal(dst, Address(src1, src2, Address::times_1));
25456     } else {
25457       assert(src2 != rbp && src2 != r13, "");
25458       __ leal(dst, Address(src2, src1, Address::times_1));
25459     }
25460   %}
25461   ins_pipe(ialu_reg_reg);
25462 %}
25463 
25464 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25465 %{
25466   predicate(false);
25467   match(Set dst (AddI src1 src2));
25468   format %{ "leal    $dst, [$src1 + $src2]" %}
25469   ins_encode %{
25470     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25471   %}
25472   ins_pipe(ialu_reg_reg);
25473 %}
25474 
25475 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25476 %{
25477   predicate(false);
25478   match(Set dst (LShiftI src shift));
25479   format %{ "leal    $dst, [$src << $shift]" %}
25480   ins_encode %{
25481     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25482     Register src = $src$$Register;
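    // A shift by 1 is emitted as [src + src*1]: a scaled index without a base
    // register would force a 32-bit displacement, so the base+index form is
    // shorter, provided src itself is encodable as a base (not rbp/r13).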
25483     if (scale == Address::times_2 && src != rbp && src != r13) {
25484       __ leal($dst$$Register, Address(src, src, Address::times_1));
25485     } else {
25486       __ leal($dst$$Register, Address(noreg, src, scale));
25487     }
25488   %}
25489   ins_pipe(ialu_reg_reg);
25490 %}
25491 
25492 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25493 %{
25494   predicate(false);
25495   match(Set dst (AddL src1 src2));
25496   format %{ "leaq    $dst, [$src1 + $src2]" %}
25497   ins_encode %{
25498     Register dst = $dst$$Register;
25499     Register src1 = $src1$$Register;
25500     Register src2 = $src2$$Register;
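    // same base-register consideration as in leaI_rReg_rReg_peep above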
25501     if (src1 != rbp && src1 != r13) {
25502       __ leaq(dst, Address(src1, src2, Address::times_1));
25503     } else {
25504       assert(src2 != rbp && src2 != r13, "");
25505       __ leaq(dst, Address(src2, src1, Address::times_1));
25506     }
25507   %}
25508   ins_pipe(ialu_reg_reg);
25509 %}
25510 
25511 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25512 %{
25513   predicate(false);
25514   match(Set dst (AddL src1 src2));
25515   format %{ "leaq    $dst, [$src1 + $src2]" %}
25516   ins_encode %{
25517     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25518   %}
25519   ins_pipe(ialu_reg_reg);
25520 %}
25521 
25522 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25523 %{
25524   predicate(false);
25525   match(Set dst (LShiftL src shift));
25526   format %{ "leaq    $dst, [$src << $shift]" %}
25527   ins_encode %{
25528     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25529     Register src = $src$$Register;
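    // same encoding consideration as in leaI_rReg_immI2_peep above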
25530     if (scale == Address::times_2 && src != rbp && src != r13) {
25531       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25532     } else {
25533       __ leaq($dst$$Register, Address(noreg, src, scale));
25534     }
25535   %}
25536   ins_pipe(ialu_reg_reg);
25537 %}
25538 
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally only
// beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) or on Intel Cascade Lake.
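//
// For instance (illustrative registers, assuming the intermediate copy is not
// otherwise used), the reg-reg rules rewrite
//   movl  rax, rbx
//   addl  rax, rcx
// as
//   leal  rax, [rbx + rcx]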
25545 
25546 peephole
25547 %{
25548   peeppredicate(VM_Version::supports_fast_2op_lea());
25549   peepmatch (addI_rReg);
25550   peepprocedure (lea_coalesce_reg);
25551   peepreplace (leaI_rReg_rReg_peep());
25552 %}
25553 
25554 peephole
25555 %{
25556   peeppredicate(VM_Version::supports_fast_2op_lea());
25557   peepmatch (addI_rReg_imm);
25558   peepprocedure (lea_coalesce_imm);
25559   peepreplace (leaI_rReg_immI_peep());
25560 %}
25561 
25562 peephole
25563 %{
25564   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25565                 VM_Version::is_intel_cascade_lake());
25566   peepmatch (incI_rReg);
25567   peepprocedure (lea_coalesce_imm);
25568   peepreplace (leaI_rReg_immI_peep());
25569 %}
25570 
25571 peephole
25572 %{
25573   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25574                 VM_Version::is_intel_cascade_lake());
25575   peepmatch (decI_rReg);
25576   peepprocedure (lea_coalesce_imm);
25577   peepreplace (leaI_rReg_immI_peep());
25578 %}
25579 
25580 peephole
25581 %{
25582   peeppredicate(VM_Version::supports_fast_2op_lea());
25583   peepmatch (salI_rReg_immI2);
25584   peepprocedure (lea_coalesce_imm);
25585   peepreplace (leaI_rReg_immI2_peep());
25586 %}
25587 
25588 peephole
25589 %{
25590   peeppredicate(VM_Version::supports_fast_2op_lea());
25591   peepmatch (addL_rReg);
25592   peepprocedure (lea_coalesce_reg);
25593   peepreplace (leaL_rReg_rReg_peep());
25594 %}
25595 
25596 peephole
25597 %{
25598   peeppredicate(VM_Version::supports_fast_2op_lea());
25599   peepmatch (addL_rReg_imm);
25600   peepprocedure (lea_coalesce_imm);
25601   peepreplace (leaL_rReg_immL32_peep());
25602 %}
25603 
25604 peephole
25605 %{
25606   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25607                 VM_Version::is_intel_cascade_lake());
25608   peepmatch (incL_rReg);
25609   peepprocedure (lea_coalesce_imm);
25610   peepreplace (leaL_rReg_immL32_peep());
25611 %}
25612 
25613 peephole
25614 %{
25615   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25616                 VM_Version::is_intel_cascade_lake());
25617   peepmatch (decL_rReg);
25618   peepprocedure (lea_coalesce_imm);
25619   peepreplace (leaL_rReg_immL32_peep());
25620 %}
25621 
25622 peephole
25623 %{
25624   peeppredicate(VM_Version::supports_fast_2op_lea());
25625   peepmatch (salL_rReg_immI2);
25626   peepprocedure (lea_coalesce_imm);
25627   peepreplace (leaL_rReg_immI2_peep());
25628 %}
25629 
25630 peephole
25631 %{
25632   peepmatch (leaPCompressedOopOffset);
25633   peepprocedure (lea_remove_redundant);
25634 %}
25635 
25636 peephole
25637 %{
25638   peepmatch (leaP8Narrow);
25639   peepprocedure (lea_remove_redundant);
25640 %}
25641 
25642 peephole
25643 %{
25644   peepmatch (leaP32Narrow);
25645   peepprocedure (lea_remove_redundant);
25646 %}
25647 
// These peephole rules match instructions that set flags and are followed by a
// testI/L_reg. The test instruction is redundant when the downstream
// instructions (such as JCC or CMOV) only use flags that are already set by the
// preceding instruction.
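//
// For example (a sketch with illustrative registers):
//   andl  rax, rbx      // sets ZF and SF from its result
//   testl rax, rax      // redundant re-test of the same flags
//   je    target
// can be reduced to
//   andl  rax, rbx
//   je    target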
25650 
// int variant
25652 peephole
25653 %{
25654   peepmatch (testI_reg);
25655   peepprocedure (test_may_remove);
25656 %}
25657 
// long variant
25659 peephole
25660 %{
25661   peepmatch (testL_reg);
25662   peepprocedure (test_may_remove);
25663 %}
25664 
25665 
25666 //----------SMARTSPILL RULES---------------------------------------------------
25667 // These must follow all instruction definitions as they use the names
25668 // defined in the instructions definitions.