1 //
    2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
   66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
   67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   69 
// Each 64-bit general register is modeled as a pair of 32-bit VMReg
// slots: the base name is the low half and the matching _H name is the
// high half (as_VMReg()->next()).

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

// RSP is the stack pointer: No-Save in both conventions.
reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

// RSI/RDI differ in the C-convention column only: the Windows x64 ABI
// treats them as callee-saved (SOE), the System V ABI as caller-saved.
#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15 are save-on-entry for Java code.  NOTE(review): R12 and R15
// are commonly reserved by the VM (heap base / current thread) —
// confirm against the register-class definitions later in this file.
reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
// Extended general registers r16-r31 (encodings 16-31), all
// save-on-call in both conventions.  NOTE(review): these registers
// only exist on CPUs with the Intel APX extension — presumably their
// use is guarded by a CPU-feature check elsewhere in this file;
// confirm.

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
  178 
  179 // Floating Point Registers
  180 
  181 // Specify priority of register selection within phases of register
  182 // allocation.  Highest priority is first.  A useful heuristic is to
  183 // give registers a low priority when they are required by machine
  184 // instructions, like EAX and EDX on I486, and choose no-save registers
  185 // before save-on-call, & save-on-call before save-on-entry.  Registers
  186 // which participate in fixed calling sequences should come last.
  187 // Registers which are used as pairs must fall on an even boundary.
  188 
// Allocation order for the general-register chunk (highest priority
// first, per the note above).  Each 64-bit register is listed as its
// (low, high) slot pair so pairs fall on an even boundary; RSP is
// listed last, giving it the lowest selection priority.
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);
  221 
// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
  223 // Word a in each register holds a Float, words ab hold a Double.
  224 // The whole registers are used in SSE4.2 version intrinsics,
  225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  226 // UseXMMForArrayCopy and UseSuperword flags).
  227 // For pre EVEX enabled architectures:
  228 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  229 // For EVEX enabled architectures:
  230 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  231 //
  232 // Linux ABI:   No register preserved across function calls
  233 //              XMM0-XMM7 might hold parameters
  234 // Windows ABI: XMM6-XMM15 preserved across function calls
  235 //              XMM0-XMM3 might hold parameters
  236 
// Each 512-bit XMM/ZMM register is described as 16 consecutive 32-bit
// VMReg slots: the base name plus suffixes b-p (next(1)..next(15)).
// XMM0-XMM7 are encodable without any REX/EVEX prefix; all slots are
// save-on-call in both conventions.

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
  372 
// XMM8-XMM15: same 16-slot layout (a-p) as XMM0-XMM7; these encodings
// require a REX (or VEX/EVEX) prefix — see the encoding notes above.

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
  508 
// XMM16 and above are only encodable with EVEX (EVEX-enabled
// architectures — see the encoding notes above).

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
  559 
  560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
  561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
  562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
  563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
  564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
  565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
  566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
  567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
  568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
  569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
  570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
  571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
  572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
  573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
  574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
  575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
  576 
  577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
  578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
  579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
  580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
  581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
  582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
  583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
  584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
  585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
  586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
  587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
  588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
  589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
  590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
  591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
  592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
  593 
  594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
  595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
  596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
  597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
  598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
  599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
  600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
  601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
  602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
  603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
  604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
  605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
  606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
  607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
  608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
  609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
  610 
  611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
  612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
  613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
  614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
  615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
  616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
  617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
  618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
  619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
  620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
  621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
  622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
  623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
  624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
  625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
  626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
  627 
  628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
  629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
  630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
  631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
  632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
  633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
  634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
  635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
  636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
  637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
  638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
  639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
  640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
  641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
  642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
  643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
  644 
  645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
  646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
  647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
  648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
  649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
  650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
  651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
  652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
  653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
  654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
  655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
  656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
  657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
  658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
  659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
  660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
  661 
  662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
  663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
  664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
  665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
  666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
  667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
  668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
  669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
  670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
  671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
  672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
  673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
  674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
  675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
  676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
  677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
  678 
  679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
  680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
  681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
  682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
  683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
  684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
  685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
  686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
  687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
  688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
  689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
  690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
  691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
  692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
  693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
  694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
  695 
  696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
  697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
  698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
  699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
  700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
  701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
  702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
  703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
  704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
  705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
  706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
  707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
  708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
  709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
  710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
  711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
  712 
  713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
  714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
  715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
  716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
  717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
  718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
  719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
  720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
  721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
  722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
  723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
  724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
  725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
  726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
  727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
  728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
  729 
  730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
  731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
  732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
  733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
  734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
  735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
  736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
  737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
  738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
  739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
  740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
  741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
  742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
  743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
  744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
  745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
  746 
  747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
  748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
  749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
  750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
  751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
  752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
  753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
  754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
  755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
  756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
  757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
  758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
  759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
  760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
  761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
  762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
  763 
  764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
  765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
  766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
  767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
  768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
  769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
  770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
  771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
  772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
  773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
  774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
  775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
  776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
  777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
  778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
  779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
  780 
// Condition-code register.  It carries no ideal register type (0) and maps to
// VMRegImpl::Bad() -- it is never spilled or treated as a value register.
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
// Each 64-bit opmask register Kn is modeled as two 32-bit halves: Kn and
// Kn_H (the VMReg immediately following it).  K0 is not defined here --
// NOTE(review): presumably because opmask encoding 0 means "no masking"
// in EVEX instructions, so K0 cannot be allocated as a write mask; confirm
// against the EVEX encoding rules.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
  804 
  805 
  806 //----------Architecture Description Register Classes--------------------------
  807 // Several register classes are automatically defined based upon information in
  808 // this architecture description.
  809 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  811 //
  812 
// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
// The complete 64-bit GPR file: RAX..R15 plus the APX registers R16..R31,
// each listed as its (low, high) pair of 32-bit halves.  Note that RSP and
// R15 are included here, although the derived classes below (all_int_reg,
// ptr_reg, ...) exclude one or both of them.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);
  849 
  850 // Class for all int registers including APX extended GPRs.
  851 reg_class all_int_reg(RAX
  852                       RDX,
  853                       RBP,
  854                       RDI,
  855                       RSI,
  856                       RCX,
  857                       RBX,
  858                       R8,
  859                       R9,
  860                       R10,
  861                       R11,
  862                       R12,
  863                       R13,
  864                       R14,
  865                       R16,
  866                       R17,
  867                       R18,
  868                       R19,
  869                       R20,
  870                       R21,
  871                       R22,
  872                       R23,
  873                       R24,
  874                       R25,
  875                       R26,
  876                       R27,
  877                       R28,
  878                       R29,
  879                       R30,
  880                       R31);
  881 
// The reg_class definitions below use a run-time body (%{ ... %}): each one
// returns a precomputed RegMask (e.g. _PTR_REG_mask).
// NOTE(review): the masks themselves are initialized elsewhere (not visible
// in this chunk), so the exact contents may depend on run-time configuration.

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
  946 
// ---- Singleton (one-register) classes ----
// Used where an instruction or a VM convention requires one specific
// register (e.g. R15 is the TLS pointer, RSP the stack pointer).

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);
  997 
// Allocation chunk for the vector register file: all 32 XMM/ZMM registers,
// each with its 16 32-bit slices (XMMn .. XMMnp).  The order here is the
// register allocator's preference order within the chunk.
alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

// Allocation chunk for the opmask registers, listed from K7 down to K1.
alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

// Class of all allocatable opmask registers (K1..K7; K0 is not modeled,
// see the note at the K-register definitions above).
reg_class  vectmask_reg(K1, K1_H,
                        K2, K2_H,
                        K3, K3_H,
                        K4, K4_H,
                        K5, K5_H,
                        K6, K6_H,
                        K7, K7_H);

// Singleton classes pinning a specific opmask register.
reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 1060 
// Class for pre evex float registers
// Legacy (SSE/AVX) encodings can only address XMM0..XMM15.
reg_class float_reg_legacy(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7,
                    XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15);

// Class for evex float registers
// Extends the legacy set with the EVEX-only upper bank XMM16..XMM31.
reg_class float_reg_evex(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7,
                    XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15,
                    XMM16,
                    XMM17,
                    XMM18,
                    XMM19,
                    XMM20,
                    XMM21,
                    XMM22,
                    XMM23,
                    XMM24,
                    XMM25,
                    XMM26,
                    XMM27,
                    XMM28,
                    XMM29,
                    XMM30,
                    XMM31);

// Pick the evex or legacy float class at run time based on CPU support;
// float_reg_vl additionally requires AVX512VL (128/256-bit EVEX forms).
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
// Class for pre evex double registers
// (each double occupies two 32-bit slots: XMMn + XMMnb)
reg_class double_reg_legacy(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b,
                     XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b);

// Class for evex double registers
// (adds XMM16-XMM31, which require the EVEX encoding)
reg_class double_reg_evex(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b,
                     XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b,
                     XMM16, XMM16b,
                     XMM17, XMM17b,
                     XMM18, XMM18b,
                     XMM19, XMM19b,
                     XMM20, XMM20b,
                     XMM21, XMM21b,
                     XMM22, XMM22b,
                     XMM23, XMM23b,
                     XMM24, XMM24b,
                     XMM25, XMM25b,
                     XMM26, XMM26b,
                     XMM27, XMM27b,
                     XMM28, XMM28b,
                     XMM29, XMM29b,
                     XMM30, XMM30b,
                     XMM31, XMM31b);

// Select the evex or legacy set based on CPU capabilities.
reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
// Class for pre evex 32bit vector registers
// (one 32-bit slot per register)
reg_class vectors_reg_legacy(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7,
                      XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15);

// Class for evex 32bit vector registers
// (adds XMM16-XMM31, which require the EVEX encoding)
reg_class vectors_reg_evex(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7,
                      XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15,
                      XMM16,
                      XMM17,
                      XMM18,
                      XMM19,
                      XMM20,
                      XMM21,
                      XMM22,
                      XMM23,
                      XMM24,
                      XMM25,
                      XMM26,
                      XMM27,
                      XMM28,
                      XMM29,
                      XMM30,
                      XMM31);

// Select the evex or legacy set based on CPU capabilities.
reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for all 64bit vector registers
// (two 32-bit slots per register: XMMn + XMMnb)
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b,
                      XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b);

// Class for all 64bit vector registers
// (evex variant: adds XMM16-XMM31, which require the EVEX encoding)
reg_class vectord_reg_evex(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b,
                      XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b,
                      XMM16, XMM16b,
                      XMM17, XMM17b,
                      XMM18, XMM18b,
                      XMM19, XMM19b,
                      XMM20, XMM20b,
                      XMM21, XMM21b,
                      XMM22, XMM22b,
                      XMM23, XMM23b,
                      XMM24, XMM24b,
                      XMM25, XMM25b,
                      XMM26, XMM26b,
                      XMM27, XMM27b,
                      XMM28, XMM28b,
                      XMM29, XMM29b,
                      XMM30, XMM30b,
                      XMM31, XMM31b);

// Select the evex or legacy set based on CPU capabilities.
reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for all 128bit vector registers
// (four 32-bit slots per register: XMMn .. XMMnd)
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,
                      XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d);

// Class for all 128bit vector registers
// (evex variant: adds XMM16-XMM31, which require the EVEX encoding)
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,
                      XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d,
                      XMM16, XMM16b, XMM16c, XMM16d,
                      XMM17, XMM17b, XMM17c, XMM17d,
                      XMM18, XMM18b, XMM18c, XMM18d,
                      XMM19, XMM19b, XMM19c, XMM19d,
                      XMM20, XMM20b, XMM20c, XMM20d,
                      XMM21, XMM21b, XMM21c, XMM21d,
                      XMM22, XMM22b, XMM22c, XMM22d,
                      XMM23, XMM23b, XMM23c, XMM23d,
                      XMM24, XMM24b, XMM24c, XMM24d,
                      XMM25, XMM25b, XMM25c, XMM25d,
                      XMM26, XMM26b, XMM26c, XMM26d,
                      XMM27, XMM27b, XMM27c, XMM27d,
                      XMM28, XMM28b, XMM28c, XMM28d,
                      XMM29, XMM29b, XMM29c, XMM29d,
                      XMM30, XMM30b, XMM30c, XMM30d,
                      XMM31, XMM31b, XMM31c, XMM31d);

// Select the evex or legacy set based on CPU capabilities.
reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for all 256bit vector registers
// (eight 32-bit slots per register: XMMn .. XMMnh)
reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
                      XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);

// Class for all 256bit vector registers
// (evex variant: adds XMM16-XMM31, which require the EVEX encoding)
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
                      XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                      XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);

// Select the evex or legacy set based on CPU capabilities.
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for all 512bit vector registers
// (sixteen 32-bit slots per register: XMMn .. XMMnp; XMM16-XMM31 require
// the EVEX encoding)
reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                      XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                      XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

// Class for restricted 512bit vector registers
// (legacy fallback: only XMM0-XMM15 are available)
reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                      XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);

// Select the evex or legacy set based on CPU capabilities.
reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
// Singleton class containing only XMM0 (as its four 32-bit slots), for
// operands pinned to this specific register.
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
source_hpp %{

#include "peephole_x86_64.hpp"

// Forward declaration; the implementation lives in the source block below.
bool castLL_is_imm32(const Node* n);

%}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
// Register masks
source_hpp %{

// Register masks computed at VM startup by reg_mask_init() (defined in the
// source block below) from the adlc-generated class masks.  The _NO_*
// variants exclude specific registers, presumably for instructions that
// use them implicitly (TODO confirm against the matcher rules that use
// them).
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

// Register-or-stack-slot variants of the masks above.
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

// Accessors for the register-or-stack-slot masks.
inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }

%}
 1500 
source %{
// Relocation formats for patchable 64-bit immediates and 32-bit
// displacements.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Conventional assembler shorthand.
#define __ masm->

// Definitions of the masks declared in the source_hpp block above; they
// are populated by reg_mask_init() at startup.
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
 1524 
// r12 must be reserved (kept out of the allocatable masks) whenever
// compressed oops are in use.
static bool need_r12_heapbase() {
  return UseCompressedOops;
}
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
 1627   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
 1631   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
// Fix up EFLAGS after comiss/ucomiss so that an unordered (NaN) compare
// reads as 'less than': only CF is left set among ZF/PF/CF.
static void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit); // PF=0: ordered compare, flags are already correct
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andq(Address(rsp, 0), 0xffffff2b); // mask ZF and PF out of the saved flags image
  __ popf();
  __ bind(exit);
}
 1700 
// Materialize a three-way floating point compare result into dst:
// -1 for less-than or unordered (CF=1), 0 for equal, 1 for greater.
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // If any floating point comparison instruction is used, unordered case always triggers jump
  // for below condition, CF=1 is true when at least one input is NaN
  Label done;
  __ movl(dst, -1);                   // assume 'less' (also covers the NaN case)
  __ jcc(Assembler::below, done);     // CF=1: less than or unordered
  __ setcc(Assembler::notEqual, dst); // ZF=0 -> 1 (greater), ZF=1 -> 0 (equal)
  __ bind(done);
}
 1710 
 1711 // Math.min()    # Math.max()
 1712 // --------------------------
 1713 // ucomis[s/d]   #
 1714 // ja   -> b     # a
 1715 // jp   -> NaN   # NaN
 1716 // jb   -> a     # b
 1717 // je            #
 1718 // |-jz -> a | b # a & b
 1719 // |    -> a     #
// Emit Math.min()/Math.max() semantics for float or double (see the
// dispatch table above), including the signed-zero and NaN cases that a
// plain compare-and-select would get wrong.
//   dst    - result register (may alias a or b)
//   a, b   - the two inputs
//   xmmt   - XMM temporary (used to compare a against 0.0 in the equal case)
//   rt     - GP temporary (used to materialize the canonical NaN bits)
//   min    - true for Math.min(), false for Math.max()
//   single - true for float, false for double
static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
                            XMMRegister a, XMMRegister b,
                            XMMRegister xmmt, Register rt,
                            bool min, bool single) {

  Label nan, zero, below, above, done;

  if (single)
    __ ucomiss(a, b);
  else
    __ ucomisd(a, b);

  // a > b: the answer is b for min, a for max. If dst already holds that
  // register there is nothing left to move.
  if (dst->encoding() != (min ? b : a)->encoding())
    __ jccb(Assembler::above, above); // CF=0 & ZF=0
  else
    __ jccb(Assembler::above, done);

  __ jccb(Assembler::parity, nan);  // PF=1
  __ jccb(Assembler::below, below); // CF=1

  // equal: -0.0 and +0.0 compare equal but need bitwise treatment;
  // any non-zero equal pair can simply take a.
  __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  if (single) {
    __ ucomiss(a, xmmt);
    __ jccb(Assembler::equal, zero);

    __ movflt(dst, a);
    __ jmp(done);
  }
  else {
    __ ucomisd(a, xmmt);
    __ jccb(Assembler::equal, zero);

    __ movdbl(dst, a);
    __ jmp(done);
  }

  // Signed zeros: OR keeps a set sign bit so min(-0.0, +0.0) == -0.0;
  // AND clears it unless both are negative so max(-0.0, +0.0) == +0.0.
  __ bind(zero);
  if (min)
    __ vpor(dst, a, b, Assembler::AVX_128bit);
  else
    __ vpand(dst, a, b, Assembler::AVX_128bit);

  __ jmp(done);

  __ bind(above);
  if (single)
    __ movflt(dst, min ? b : a);
  else
    __ movdbl(dst, min ? b : a);

  __ jmp(done);

  // Unordered: at least one input is NaN -> produce the canonical NaN.
  __ bind(nan);
  if (single) {
    __ movl(rt, 0x7fc00000); // Float.NaN
    __ movdl(dst, rt);
  }
  else {
    __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
    __ movdq(dst, rt);
  }
  __ jmp(done);

  __ bind(below);
  if (single)
    __ movflt(dst, min ? a : b);
  else
    __ movdbl(dst, min ? a : b);

  __ bind(done);
}
 1792 
 1793 //=============================================================================
 1794 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1795 
 1796 int ConstantTable::calculate_table_base_offset() const {
 1797   return 0;  // absolute addressing, no offset
 1798 }
 1799 
 1800 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1801 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1802   ShouldNotReachHere();
 1803 }
 1804 
 1805 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1806   // Empty encoding
 1807 }
 1808 
 1809 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1810   return 0;
 1811 }
 1812 
#ifndef PRODUCT
// Debug printing only: the node has an empty encoding, so just say so.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
 1818 
 1819 
 1820 //=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog for debug output. Mirrors the instruction
// sequence produced by MachPrologNode::emit() below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    // rbp is pushed explicitly here, so its slot comes out of the subq.
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("pushq   rbp\t# Save rbp");
    if (PreserveFramePointer) {
        st->print("\n\t");
        st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("subq    rsp, #%d\t# Create frame",framesize);
    }
  } else {
    // No stack bang: the whole frame is allocated first and rbp is stored
    // into its slot instead of pushed.
    st->print("subq    rsp, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("addq    rbp, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
#ifdef ASSERT
    st->print("\n\t");
    st->print("# stack alignment check");
#endif
  }
  if (C->stub_function() != nullptr) {
    // nmethod entry barrier check, printed for stub compilations.
    st->print("\n\t");
    st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
    st->print("\n\t");
    st->print("je      fast_entry\t");
    st->print("\n\t");
    st->print("call    #nmethod_entry_barrier_stub\t");
    st->print("\n\tfast_entry:");
  }
  st->cr();
}
#endif
 1880 
// Emit the method prolog: an optional fast class-initialization barrier,
// then stack bang / frame setup via MacroAssembler::verified_entry().
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    // Fast path: the holder class is already initialized; otherwise fall
    // through to the wrong-method stub below.
    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Bang size of 0 suppresses the stack bang. NOTE(review): the last arg
  // tracks C->stub_function() != nullptr — presumably selects the stub
  // entry-barrier variant; confirm against verified_entry()'s signature.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
 1913 
 1914 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1915 {
 1916   return MachNode::size(ra_); // too many variables; just compute it
 1917                               // the hard way
 1918 }
 1919 
 1920 int MachPrologNode::reloc() const
 1921 {
 1922   return 0; // a large enough number
 1923 }
 1924 
 1925 //=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog for debug output. Mirrors the instruction
// sequence produced by MachEpilogNode::emit() below.
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  Compile* C = ra_->C;
  if (generate_vzeroupper(C)) {
    st->print("vzeroupper");
    st->cr(); st->print("\t");
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  if (framesize) {
    st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
    st->print("\t");
  }

  st->print_cr("popq    rbp");
  if (do_polling() && C->is_method_compilation()) {
    // Return-path safepoint poll.
    st->print("\t");
    st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
                 "ja      #safepoint_stub\t"
                 "# Safepoint: poll for GC");
  }
}
#endif
 1955 
// Emit the method epilog: optional vzeroupper, frame teardown, an optional
// reserved-stack check, and the return-path safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Only create the real slow-path stub when actually emitting code,
      // not while sizing into the scratch buffer.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
 1996 
 1997 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1998 {
 1999   return MachNode::size(ra_); // too many variables; just compute it
 2000                               // the hard way
 2001 }
 2002 
 2003 int MachEpilogNode::reloc() const
 2004 {
 2005   return 2; // a large enough number
 2006 }
 2007 
 2008 const Pipeline* MachEpilogNode::pipeline() const
 2009 {
 2010   return MachNode::pipeline_class();
 2011 }
 2012 
 2013 //=============================================================================
 2014 
// Coarse register classes used by MachSpillCopyNode::implementation() below
// to select the proper move instruction for each spill-copy direction.
enum RC {
  rc_bad,   // not a valid register
  rc_int,   // general-purpose register
  rc_kreg,  // AVX-512 opmask (K) register
  rc_float, // XMM register
  rc_stack  // stack slot
};
 2022 
 2023 static enum RC rc_class(OptoReg::Name reg)
 2024 {
 2025   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 2026 
 2027   if (OptoReg::is_stack(reg)) return rc_stack;
 2028 
 2029   VMReg r = OptoReg::as_VMReg(reg);
 2030 
 2031   if (r->is_Register()) return rc_int;
 2032 
 2033   if (r->is_KRegister()) return rc_kreg;
 2034 
 2035   assert(r->is_XMMRegister(), "must be");
 2036   return rc_float;
 2037 }
 2038 
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// Register-to-register vector move (used below for xmm -> xmm spill copies).
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector load/store between an XMM register and a stack slot
// (is_load selects the direction).
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);
 2045 
// Copy a vector value between two stack slots (mem -> mem spill copy).
// When masm is non-null the instructions are emitted; otherwise (non-PRODUCT
// only) the equivalent assembly is printed to st. rax resp. xmm0 serve as
// scratch registers and are saved to / restored from slots just below rsp.
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS: // 4 bytes, copied through rax
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD: // 8 bytes, push/pop needs no scratch register
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX: // 16 bytes as two 8-byte push/pop pairs
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY: // 32 bytes, copied through xmm0
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ: // 64 bytes, copied through xmm0
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Printing-only path: must stay in sync with the emitting path above.
    switch (ireg) {
    case Op_VecS:
      st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl    rax, [rsp + #%d]\n\t"
                "movl    [rsp + #%d], rax\n\t"
                "movq    rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset);
      break;
     case Op_VecX:
      st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]\n\t"
                "pushq   [rsp + #%d]\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
 2123 
 2124 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2125                                        PhaseRegAlloc* ra_,
 2126                                        bool do_size,
 2127                                        outputStream* st) const {
 2128   assert(masm != nullptr || st  != nullptr, "sanity");
 2129   // Get registers to move
 2130   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2131   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2132   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2133   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2134 
 2135   enum RC src_second_rc = rc_class(src_second);
 2136   enum RC src_first_rc = rc_class(src_first);
 2137   enum RC dst_second_rc = rc_class(dst_second);
 2138   enum RC dst_first_rc = rc_class(dst_first);
 2139 
 2140   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2141          "must move at least 1 register" );
 2142 
 2143   if (src_first == dst_first && src_second == dst_second) {
 2144     // Self copy, no move
 2145     return 0;
 2146   }
 2147   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2148     uint ireg = ideal_reg();
 2149     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2150     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 2151     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 2152       // mem -> mem
 2153       int src_offset = ra_->reg2offset(src_first);
 2154       int dst_offset = ra_->reg2offset(dst_first);
 2155       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
 2156     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
 2157       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
 2158     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 2159       int stack_offset = ra_->reg2offset(dst_first);
 2160       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 2161     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 2162       int stack_offset = ra_->reg2offset(src_first);
 2163       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2164     } else {
 2165       ShouldNotReachHere();
 2166     }
 2167     return 0;
 2168   }
 2169   if (src_first_rc == rc_stack) {
 2170     // mem ->
 2171     if (dst_first_rc == rc_stack) {
 2172       // mem -> mem
 2173       assert(src_second != dst_first, "overlap");
 2174       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2175           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2176         // 64-bit
 2177         int src_offset = ra_->reg2offset(src_first);
 2178         int dst_offset = ra_->reg2offset(dst_first);
 2179         if (masm) {
 2180           __ pushq(Address(rsp, src_offset));
 2181           __ popq (Address(rsp, dst_offset));
 2182 #ifndef PRODUCT
 2183         } else {
 2184           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2185                     "popq    [rsp + #%d]",
 2186                      src_offset, dst_offset);
 2187 #endif
 2188         }
 2189       } else {
 2190         // 32-bit
 2191         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2192         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2193         // No pushl/popl, so:
 2194         int src_offset = ra_->reg2offset(src_first);
 2195         int dst_offset = ra_->reg2offset(dst_first);
 2196         if (masm) {
 2197           __ movq(Address(rsp, -8), rax);
 2198           __ movl(rax, Address(rsp, src_offset));
 2199           __ movl(Address(rsp, dst_offset), rax);
 2200           __ movq(rax, Address(rsp, -8));
 2201 #ifndef PRODUCT
 2202         } else {
 2203           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2204                     "movl    rax, [rsp + #%d]\n\t"
 2205                     "movl    [rsp + #%d], rax\n\t"
 2206                     "movq    rax, [rsp - #8]",
 2207                      src_offset, dst_offset);
 2208 #endif
 2209         }
 2210       }
 2211       return 0;
 2212     } else if (dst_first_rc == rc_int) {
 2213       // mem -> gpr
 2214       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2215           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2216         // 64-bit
 2217         int offset = ra_->reg2offset(src_first);
 2218         if (masm) {
 2219           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2220 #ifndef PRODUCT
 2221         } else {
 2222           st->print("movq    %s, [rsp + #%d]\t# spill",
 2223                      Matcher::regName[dst_first],
 2224                      offset);
 2225 #endif
 2226         }
 2227       } else {
 2228         // 32-bit
 2229         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2230         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2231         int offset = ra_->reg2offset(src_first);
 2232         if (masm) {
 2233           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2234 #ifndef PRODUCT
 2235         } else {
 2236           st->print("movl    %s, [rsp + #%d]\t# spill",
 2237                      Matcher::regName[dst_first],
 2238                      offset);
 2239 #endif
 2240         }
 2241       }
 2242       return 0;
 2243     } else if (dst_first_rc == rc_float) {
 2244       // mem-> xmm
 2245       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2246           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2247         // 64-bit
 2248         int offset = ra_->reg2offset(src_first);
 2249         if (masm) {
 2250           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2251 #ifndef PRODUCT
 2252         } else {
 2253           st->print("%s  %s, [rsp + #%d]\t# spill",
 2254                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2255                      Matcher::regName[dst_first],
 2256                      offset);
 2257 #endif
 2258         }
 2259       } else {
 2260         // 32-bit
 2261         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2262         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2263         int offset = ra_->reg2offset(src_first);
 2264         if (masm) {
 2265           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2266 #ifndef PRODUCT
 2267         } else {
 2268           st->print("movss   %s, [rsp + #%d]\t# spill",
 2269                      Matcher::regName[dst_first],
 2270                      offset);
 2271 #endif
 2272         }
 2273       }
 2274       return 0;
 2275     } else if (dst_first_rc == rc_kreg) {
 2276       // mem -> kreg
 2277       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2278           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2279         // 64-bit
 2280         int offset = ra_->reg2offset(src_first);
 2281         if (masm) {
 2282           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2283 #ifndef PRODUCT
 2284         } else {
 2285           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2286                      Matcher::regName[dst_first],
 2287                      offset);
 2288 #endif
 2289         }
 2290       }
 2291       return 0;
 2292     }
 2293   } else if (src_first_rc == rc_int) {
 2294     // gpr ->
 2295     if (dst_first_rc == rc_stack) {
 2296       // gpr -> mem
 2297       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2298           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2299         // 64-bit
 2300         int offset = ra_->reg2offset(dst_first);
 2301         if (masm) {
 2302           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2303 #ifndef PRODUCT
 2304         } else {
 2305           st->print("movq    [rsp + #%d], %s\t# spill",
 2306                      offset,
 2307                      Matcher::regName[src_first]);
 2308 #endif
 2309         }
 2310       } else {
 2311         // 32-bit
 2312         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2313         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2314         int offset = ra_->reg2offset(dst_first);
 2315         if (masm) {
 2316           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2317 #ifndef PRODUCT
 2318         } else {
 2319           st->print("movl    [rsp + #%d], %s\t# spill",
 2320                      offset,
 2321                      Matcher::regName[src_first]);
 2322 #endif
 2323         }
 2324       }
 2325       return 0;
 2326     } else if (dst_first_rc == rc_int) {
 2327       // gpr -> gpr
 2328       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2329           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2330         // 64-bit
 2331         if (masm) {
 2332           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2333                   as_Register(Matcher::_regEncode[src_first]));
 2334 #ifndef PRODUCT
 2335         } else {
 2336           st->print("movq    %s, %s\t# spill",
 2337                      Matcher::regName[dst_first],
 2338                      Matcher::regName[src_first]);
 2339 #endif
 2340         }
 2341         return 0;
 2342       } else {
 2343         // 32-bit
 2344         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2345         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2346         if (masm) {
 2347           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2348                   as_Register(Matcher::_regEncode[src_first]));
 2349 #ifndef PRODUCT
 2350         } else {
 2351           st->print("movl    %s, %s\t# spill",
 2352                      Matcher::regName[dst_first],
 2353                      Matcher::regName[src_first]);
 2354 #endif
 2355         }
 2356         return 0;
 2357       }
 2358     } else if (dst_first_rc == rc_float) {
 2359       // gpr -> xmm
 2360       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2361           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2362         // 64-bit
 2363         if (masm) {
 2364           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2365 #ifndef PRODUCT
 2366         } else {
 2367           st->print("movdq   %s, %s\t# spill",
 2368                      Matcher::regName[dst_first],
 2369                      Matcher::regName[src_first]);
 2370 #endif
 2371         }
 2372       } else {
 2373         // 32-bit
 2374         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2375         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2376         if (masm) {
 2377           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2378 #ifndef PRODUCT
 2379         } else {
 2380           st->print("movdl   %s, %s\t# spill",
 2381                      Matcher::regName[dst_first],
 2382                      Matcher::regName[src_first]);
 2383 #endif
 2384         }
 2385       }
 2386       return 0;
 2387     } else if (dst_first_rc == rc_kreg) {
 2388       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2389           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2390         // 64-bit
 2391         if (masm) {
 2392           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2393   #ifndef PRODUCT
 2394         } else {
 2395            st->print("kmovq   %s, %s\t# spill",
 2396                        Matcher::regName[dst_first],
 2397                        Matcher::regName[src_first]);
 2398   #endif
 2399         }
 2400       }
 2401       Unimplemented();
 2402       return 0;
 2403     }
 2404   } else if (src_first_rc == rc_float) {
 2405     // xmm ->
 2406     if (dst_first_rc == rc_stack) {
 2407       // xmm -> mem
 2408       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2409           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2410         // 64-bit
 2411         int offset = ra_->reg2offset(dst_first);
 2412         if (masm) {
 2413           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2414 #ifndef PRODUCT
 2415         } else {
 2416           st->print("movsd   [rsp + #%d], %s\t# spill",
 2417                      offset,
 2418                      Matcher::regName[src_first]);
 2419 #endif
 2420         }
 2421       } else {
 2422         // 32-bit
 2423         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2424         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2425         int offset = ra_->reg2offset(dst_first);
 2426         if (masm) {
 2427           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2428 #ifndef PRODUCT
 2429         } else {
 2430           st->print("movss   [rsp + #%d], %s\t# spill",
 2431                      offset,
 2432                      Matcher::regName[src_first]);
 2433 #endif
 2434         }
 2435       }
 2436       return 0;
 2437     } else if (dst_first_rc == rc_int) {
 2438       // xmm -> gpr
 2439       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2440           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2441         // 64-bit
 2442         if (masm) {
 2443           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2444 #ifndef PRODUCT
 2445         } else {
 2446           st->print("movdq   %s, %s\t# spill",
 2447                      Matcher::regName[dst_first],
 2448                      Matcher::regName[src_first]);
 2449 #endif
 2450         }
 2451       } else {
 2452         // 32-bit
 2453         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2454         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2455         if (masm) {
 2456           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2457 #ifndef PRODUCT
 2458         } else {
 2459           st->print("movdl   %s, %s\t# spill",
 2460                      Matcher::regName[dst_first],
 2461                      Matcher::regName[src_first]);
 2462 #endif
 2463         }
 2464       }
 2465       return 0;
 2466     } else if (dst_first_rc == rc_float) {
 2467       // xmm -> xmm
 2468       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2469           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2470         // 64-bit
 2471         if (masm) {
 2472           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2473 #ifndef PRODUCT
 2474         } else {
 2475           st->print("%s  %s, %s\t# spill",
 2476                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2477                      Matcher::regName[dst_first],
 2478                      Matcher::regName[src_first]);
 2479 #endif
 2480         }
 2481       } else {
 2482         // 32-bit
 2483         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2484         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2485         if (masm) {
 2486           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2487 #ifndef PRODUCT
 2488         } else {
 2489           st->print("%s  %s, %s\t# spill",
 2490                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2491                      Matcher::regName[dst_first],
 2492                      Matcher::regName[src_first]);
 2493 #endif
 2494         }
 2495       }
 2496       return 0;
 2497     } else if (dst_first_rc == rc_kreg) {
 2498       assert(false, "Illegal spilling");
 2499       return 0;
 2500     }
 2501   } else if (src_first_rc == rc_kreg) {
 2502     if (dst_first_rc == rc_stack) {
 2503       // mem -> kreg
 2504       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2505           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2506         // 64-bit
 2507         int offset = ra_->reg2offset(dst_first);
 2508         if (masm) {
 2509           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2510 #ifndef PRODUCT
 2511         } else {
 2512           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2513                      offset,
 2514                      Matcher::regName[src_first]);
 2515 #endif
 2516         }
 2517       }
 2518       return 0;
 2519     } else if (dst_first_rc == rc_int) {
 2520       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2521           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2522         // 64-bit
 2523         if (masm) {
 2524           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2525 #ifndef PRODUCT
 2526         } else {
 2527          st->print("kmovq   %s, %s\t# spill",
 2528                      Matcher::regName[dst_first],
 2529                      Matcher::regName[src_first]);
 2530 #endif
 2531         }
 2532       }
 2533       Unimplemented();
 2534       return 0;
 2535     } else if (dst_first_rc == rc_kreg) {
 2536       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2537           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2538         // 64-bit
 2539         if (masm) {
 2540           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2541 #ifndef PRODUCT
 2542         } else {
 2543          st->print("kmovq   %s, %s\t# spill",
 2544                      Matcher::regName[dst_first],
 2545                      Matcher::regName[src_first]);
 2546 #endif
 2547         }
 2548       }
 2549       return 0;
 2550     } else if (dst_first_rc == rc_float) {
 2551       assert(false, "Illegal spill");
 2552       return 0;
 2553     }
 2554   }
 2555 
 2556   assert(0," foo ");
 2557   Unimplemented();
 2558   return 0;
 2559 }
 2560 
 2561 #ifndef PRODUCT
// Debug-only pretty-printing of a spill copy: run the shared
// implementation in print mode (masm == nullptr) so it writes the
// would-be instructions to 'st' instead of emitting code.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation(nullptr, ra_, false, st);
}
 2565 #endif
 2566 
// Emit the spill/fill/copy code by running the shared implementation with a
// real assembler (no output stream, so nothing is printed).
void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  implementation(masm, ra_, false, nullptr);
}
 2570 
// Spill copies have no fixed encoding size (it depends on register class,
// operand width, and displacement), so defer to the generic computation.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
 2574 
 2575 //=============================================================================
 2576 #ifndef PRODUCT
// Debug-only pretty-printing: show the leaq that materializes the stack
// address of the lock box into the assigned register.
void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  // OptoReg number (not hardware encoding) — used to index regName.
  int reg = ra_->get_reg_first(this);
  st->print("leaq    %s, [rsp + #%d]\t# box lock",
            Matcher::regName[reg], offset);
}
 2584 #endif
 2585 
// Compute the address of the on-stack lock box into the assigned register.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  // Hardware register encoding (contrast with get_reg_first in format()).
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
 2593 
 2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2595 {
 2596   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2597   if (ra_->get_encode(this) > 15) {
 2598     return (offset < 0x80) ? 6 : 9; // REX2
 2599   } else {
 2600     return (offset < 0x80) ? 5 : 8; // REX
 2601   }
 2602 }
 2603 
 2604 //=============================================================================
 2605 #ifndef PRODUCT
 2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2607 {
 2608   if (UseCompressedClassPointers) {
 2609     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2610     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2611   } else {
 2612     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2613     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2614   }
 2615   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2616 }
 2617 #endif
 2618 
// Emit the inline-cache check at the unverified entry point; on a miss the
// generated code jumps to SharedRuntime::_ic_miss_stub (see format() above).
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
 2623 
// Size depends on alignment and pointer-compression mode, so measure the
// actual emission rather than computing it statically.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 2629 
 2630 
 2631 //=============================================================================
 2632 
// Vector calling convention is available whenever Vector API support is
// enabled; no additional CPU feature checks are required here.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
 2636 
// True if the instruction selector flagged this node's first source operand
// as a candidate for APX NDD demotion (see Matcher::is_register_biasing_candidate).
static bool is_ndd_demotable_opr1(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
}
 2640 
// True if the node's second source operand was flagged as an NDD demotion
// candidate (commutative operations can bias the def towards either input).
static bool is_ndd_demotable_opr2(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
}
 2644 
 2645 #ifdef ASSERT
// Debug-only helper: true if either source operand is flagged NDD-demotable.
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
 2649 #endif
 2650 
// Returns true when the register allocator should try to bias the definition
// of 'mdef' towards the register assigned to its 'oper_index'-th operand.
// On x86 this enables Intel APX NDD (new data destination) demotion: when
// destination and source end up in the same register, the assembler can
// replace the 4-byte extended EVEX prefix with a shorter REX/REX2 encoding.
bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
                                            int oper_index) {
  if (mdef == nullptr) {
    return false;
  }

  // The operand must exist and have an incoming edge. Nodes carrying a
  // demotable flag are expected to satisfy this (checked in debug builds).
  if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
      mdef->in(mdef->operand_index(oper_index)) == nullptr) {
    assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
    assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
    return false;
  }

  // Complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing def towards any address component will not
  // result in NDD demotion by assembler.
  if (mdef->operand_num_edges(oper_index) != 1) {
    return false;
  }

  // Demotion candidate must be register mask compatible with definition.
  const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
  if (!oper_mask.overlap(mdef->out_RegMask())) {
    assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
    return false;
  }

  switch (oper_index) {
  // First operand of MachNode corresponding to Intel APX NDD selection
  // pattern can share its assigned register with definition operand if
  // their live ranges do not overlap. In such a scenario we can demote
  // it to legacy map0/map1 instruction by replacing its 4-byte extended
  // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
  // are decorated with a special flag by instruction selector.
  case 1:
    return is_ndd_demotable_opr1(mdef);

  // Definition operand of commutative operation can be biased towards second
  // operand.
  case 2:
    return is_ndd_demotable_opr2(mdef);

  // Current scheme only selects up to two biasing candidates
  default:
    assert(false, "unhandled operand index: %s", mdef->Name());
    break;
  }

  return false;
}
 2701 
 2702 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2703   assert(EnableVectorSupport, "sanity");
 2704   int lo = XMM0_num;
 2705   int hi = XMM0b_num;
 2706   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2707   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2708   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2709   return OptoRegPair(hi, lo);
 2710 }
 2711 
 2712 // Is this branch offset short enough that a short branch can be used?
 2713 //
 2714 // NOTE: If the platform does not provide any short branch variants, then
 2715 //       this method should return false for offset 0.
 2716 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2717   // The passed offset is relative to address of the branch.
 2718   // On 86 a branch displacement is calculated relative to address
 2719   // of a next instruction.
 2720   offset -= br_size;
 2721 
 2722   // the short version of jmpConUCF2 contains multiple branches,
 2723   // making the reach slightly less
 2724   if (rule == jmpConUCF2_rule)
 2725     return (-126 <= offset && offset <= 125);
 2726   return (-128 <= offset && offset <= 127);
 2727 }
 2728 
 2729 #ifdef ASSERT
 2730 // Return whether or not this register is ever used as an argument.
 2731 bool Matcher::can_be_java_arg(int reg)
 2732 {
 2733   return
 2734     reg ==  RDI_num || reg == RDI_H_num ||
 2735     reg ==  RSI_num || reg == RSI_H_num ||
 2736     reg ==  RDX_num || reg == RDX_H_num ||
 2737     reg ==  RCX_num || reg == RCX_H_num ||
 2738     reg ==   R8_num || reg ==  R8_H_num ||
 2739     reg ==   R9_num || reg ==  R9_H_num ||
 2740     reg ==  R12_num || reg == R12_H_num ||
 2741     reg == XMM0_num || reg == XMM0b_num ||
 2742     reg == XMM1_num || reg == XMM1b_num ||
 2743     reg == XMM2_num || reg == XMM2b_num ||
 2744     reg == XMM3_num || reg == XMM3b_num ||
 2745     reg == XMM4_num || reg == XMM4b_num ||
 2746     reg == XMM5_num || reg == XMM5b_num ||
 2747     reg == XMM6_num || reg == XMM6b_num ||
 2748     reg == XMM7_num || reg == XMM7b_num;
 2749 }
 2750 #endif
 2751 
 2752 uint Matcher::int_pressure_limit()
 2753 {
 2754   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2755 }
 2756 
 2757 uint Matcher::float_pressure_limit()
 2758 {
 2759   // After experiment around with different values, the following default threshold
 2760   // works best for LCM's register pressure scheduling on x64.
 2761   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2762   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2763   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2764 }
 2765 
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the divisor is a
  // constant is faster than the hardware DIV instruction (it uses MulHiL),
  // so never request the assembly-stub version.
  return false;
}
 2772 
 2773 // Register for DIVI projection of divmodI
const RegMask& Matcher::divI_proj_mask() {
  return INT_RAX_REG_mask();  // x86 idiv produces the quotient in rax
}
 2777 
 2778 // Register for MODI projection of divmodI
const RegMask& Matcher::modI_proj_mask() {
  return INT_RDX_REG_mask();  // x86 idiv produces the remainder in rdx
}
 2782 
 2783 // Register for DIVL projection of divmodL
const RegMask& Matcher::divL_proj_mask() {
  return LONG_RAX_REG_mask();  // 64-bit quotient in rax
}
 2787 
 2788 // Register for MODL projection of divmodL
const RegMask& Matcher::modL_proj_mask() {
  return LONG_RDX_REG_mask();  // 64-bit remainder in rdx
}
 2792 
 2793 %}
 2794 
 2795 source_hpp %{
 2796 // Header information of the source block.
 2797 // Method declarations/definitions which are used outside
 2798 // the ad-scope can conveniently be defined here.
 2799 //
 2800 // To keep related declarations/definitions/uses close together,
 2801 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2802 
 2803 #include "runtime/vm_version.hpp"
 2804 
 2805 class NativeJump;
 2806 
// Platform hooks describing call trampoline stubs. x86 never needs call
// trampolines, so both queries report zero.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
 2824 
// Platform-specific deoptimization handler support.
class HandlerImpl {

 public:

  // Emits the deopt handler stub; defined in the source block below.
  static int emit_deopt_handler(C2_MacroAssembler* masm);

  // Worst-case size of the deopt handler; checked by an assert at emit time.
  static uint size_deopt_handler() {
    // one call and one jmp.
    return 7;
  }
};
 2836 
 2837 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2838   switch(bytes) {
 2839     case  4: // fall-through
 2840     case  8: // fall-through
 2841     case 16: return Assembler::AVX_128bit;
 2842     case 32: return Assembler::AVX_256bit;
 2843     case 64: return Assembler::AVX_512bit;
 2844 
 2845     default: {
 2846       ShouldNotReachHere();
 2847       return Assembler::AVX_NoVec;
 2848     }
 2849   }
 2850 }
 2851 
// AVX length encoding for an ideal vector node, derived from its length in bytes.
static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}
 2855 
 2856 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2857   uint def_idx = use->operand_index(opnd);
 2858   Node* def = use->in(def_idx);
 2859   return vector_length_encoding(def);
 2860 }
 2861 
// True when the CPU can popcount vectors of the given element type:
// AVX512_BITALG covers byte/short elements, AVX512_VPOPCNTDQ covers int/long.
static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}
 2866 
// True when vector count-leading-zeros is available for int/long elements:
// requires AVX512CD, plus AVX512VL unless the vector is a full 512 bits.
static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
           (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}
 2871 
// Platform-dependent node flags, continuing the shared Node flag space
// starting from Node::_last_flag.
class Node::PD {
public:
  enum NodeFlags : uint64_t {
    // Node may need padding for the Intel JCC erratum mitigation
    // (see c2_intelJccErratum_x86 and MachNode::compute_padding below).
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    // Condition-code (EFLAGS) effects of the instruction.
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    // Intel APX NDD demotion candidates
    // (see Matcher::is_register_biasing_candidate).
    Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
    Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
    _last_flag                = Flag_ndd_demotable_opr2
  };
};
 2891 
 2892 %} // end source_hpp
 2893 
 2894 source %{
 2895 
 2896 #include "opto/addnode.hpp"
 2897 #include "c2_intelJccErratum_x86.hpp"
 2898 
// Pre-output analysis hook: on CPUs affected by the Intel JCC erratum, tag
// the affected mach nodes and reserve extra code-buffer space for the
// alignment padding they may require.
void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
}
 2905 
 2906 int MachNode::pd_alignment_required() const {
 2907   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2908     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2909     return IntelJccErratum::largest_jcc_size() + 1;
 2910   } else {
 2911     return 1;
 2912   }
 2913 }
 2914 
 2915 int MachNode::compute_padding(int current_offset) const {
 2916   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2917     Compile* C = Compile::current();
 2918     PhaseOutput* output = C->output();
 2919     Block* block = output->block();
 2920     int index = output->index();
 2921     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2922   } else {
 2923     return 0;
 2924   }
 2925 }
 2926 
 2927 // Emit deopt handler code.
// Emits the deoptimization handler stub and returns the offset of its entry
// point within the stub section (0 on code-cache exhaustion).
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  address base = __ start_a_stub(size_deopt_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  Label start;
  __ bind(start);

  // Call into the deopt blob's unpack entry.
  __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  // The handler's entry point is the position right after the call.
  int entry_offset = __ offset();

  // Back-branch leaves readable bytes after the entry point so the
  // post-call NOP check can't read out of bounds (see assert below).
  __ jmp(start);

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
         "out of bounds read in post-call NOP check");
  __ end_a_stub();
  return entry_offset;
}
 2954 
 2955 static Assembler::Width widthForType(BasicType bt) {
 2956   if (bt == T_BYTE) {
 2957     return Assembler::B;
 2958   } else if (bt == T_SHORT) {
 2959     return Assembler::W;
 2960   } else if (bt == T_INT) {
 2961     return Assembler::D;
 2962   } else {
 2963     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2964     return Assembler::Q;
 2965   }
 2966 }
 2967 
 2968 //=============================================================================
 2969 
  // Float masks come from different places depending on platform.
  // Accessors for the pre-built constant tables/masks in the stub routines
  // area (sign manipulation, narrowing casts, shuffles, all-bits-set, ...),
  // used by the match rules below.
  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
  static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
  static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
  static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
  static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
  static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
  static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
  static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
  static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
  static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
  static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
  static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
  static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
  static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
  static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
  static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2990 
 2991 //=============================================================================
// Returns whether an opcode's match rule can actually be used on the current
// CPU. A rule may exist in this AD file yet still be rejected here when the
// hardware lacks the required features (SSE/AVX level, AVX-512 subfeatures,
// BMI2, FP16, popcount, cache-line flush, ...). Opcodes not listed in the
// switch are supported whenever a match rule exists.
bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode)) {
    return false; // no match rule present
  }
  switch (opcode) {
    case Op_AbsVL:
    case Op_StoreVectorScatter:
      if (UseAVX < 3) {
        return false;
      }
      break;
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction) {
        return false;
      }
      break;
    case Op_PopCountVI:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
    case Op_PopCountVL:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulVL:
      if (UseSSE < 4) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false) {
        return false;
      }
      break;
    case Op_AbsVB:
    case Op_AbsVS:
    case Op_AbsVI:
    case Op_AddReductionVI:
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (UseSSE < 3) { // requires at least SSSE3
        return false;
      }
      break;
    // Min/Max on half floats need AVX512VL+BW on top of the FP16 check below.
    case Op_MaxHF:
    case Op_MinHF:
      if (!VM_Version::supports_avx512vlbw()) {
        return false;
      }  // fallthrough
    case Op_AddHF:
    case Op_DivHF:
    case Op_FmaHF:
    case Op_MulHF:
    case Op_ReinterpretS2HF:
    case Op_ReinterpretHF2S:
    case Op_SubHF:
    case Op_SqrtHF:
      if (!VM_Version::supports_avx512_fp16()) {
        return false;
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
    case Op_MulReductionVI:
      if (UseSSE < 4) { // requires at least SSE4
        return false;
      }
      break;
    case Op_IsInfiniteF:
    case Op_IsInfiniteD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
    case Op_VectorMaskCmp:
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
    case Op_VectorCastL2X:
    case Op_VectorCastF2X:
    case Op_VectorCastD2X:
    case Op_VectorUCastB2X:
    case Op_VectorUCastS2X:
    case Op_VectorUCastI2X:
    case Op_VectorMaskCast:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
    case Op_PopulateIndex:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_RoundVF:
      if (UseAVX < 2) { // enabled for AVX2 only
        return false;
      }
      break;
    case Op_RoundVD:
      if (UseAVX < 3) {
        return false;  // enabled for AVX3 only
      }
      break;
    case Op_CompareAndSwapL:
    case Op_CompareAndSwapP:
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false) {
        return false;
      }
      break;
    case Op_MulVB:
    case Op_LShiftVB:
    case Op_RShiftVB:
    case Op_URShiftVB:
    case Op_VectorInsert:
    case Op_VectorLoadMask:
    case Op_VectorStoreMask:
    case Op_VectorBlend:
      if (UseSSE < 4) {
        return false;
      }
      break;
    case Op_MaxD:
    case Op_MaxF:
    case Op_MinD:
    case Op_MinF:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
    case Op_CacheWB:
    case Op_CacheWBPreSync:
    case Op_CacheWBPostSync:
      if (!VM_Version::supports_data_cache_line_flush()) {
        return false;
      }
      break;
    case Op_ExtractB:
    case Op_ExtractL:
    case Op_ExtractI:
    case Op_RoundDoubleMode:
      if (UseSSE < 4) {
        return false;
      }
      break;
    case Op_RoundDoubleModeV:
      if (VM_Version::supports_avx() == false) {
        return false; // 128bit vroundpd is not available
      }
      break;
    case Op_LoadVectorGather:
    case Op_LoadVectorGatherMasked:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_FmaF:
    case Op_FmaD:
    case Op_FmaVD:
    case Op_FmaVF:
      if (!UseFMA) {
        return false;
      }
      break;
    case Op_MacroLogicV:
      if (UseAVX < 3 || !UseVectorMacroLogic) {
        return false;
      }
      break;

    case Op_VectorCmpMasked:
    case Op_VectorMaskGen:
      if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_VectorMaskFirstTrue:
    case Op_VectorMaskLastTrue:
    case Op_VectorMaskTrueCount:
    case Op_VectorMaskToLong:
      if (UseAVX < 1) {
         return false;
      }
      break;
    case Op_RoundF:
    case Op_RoundD:
      break;
    case Op_CopySignD:
    case Op_CopySignF:
      if (UseAVX < 3)  {
        return false;
      }
      if (!VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_CompressBits:
    case Op_ExpandBits:
      if (!VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressM:
      if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_ConvF2HF:
    case Op_ConvHF2F:
      if (!VM_Version::supports_float16()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
        return false;
      }
      break;
  }
  return true;  // Match rules are supported by default.
}
 3239 
 3240 //------------------------------------------------------------------------
 3241 
// True when the CPU can popcount vectors of the given element type:
// AVX512_BITALG covers byte/short, AVX512_VPOPCNTDQ covers int/long.
// NOTE(review): body is identical to is_vector_popcount_predicate() in the
// source_hpp block above — consider consolidating the two.
static inline bool is_pop_count_instr_target(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}
 3246 
// Auto-vectorization uses the same feature gating as explicit vector nodes.
bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
  return match_rule_supported_vector(opcode, vlen, bt);
}
 3250 
 3251 // Identify extra cases that we might want to provide match rules for vector nodes and
 3252 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3253 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3254   if (!match_rule_supported(opcode)) {
 3255     return false;
 3256   }
 3257   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3258   //   * SSE2 supports 128bit vectors for all types;
 3259   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3260   //   * AVX2 supports 256bit vectors for all types;
 3261   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3262   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3263   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3264   // And MaxVectorSize is taken into account as well.
 3265   if (!vector_size_supported(bt, vlen)) {
 3266     return false;
 3267   }
 3268   // Special cases which require vector length follow:
 3269   //   * implementation limitations
 3270   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3271   //   * 128bit vroundpd instruction is present only in AVX1
 3272   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3273   switch (opcode) {
 3274     case Op_MaxVHF:
 3275     case Op_MinVHF:
 3276       if (!VM_Version::supports_avx512bw()) {
 3277         return false;
 3278       }
 3279     case Op_AddVHF:
 3280     case Op_DivVHF:
 3281     case Op_FmaVHF:
 3282     case Op_MulVHF:
 3283     case Op_SubVHF:
 3284     case Op_SqrtVHF:
 3285       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3286         return false;
 3287       }
 3288       if (!VM_Version::supports_avx512_fp16()) {
 3289         return false;
 3290       }
 3291       break;
 3292     case Op_AbsVF:
 3293     case Op_NegVF:
 3294       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3295         return false; // 512bit vandps and vxorps are not available
 3296       }
 3297       break;
 3298     case Op_AbsVD:
 3299     case Op_NegVD:
 3300       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3301         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3302       }
 3303       break;
 3304     case Op_RotateRightV:
 3305     case Op_RotateLeftV:
 3306       if (bt != T_INT && bt != T_LONG) {
 3307         return false;
 3308       } // fallthrough
 3309     case Op_MacroLogicV:
 3310       if (!VM_Version::supports_evex() ||
 3311           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3312         return false;
 3313       }
 3314       break;
 3315     case Op_ClearArray:
 3316     case Op_VectorMaskGen:
 3317     case Op_VectorCmpMasked:
 3318       if (!VM_Version::supports_avx512bw()) {
 3319         return false;
 3320       }
 3321       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3322         return false;
 3323       }
 3324       break;
 3325     case Op_LoadVectorMasked:
 3326     case Op_StoreVectorMasked:
 3327       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3328         return false;
 3329       }
 3330       break;
 3331     case Op_UMinV:
 3332     case Op_UMaxV:
 3333       if (UseAVX == 0) {
 3334         return false;
 3335       }
 3336       break;
 3337     case Op_UMinReductionV:
 3338     case Op_UMaxReductionV:
 3339       if (UseAVX == 0) {
 3340         return false;
 3341       }
 3342       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3343         return false;
 3344       }
 3345       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3346         return false;
 3347       }
 3348       break;
 3349     case Op_MaxV:
 3350     case Op_MinV:
 3351       if (UseSSE < 4 && is_integral_type(bt)) {
 3352         return false;
 3353       }
 3354       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3355           // Float/Double intrinsics are enabled for AVX family currently.
 3356           if (UseAVX == 0) {
 3357             return false;
 3358           }
 3359           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3360             return false;
 3361           }
 3362       }
 3363       break;
 3364     case Op_CallLeafVector:
 3365       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3366         return false;
 3367       }
 3368       break;
 3369     case Op_AddReductionVI:
 3370       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3371         return false;
 3372       }
 3373       // fallthrough
 3374     case Op_AndReductionV:
 3375     case Op_OrReductionV:
 3376     case Op_XorReductionV:
 3377       if (is_subword_type(bt) && (UseSSE < 4)) {
 3378         return false;
 3379       }
 3380       break;
 3381     case Op_MinReductionV:
 3382     case Op_MaxReductionV:
 3383       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3384         return false;
 3385       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3386         return false;
 3387       }
 3388       // Float/Double intrinsics enabled for AVX family.
 3389       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3390         return false;
 3391       }
 3392       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3393         return false;
 3394       }
 3395       break;
 3396     case Op_VectorBlend:
 3397       if (UseAVX == 0 && size_in_bits < 128) {
 3398         return false;
 3399       }
 3400       break;
 3401     case Op_VectorTest:
 3402       if (UseSSE < 4) {
 3403         return false; // Implementation limitation
 3404       } else if (size_in_bits < 32) {
 3405         return false; // Implementation limitation
 3406       }
 3407       break;
 3408     case Op_VectorLoadShuffle:
 3409     case Op_VectorRearrange:
 3410       if(vlen == 2) {
 3411         return false; // Implementation limitation due to how shuffle is loaded
 3412       } else if (size_in_bits == 256 && UseAVX < 2) {
 3413         return false; // Implementation limitation
 3414       }
 3415       break;
 3416     case Op_VectorLoadMask:
 3417     case Op_VectorMaskCast:
 3418       if (size_in_bits == 256 && UseAVX < 2) {
 3419         return false; // Implementation limitation
 3420       }
 3421       // fallthrough
 3422     case Op_VectorStoreMask:
 3423       if (vlen == 2) {
 3424         return false; // Implementation limitation
 3425       }
 3426       break;
 3427     case Op_PopulateIndex:
 3428       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3429         return false;
 3430       }
 3431       break;
 3432     case Op_VectorCastB2X:
 3433     case Op_VectorCastS2X:
 3434     case Op_VectorCastI2X:
 3435       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3436         return false;
 3437       }
 3438       break;
 3439     case Op_VectorCastL2X:
 3440       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3441         return false;
 3442       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3443         return false;
 3444       }
 3445       break;
 3446     case Op_VectorCastF2X: {
 3447         // As per JLS section 5.1.3 narrowing conversion to sub-word types
 3448         // happen after intermediate conversion to integer and special handling
 3449         // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
 3450         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3451         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3452           return false;
 3453         }
 3454       }
 3455       // fallthrough
 3456     case Op_VectorCastD2X:
 3457       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3458         return false;
 3459       }
 3460       break;
 3461     case Op_VectorCastF2HF:
 3462     case Op_VectorCastHF2F:
 3463       if (!VM_Version::supports_f16c() &&
 3464          ((!VM_Version::supports_evex() ||
 3465          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3466         return false;
 3467       }
 3468       break;
 3469     case Op_RoundVD:
 3470       if (!VM_Version::supports_avx512dq()) {
 3471         return false;
 3472       }
 3473       break;
 3474     case Op_MulReductionVI:
 3475       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3476         return false;
 3477       }
 3478       break;
 3479     case Op_LoadVectorGatherMasked:
 3480       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3481         return false;
 3482       }
 3483       if (is_subword_type(bt) &&
 3484          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3485           (size_in_bits < 64)                                      ||
 3486           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3487         return false;
 3488       }
 3489       break;
 3490     case Op_StoreVectorScatterMasked:
 3491     case Op_StoreVectorScatter:
 3492       if (is_subword_type(bt)) {
 3493         return false;
 3494       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3495         return false;
 3496       }
 3497       // fallthrough
 3498     case Op_LoadVectorGather:
 3499       if (!is_subword_type(bt) && size_in_bits == 64) {
 3500         return false;
 3501       }
 3502       if (is_subword_type(bt) && size_in_bits < 64) {
 3503         return false;
 3504       }
 3505       break;
 3506     case Op_SaturatingAddV:
 3507     case Op_SaturatingSubV:
 3508       if (UseAVX < 1) {
 3509         return false; // Implementation limitation
 3510       }
 3511       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3512         return false;
 3513       }
 3514       break;
 3515     case Op_SelectFromTwoVector:
 3516        if (size_in_bits < 128) {
 3517          return false;
 3518        }
 3519        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3520          return false;
 3521        }
 3522        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3523          return false;
 3524        }
 3525        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3526          return false;
 3527        }
 3528        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3529          return false;
 3530        }
 3531        break;
 3532     case Op_MaskAll:
 3533       if (!VM_Version::supports_evex()) {
 3534         return false;
 3535       }
 3536       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3537         return false;
 3538       }
 3539       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3540         return false;
 3541       }
 3542       break;
 3543     case Op_VectorMaskCmp:
 3544       if (vlen < 2 || size_in_bits < 32) {
 3545         return false;
 3546       }
 3547       break;
 3548     case Op_CompressM:
 3549       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3550         return false;
 3551       }
 3552       break;
 3553     case Op_CompressV:
 3554     case Op_ExpandV:
 3555       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3556         return false;
 3557       }
 3558       if (size_in_bits < 128 ) {
 3559         return false;
 3560       }
 3561     case Op_VectorLongToMask:
 3562       if (UseAVX < 1) {
 3563         return false;
 3564       }
 3565       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3566         return false;
 3567       }
 3568       break;
 3569     case Op_SignumVD:
 3570     case Op_SignumVF:
 3571       if (UseAVX < 1) {
 3572         return false;
 3573       }
 3574       break;
 3575     case Op_PopCountVI:
 3576     case Op_PopCountVL: {
 3577         if (!is_pop_count_instr_target(bt) &&
 3578             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3579           return false;
 3580         }
 3581       }
 3582       break;
 3583     case Op_ReverseV:
 3584     case Op_ReverseBytesV:
 3585       if (UseAVX < 2) {
 3586         return false;
 3587       }
 3588       break;
 3589     case Op_CountTrailingZerosV:
 3590     case Op_CountLeadingZerosV:
 3591       if (UseAVX < 2) {
 3592         return false;
 3593       }
 3594       break;
 3595   }
 3596   return true;  // Per default match rules are supported.
 3597 }
 3598 
bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // ADLC based match_rule_supported routine checks for the existence of pattern based
  // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
  // of their non-masked counterpart with mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation patterns
  // by returning a default false value for all the other opcodes apart from the
  // ones whose masked instruction patterns are defined in this file.
  if (!match_rule_supported_vector(opcode, vlen, bt)) {
    return false;
  }

  // Masked operations rely on EVEX embedded predication; vector lengths below
  // 512 bits additionally require the AVX512VL extension.
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
    return false;
  }
  switch(opcode) {
    // Unary masked operations
    case Op_AbsVB:
    case Op_AbsVS:
      // Subword (byte/short) masked abs needs AVX512BW.
      if(!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      // fallthrough - int/long abs needs nothing beyond the checks above
    case Op_AbsVI:
    case Op_AbsVL:
      return true;

    // Ternary masked operations
    case Op_FmaVF:
    case Op_FmaVD:
      return true;

    case Op_MacroLogicV:
      // vpternlog operates on 32/64-bit lanes only.
      if(bt != T_INT && bt != T_LONG) {
        return false;
      }
      return true;

    // Binary masked operations
    case Op_AddVB:
    case Op_AddVS:
    case Op_SubVB:
    case Op_SubVS:
    case Op_MulVS:
    case Op_LShiftVS:
    case Op_RShiftVS:
    case Op_URShiftVS:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      // Subword (byte/short) masked forms need AVX512BW.
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      return true;

    case Op_MulVL:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      // 64-bit lane multiply (vpmullq) requires AVX512DQ.
      if (!VM_Version::supports_avx512dq()) {
        return false;  // Implementation limitation
      }
      return true;

    case Op_AndV:
    case Op_OrV:
    case Op_XorV:
    case Op_RotateRightV:
    case Op_RotateLeftV:
      // Only 32/64-bit lane forms have masked patterns in this file.
      if (bt != T_INT && bt != T_LONG) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorLoadMask:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      return true;

    case Op_AddVI:
    case Op_AddVL:
    case Op_AddVF:
    case Op_AddVD:
    case Op_SubVI:
    case Op_SubVL:
    case Op_SubVF:
    case Op_SubVD:
    case Op_MulVI:
    case Op_MulVF:
    case Op_MulVD:
    case Op_DivVF:
    case Op_DivVD:
    case Op_SqrtVF:
    case Op_SqrtVD:
    case Op_LShiftVI:
    case Op_LShiftVL:
    case Op_RShiftVI:
    case Op_RShiftVL:
    case Op_URShiftVI:
    case Op_URShiftVL:
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
    case Op_LoadVectorGatherMasked:
    case Op_StoreVectorScatterMasked:
      // These masked forms need nothing beyond the EVEX/VL checks above.
      return true;

    case Op_UMinV:
    case Op_UMaxV:
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      } // fallthrough
    case Op_MaxV:
    case Op_MinV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      // Masked float/double min/max patterns are gated on AVX10.2.
      if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
        return false; // Implementation limitation
      }
      return true;
    case Op_SaturatingAddV:
    case Op_SaturatingSubV:
      // Saturating arithmetic exists only for subword element types.
      if (!is_subword_type(bt)) {
        return false;
      }
      if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorMaskCmp:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorRearrange:
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false; // Implementation limitation
      } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
        return false; // Implementation limitation
      }
      return true;

    // Binary Logical operations
    case Op_AndVMask:
    case Op_OrVMask:
    case Op_XorVMask:
      // More than 16 mask lanes require the wider (k64) opmask ops of AVX512BW.
      if (vlen > 16 && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_PopCountVI:
    case Op_PopCountVL:
      if (!is_pop_count_instr_target(bt)) {
        return false;
      }
      return true;

    case Op_MaskAll:
      return true;

    case Op_CountLeadingZerosV:
      if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
        return true;
      }
      // fallthrough to default when not directly supported
    default:
      return false;
  }
}
 3770 
// x86 never lowers a vector node into partial (tail-masked) operations,
// so no node needs this treatment.
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}
 3774 
 3775 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3776 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3777   switch (elem_bt) {
 3778     case T_BYTE:  return false;
 3779     case T_SHORT: return !VM_Version::supports_avx512bw();
 3780     case T_INT:   return !VM_Version::supports_avx();
 3781     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3782     default:
 3783       ShouldNotReachHere();
 3784       return false;
 3785   }
 3786 }
 3787 
 3788 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3789   // Prefer predicate if the mask type is "TypeVectMask".
 3790   return vt->isa_vectmask() != nullptr;
 3791 }
 3792 
 3793 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3794   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3795   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3796   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3797       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3798     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3799     return new legVecZOper();
 3800   }
 3801   if (legacy) {
 3802     switch (ideal_reg) {
 3803       case Op_VecS: return new legVecSOper();
 3804       case Op_VecD: return new legVecDOper();
 3805       case Op_VecX: return new legVecXOper();
 3806       case Op_VecY: return new legVecYOper();
 3807       case Op_VecZ: return new legVecZOper();
 3808     }
 3809   } else {
 3810     switch (ideal_reg) {
 3811       case Op_VecS: return new vecSOper();
 3812       case Op_VecD: return new vecDOper();
 3813       case Op_VecX: return new vecXOper();
 3814       case Op_VecY: return new vecYOper();
 3815       case Op_VecZ: return new vecZOper();
 3816     }
 3817   }
 3818   ShouldNotReachHere();
 3819   return nullptr;
 3820 }
 3821 
 3822 bool Matcher::is_reg2reg_move(MachNode* m) {
 3823   switch (m->rule()) {
 3824     case MoveVec2Leg_rule:
 3825     case MoveLeg2Vec_rule:
 3826     case MoveF2VL_rule:
 3827     case MoveF2LEG_rule:
 3828     case MoveVL2F_rule:
 3829     case MoveLEG2F_rule:
 3830     case MoveD2VL_rule:
 3831     case MoveD2LEG_rule:
 3832     case MoveVL2D_rule:
 3833     case MoveLEG2D_rule:
 3834       return true;
 3835     default:
 3836       return false;
 3837   }
 3838 }
 3839 
 3840 bool Matcher::is_generic_vector(MachOper* opnd) {
 3841   switch (opnd->opcode()) {
 3842     case VEC:
 3843     case LEGVEC:
 3844       return true;
 3845     default:
 3846       return false;
 3847   }
 3848 }
 3849 
 3850 //------------------------------------------------------------------------
 3851 
// Register mask covering the opmask (predicate) register file.
const RegMask* Matcher::predicate_reg_mask(void) {
  return &_VECTMASK_REG_mask;
}
 3855 
 3856 // Max vector size in bytes. 0 if not supported.
 3857 int Matcher::vector_width_in_bytes(BasicType bt) {
 3858   assert(is_java_primitive(bt), "only primitive type vectors");
 3859   // SSE2 supports 128bit vectors for all types.
 3860   // AVX2 supports 256bit vectors for all types.
 3861   // AVX2/EVEX supports 512bit vectors for all types.
 3862   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3863   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3864   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3865     size = (UseAVX > 2) ? 64 : 32;
 3866   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3867     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3868   // Use flag to limit vector size.
 3869   size = MIN2(size,(int)MaxVectorSize);
 3870   // Minimum 2 values in vector (or 4 for bytes).
 3871   switch (bt) {
 3872   case T_DOUBLE:
 3873   case T_LONG:
 3874     if (size < 16) return 0;
 3875     break;
 3876   case T_FLOAT:
 3877   case T_INT:
 3878     if (size < 8) return 0;
 3879     break;
 3880   case T_BOOLEAN:
 3881     if (size < 4) return 0;
 3882     break;
 3883   case T_CHAR:
 3884     if (size < 4) return 0;
 3885     break;
 3886   case T_BYTE:
 3887     if (size < 4) return 0;
 3888     break;
 3889   case T_SHORT:
 3890     if (size < 4) return 0;
 3891     break;
 3892   default:
 3893     ShouldNotReachHere();
 3894   }
 3895   return size;
 3896 }
 3897 
// Limits on vector size (number of elements) loaded into vector.
int Matcher::max_vector_size(const BasicType bt) {
  // Convert the byte width reported by vector_width_in_bytes() into a lane count.
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
 3902 int Matcher::min_vector_size(const BasicType bt) {
 3903   int max_size = max_vector_size(bt);
 3904   // Min size which can be loaded into vector is 4 bytes.
 3905   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3906   // Support for calling svml double64 vectors
 3907   if (bt == T_DOUBLE) {
 3908     size = 1;
 3909   }
 3910   return MIN2(size,max_size);
 3911 }
 3912 
 3913 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3914   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3915   // by default on Cascade Lake
 3916   if (VM_Version::is_default_intel_cascade_lake()) {
 3917     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3918   }
 3919   return Matcher::max_vector_size(bt);
 3920 }
 3921 
// x86 has no scalable (length-agnostic) vector registers; report -1.
int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}
 3925 
 3926 // Vector ideal reg corresponding to specified size in bytes
 3927 uint Matcher::vector_ideal_reg(int size) {
 3928   assert(MaxVectorSize >= size, "");
 3929   switch(size) {
 3930     case  4: return Op_VecS;
 3931     case  8: return Op_VecD;
 3932     case 16: return Op_VecX;
 3933     case 32: return Op_VecY;
 3934     case 64: return Op_VecZ;
 3935   }
 3936   ShouldNotReachHere();
 3937   return 0;
 3938 }
 3939 
// Check for shift by small constant as well
// Decide whether 'shift' can be subsumed into a complex addressing expression
// as a scaled index. Succeeds only for a left shift by a constant <= 3
// (scale *1/*2/*4/*8) that has no other non-address uses; on success the
// participating nodes are flagged as address_visited and their inputs are
// pushed for further matching.
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
    // Allow Matcher to match the rule which bypass
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else {
      mstack.push(conv, Matcher::Pre_Visit);
    }
    return true;
  }
  return false;
}
 3965 
 3966 // This function identifies sub-graphs in which a 'load' node is
 3967 // input to two different nodes, and such that it can be matched
 3968 // with BMI instructions like blsi, blsr, etc.
 3969 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
 3970 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3971 // refers to the same node.
 3972 //
 3973 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3974 // This is a temporary solution until we make DAGs expressible in ADL.
template<typename ConType>
class FusedPatternMatcher {
  Node* _op1_node;  // root of the candidate pattern (op1)
  Node* _mop_node;  // the memory operand both ops must share (mop)
  int _con_op;      // opcode of the expected constant node (e.g. Op_ConI/Op_ConL)

  // Return the input index (1 or 2) of 'n' whose opcode is 'next_op'.
  // A next_op_idx of -1 means 'n' is commutative and both inputs are tried;
  // otherwise only the given index is checked. Returns -1 when no input matches.
  static int match_next(Node* n, int next_op, int next_op_idx) {
    if (n->in(1) == nullptr || n->in(2) == nullptr) {
      return -1;
    }

    if (next_op_idx == -1) { // n is commutative, try rotations
      if (n->in(1)->Opcode() == next_op) {
        return 1;
      } else if (n->in(2)->Opcode() == next_op) {
        return 2;
      }
    } else {
      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
      if (n->in(next_op_idx)->Opcode() == next_op) {
        return next_op_idx;
      }
    }
    return -1;
  }

 public:
  FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }

  // Try to match the shape (op1 (op2 Con{ConType} mop) mop), where the two
  // 'mop' references are the same node and the constant equals 'con_value'.
  // The *_idx arguments pin which input edge carries the next op (-1 when the
  // op is commutative and either edge may match).
  bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
             int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
             typename ConType::NativeType con_value) {
    if (_op1_node->Opcode() != op1) {
      return false;
    }
    // mop may feed at most op1 and op2; more users would break the fusion.
    if (_mop_node->outcnt() > 2) {
      return false;
    }
    op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
    if (op1_op2_idx == -1) {
      return false;
    }
    // Memory operation must be the other edge
    int op1_mop_idx = (op1_op2_idx & 1) + 1;

    // Check that the mop node is really what we want
    if (_op1_node->in(op1_mop_idx) == _mop_node) {
      Node* op2_node = _op1_node->in(op1_op2_idx);
      // op2 must be used only by op1; otherwise it cannot be fused away.
      if (op2_node->outcnt() > 1) {
        return false;
      }
      assert(op2_node->Opcode() == op2, "Should be");
      op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
      if (op2_con_idx == -1) {
        return false;
      }
      // Memory operation must be the other edge
      int op2_mop_idx = (op2_con_idx & 1) + 1;
      // Check that the memory operation is the same node
      if (op2_node->in(op2_mop_idx) == _mop_node) {
        // Now check the constant
        const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
        if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
          return true;
        }
      }
    }
    return false;
  }
};
 4046 
 4047 static bool is_bmi_pattern(Node* n, Node* m) {
 4048   assert(UseBMI1Instructions, "sanity");
 4049   if (n != nullptr && m != nullptr) {
 4050     if (m->Opcode() == Op_LoadI) {
 4051       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4052       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4053              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4054              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4055     } else if (m->Opcode() == Op_LoadL) {
 4056       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4057       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4058              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4059              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4060     }
 4061   }
 4062   return false;
 4063 }
 4064 
// Should the matcher clone input 'm' of node 'n'?
// Cloning keeps 'm' attached to each of its users so a multi-node pattern
// (BMI fusion, vector shift with constant count, encode+store) can be
// matched as a single machine instruction.
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
  // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
  if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
    mstack.push(m, Visit);
    return true;
  }
  if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
    mstack.push(m, Visit);           // m = ShiftCntV
    return true;
  }
  // Keep the encode node together with the store that consumes it
  // (presumably a narrow-oop encode feeding a store -- see the helper).
  if (is_encode_and_store_pattern(n, m)) {
    mstack.push(m, Visit);
    return true;
  }
  return false;
}
 4082 
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
// Returns true when the AddP sub-graph was consumed into an addressing
// expression (its pieces pushed individually and flagged address_visited),
// false when the default handling should apply.
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        !adr->in(AddPNode::Offset)->is_Con() &&
        off->get_long() == (int) (off->get_long()) && // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      // Try to fold the inner offset as a scaled index; otherwise compute it.
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    // Non-constant offset that is itself a foldable scaled index.
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
 4124 
// Map a BoolTest predicate to the Assembler comparison predicate encoding
// used by integer vector compares. Signed and unsigned variants of the same
// relation map to the same encoding here.
static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
  switch (bt) {
    case BoolTest::eq:
      return Assembler::eq;
    case BoolTest::ne:
      return Assembler::neq;
    case BoolTest::le:
    case BoolTest::ule:
      return Assembler::le;
    case BoolTest::ge:
    case BoolTest::uge:
      return Assembler::nlt; // ">=" encoded as "not less than"
    case BoolTest::lt:
    case BoolTest::ult:
      return Assembler::lt;
    case BoolTest::gt:
    case BoolTest::ugt:
      return Assembler::nle; // ">" encoded as "not less or equal"
    default : ShouldNotReachHere(); return Assembler::_false;
  }
}
 4146 
// Map a BoolTest predicate to the floating-point comparison predicate
// encoding for vector FP compares (ordered/unordered, quiet variants).
static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
  switch (bt) {
  case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
  // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
  case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
  case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
  case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
  case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
  case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
  default: ShouldNotReachHere(); return Assembler::FALSE_OS;
  }
}
 4159 
// Helper methods for MachSpillCopyNode::implementation().
// Emits (masm != nullptr) or prints (masm == nullptr, non-PRODUCT) a full
// vector register-to-register copy of the width given by 'ireg'. Without
// AVX512VL, plain 128/256-bit moves cannot address the extended register
// bank, so a 512-bit extract with immediate 0 is used instead.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  assert(ireg == Op_VecS || // 32bit vector
         ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
         "no non-adjacent vector moves" );
  if (masm) {
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        // AVX512 without VL: use a 512-bit extract of lane 0 instead.
        __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
     }
      break;
    case Op_VecY:
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        // AVX512 without VL: use a 512-bit extract of lane 0 instead.
        __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
     }
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Print path (PRODUCT builds never reach here with masm == nullptr).
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
 4209 
// Emit (when masm != nullptr) or pretty-print (masm == nullptr, non-product
// builds only) a vector spill move between an XMM register and the stack
// slot at [rsp + stack_offset].
//   is_load == true  : load  register <- stack
//   is_load == false : store stack    <- register
// 'reg' is translated through Matcher::_regEncode; 'ireg' is the ideal
// vector type selecting the width (Op_VecS .. Op_VecZ).
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st) {
  if (masm) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS: // 32-bit vector
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD: // 64-bit vector
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX: // 128-bit vector
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          // AVX-512 without AVX512VL: 128-bit moves cannot encode the extended
          // registers (xmm16-31), so zero the destination (vpxor reg,reg,reg)
          // and insert the 128-bit lane from memory with a 512-bit instruction.
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
        break;
      case Op_VecY: // 256-bit vector
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          // Same AVX512VL workaround as Op_VecX, with a 256-bit lane insert.
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
        break;
      case Op_VecZ: // 512-bit vector
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS: // 32-bit vector
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD: // 64-bit vector
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX: // 128-bit vector
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        }
        else {
          // AVX-512 without AVX512VL: extract lane 0 with a 512-bit
          // instruction instead of an unencodable 128-bit move.
          __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
        break;
      case Op_VecY: // 256-bit vector
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        }
        else {
          // Same AVX512VL workaround, extracting the 256-bit lane.
          __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
        break;
      case Op_VecZ: // 512-bit vector
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#ifndef PRODUCT
  } else {
    // Debug listing only: print the instruction we would have emitted.
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
       case Op_VecX:
        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
       case Op_VecX:
        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
}
 4316 
 4317 template <class T>
 4318 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4319   int size = type2aelembytes(bt) * len;
 4320   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4321   for (int i = 0; i < len; i++) {
 4322     int offset = i * type2aelembytes(bt);
 4323     switch (bt) {
 4324       case T_BYTE: val->at(i) = con; break;
 4325       case T_SHORT: {
 4326         jshort c = con;
 4327         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4328         break;
 4329       }
 4330       case T_INT: {
 4331         jint c = con;
 4332         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4333         break;
 4334       }
 4335       case T_LONG: {
 4336         jlong c = con;
 4337         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4338         break;
 4339       }
 4340       case T_FLOAT: {
 4341         jfloat c = con;
 4342         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4343         break;
 4344       }
 4345       case T_DOUBLE: {
 4346         jdouble c = con;
 4347         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4348         break;
 4349       }
 4350       default: assert(false, "%s", type2name(bt));
 4351     }
 4352   }
 4353   return val;
 4354 }
 4355 
 4356 static inline jlong high_bit_set(BasicType bt) {
 4357   switch (bt) {
 4358     case T_BYTE:  return 0x8080808080808080;
 4359     case T_SHORT: return 0x8000800080008000;
 4360     case T_INT:   return 0x8000000080000000;
 4361     case T_LONG:  return 0x8000000000000000;
 4362     default:
 4363       ShouldNotReachHere();
 4364       return 0;
 4365   }
 4366 }
 4367 
#ifndef PRODUCT
  // Pretty-print the padding nop for debug listings.
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif
 4373 
  // Emit _count bytes of nop padding.
  void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
    __ nop(_count);
  }
 4377 
  // Size in bytes of the emitted padding; matches emit(), which produces
  // exactly _count bytes.
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count;
  }
 4381 
#ifndef PRODUCT
  // Pretty-print the breakpoint for debug listings.
  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("# breakpoint");
  }
#endif
 4387 
  // Emit a hardware breakpoint (int3).
  void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
    __ int3();
  }
 4391 
  // Defer to the generic MachNode size computation for the int3.
  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
    return MachNode::size(ra_);
  }
 4395 
 4396 %}
 4397 
 4398 //----------ENCODING BLOCK-----------------------------------------------------
 4399 // This block specifies the encoding classes used by the compiler to
 4400 // output byte streams.  Encoding classes are parameterized macros
 4401 // used by Machine Instruction Nodes in order to generate the bit
 4402 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  There are currently four
// supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, and
 4405 // COND_INTER.  REG_INTER causes an operand to generate a function
 4406 // which returns its register number when queried.  CONST_INTER causes
 4407 // an operand to generate a function which returns the value of the
 4408 // constant when queried.  MEMORY_INTER causes an operand to generate
 4409 // four functions which return the Base Register, the Index Register,
 4410 // the Scale Value, and the Offset Value of the operand when queried.
 4411 // COND_INTER causes an operand to generate six functions which return
 4412 // the encoding code (ie - encoding bits for the instruction)
 4413 // associated with each basic boolean condition for a conditional
 4414 // instruction.
 4415 //
 4416 // Instructions specify two basic values for encoding.  Again, a
 4417 // function is available to check if the constant displacement is an
 4418 // oop. They use the ins_encode keyword to specify their encoding
 4419 // classes (which must be a sequence of enc_class names, and their
 4420 // parameters, specified in the encoding block), and they use the
 4421 // opcode keyword to specify, in order, their primary, secondary, and
 4422 // tertiary opcode.  Only the opcode sections which a particular
 4423 // instruction needs for encoding need to be specified.
 4424 encode %{
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:
    Label normal;
    Label done;

    // cmp    $0x80000000,%eax
    __ cmpl(as_Register(RAX_enc), 0x80000000);

    // jne    e <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpl($div$$Register, -1);

    // je     11 <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cltd
    __ bind(normal);
    __ cdql();

    // idivl
    // <done>
    __ idivl($div$$Register);
    __ bind(done);
  %}
 4481 
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:
    Label normal;
    Label done;

    // mov    $0x8000000000000000,%rdx
    __ mov64(as_Register(RDX_enc), 0x8000000000000000);

    // cmp    %rdx,%rax
    __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));

    // jne    17 <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpq($div$$Register, -1);

    // je     1c <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cqto
    __ bind(normal);
    __ cdqq();

    // idivq
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}
 4539 
  enc_class clear_avx %{
    DEBUG_ONLY(int off0 = __ offset());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when the current compiled code
      // uses wide vectors, to avoid the AVX <-> SSE transition penalty
      // during the upcoming call.
      __ vzeroupper();
    }
    DEBUG_ONLY(int off1 = __ offset());
    // Emitted size must agree with the size this node advertised.
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}
 4551 
  enc_class Java_To_Runtime(method meth) %{
    // Call into the VM runtime: materialize the absolute target address in
    // r10 and call indirectly through it.
    __ lea(r10, RuntimeAddress((address)$meth$$method));
    __ call(r10);
    __ post_call_nop();
  %}
 4557 
  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    if (!_method) {
      // No Java method: direct call to a runtime entry point.
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      // Ordinary static (or optimized-virtual) Java call: emit a relocated
      // direct call plus a stub through which the interpreter can be entered.
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      address mark = __ pc();
      int call_offset = __ offset();
      __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        __ code()->shared_stub_to_interp_for(_method, call_offset);
      } else {
        // Emit stubs for static call.
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
        __ clear_inst_mark();
        if (stub == nullptr) {
          // No room for the stub: abandon this compilation.
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
    __ post_call_nop();
  %}
 4593 
  enc_class Java_Dynamic_Call(method meth) %{
    // Virtual/interface Java call through an inline cache.
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
 4598 
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack.
      // The cookie (0xbadb100d) is expected three slots below the old SP.
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
 4611 
 4612 %}
 4613 
 4614 //----------FRAME--------------------------------------------------------------
 4615 // Definition of frame structure and management information.
 4616 //
 4617 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4618 //                             |   (to get allocators register number
 4619 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4620 //  r   CALLER     |        |
 4621 //  o     |        +--------+      pad to even-align allocators stack-slot
 4622 //  w     V        |  pad0  |        numbers; owned by CALLER
 4623 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4624 //  h     ^        |   in   |  5
 4625 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4626 //  |     |        |        |  3
 4627 //  |     |        +--------+
 4628 //  V     |        | old out|      Empty on Intel, window on Sparc
 4629 //        |    old |preserve|      Must be even aligned.
 4630 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4631 //        |        |   in   |  3   area for Intel ret address
 4632 //     Owned by    |preserve|      Empty on Sparc.
 4633 //       SELF      +--------+
 4634 //        |        |  pad2  |  2   pad to align old SP
 4635 //        |        +--------+  1
 4636 //        |        | locks  |  0
 4637 //        |        +--------+----> OptoReg::stack0(), even aligned
 4638 //        |        |  pad1  | 11   pad to align new SP
 4639 //        |        +--------+
 4640 //        |        |        | 10
 4641 //        |        | spills |  9   spills
 4642 //        V        |        |  8   (pad0 slot for callee)
 4643 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4644 //        ^        |  out   |  7
 4645 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4646 //     Owned by    +--------+
 4647 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4648 //        |    new |preserve|      Must be even-aligned.
 4649 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4650 //        |        |        |
 4651 //
 4652 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4653 //         known from SELF's arguments and the Java calling convention.
 4654 //         Region 6-7 is determined per call site.
 4655 // Note 2: If the calling convention leaves holes in the incoming argument
 4656 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4657 //         are owned by the CALLEE.  Holes should not be necessary in the
 4658 //         incoming area, as the Java calling convention is completely under
 4659 //         the control of the AD file.  Doubles can be sorted and packed to
 4660 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4661 //         varargs C calling conventions.
 4662 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4663 //         even aligned with pad0 as needed.
 4664 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4665 //         region 6-11 is even aligned; it may be padded out more so that
 4666 //         the region from SP to FP meets the minimum stack alignment.
 4667 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4668 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4669 //         SP meets the minimum alignment.
 4670 
frame
%{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // First (lo) and second (hi) register of the return-value pair,
    // indexed by ideal register type; OptoReg::Bad in hi marks a
    // single-register value.
    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
 4736 
 4737 //----------ATTRIBUTES---------------------------------------------------------
 4738 //----------Operand Attributes-------------------------------------------------
 4739 op_attrib op_cost(0);        // Required cost attribute
 4740 
 4741 //----------Instruction Attributes---------------------------------------------
 4742 ins_attrib ins_cost(100);       // Required cost attribute
 4743 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4744 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4745                                 // a non-matching short branch variant
 4746                                 // of some long branch?
 4747 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4748                                 // be a power of 2) specifies the
 4749                                 // alignment that some part of the
 4750                                 // instruction (not necessarily the
 4751                                 // start) requires.  If > 1, a
 4752                                 // compute_padding() function must be
 4753                                 // provided for the instruction
 4754 
 4755 // Whether this node is expanded during code emission into a sequence of
 4756 // instructions and the first instruction can perform an implicit null check.
 4757 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4758 
 4759 //----------OPERANDS-----------------------------------------------------------
 4760 // Operand definitions must precede instruction definitions for correct parsing
 4761 // in the ADLC because operands constitute user defined types which are used in
 4762 // instruction definitions.
 4763 
 4764 //----------Simple Operands----------------------------------------------------
 4765 // Immediate Operands
 4766 // Integer Immediate
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 2
operand immI_2()
%{
  predicate(n->get_int() == 2);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 4
operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4838 
// Valid scale values for addressing modes
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 7-bit integer immediate: [0, 0x7F]
operand immU7()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit integer immediate: [-0x80, 0x7F]
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit integer immediate: [0, 255]
operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit integer immediate: [-32768, 32767]
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4899 
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Null Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow (compressed) Pointer Immediate
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow (compressed) Klass Pointer Immediate
operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Null Pointer Immediate
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer immediate that fits in 31 bits (positive int32) and needs no
// relocation, so it can be used as a plain 32-bit immediate.
operand immP31()
%{
  predicate(n->as_Type()->type()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4958 
 4959 
// Long Immediate
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 8-bit
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 32-bit unsigned
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 32-bit signed
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate with exactly one bit set (power of two; the julong cast
// lets the sign bit qualify as well)
operand immL_Pow2()
%{
  predicate(is_power_of_2((julong)n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate with exactly one bit clear (complement is a power of two)
operand immL_NotPow2()
%{
  predicate(is_power_of_2((julong)~n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: 2^n-1, positive
operand immI_Pow2M1()
%{
  predicate((n->get_int() > 0)
            && is_power_of_2((juint)n->get_int() + 1));
  match(ConI);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
 5076 
// Float Immediate zero
operand immF0()
%{
  // Bit-pattern compare: matches +0.0f only, not -0.0f.
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Half Float Immediate
operand immH()
%{
  match(ConH);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0()
%{
  // Bit-pattern compare: matches +0.0 only, not -0.0.
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
 5128 
// Immediates for special shifts (sign extend)

// Shift counts used to sign-extend a short (16) or byte (24) via shl/sar
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
 5189 
// AOT Runtime Constants Address
// Pointer constant that lies inside the AOT runtime constants area,
// so it can be treated specially by instructions that match it.
operand immAOTRuntimeConstantsAddress()
%{
  // Check if the address is in the range of AOT Runtime Constants
  predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 5201 
// Vector mask (opmask) register operand.
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}
 5209 
// Register Operands
// Integer Register (any allocatable 32-bit register)
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}

operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}

operand rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}

operand rdi_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}

// Integer register excluding RAX and RDX.
operand no_rax_rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding RBP and R13.
operand no_rbp_r13_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
  match(RegI);
  match(rRegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
 5305 
// Pointer Register (any register, including ones reserved from allocation)
operand any_RegP()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Allocatable pointer register.
operand rRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);  // See Q&A below about
  match(r15_RegP);  // r15_RegP and rbp_RegP.

  format %{ %}
  interface(REG_INTER);
%}

// Compressed (narrow) pointer register.
operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}

// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match a r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator as an input.
// The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
// the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is not included in the output of the instruction either.

// This operand is not allowed to use RBP even if
// RBP is not used to hold the frame pointer.
operand no_rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
 5370 
// Special Registers
// Return a pointer value
operand rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a compressed pointer value
operand rax_RegN()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rsi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbp_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Used in rep stosq
operand rdi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// R15 holds the JavaThread pointer (TLS); see the Q&A comment above.
operand r15_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
 5446 
// Long Register (any allocatable 64-bit register)
operand rRegL()
%{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Long register excluding RAX and RDX.
operand no_rax_rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}

operand rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand r11_RegL()
%{
  constraint(ALLOC_IN_RC(long_r11_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long register excluding RBP and R13.
operand no_rbp_r13_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
  match(RegL);
  match(rRegL);
  match(rax_RegL);
  match(rcx_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
 5521 
// Flags register, used as output of compare instructions
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U" %}
  interface(REG_INTER);
%}

// Flags after an unordered FP compare that maps results onto CF/ZF.
// Disabled when APX + AVX10.2 are available (rFlagsRegUCFE is used instead).
operand rFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(!UseAPX || !VM_Version::supports_avx10_2());

  format %{ "RFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// APX/AVX10.2 counterpart of rFlagsRegUCF; the two predicates are
// mutually exclusive so exactly one variant matches.
operand rFlagsRegUCFE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(UseAPX && VM_Version::supports_avx10_2());

  format %{ "RFLAGS_U_CFE" %}
  interface(REG_INTER);
%}
 5559 
// Float register operands
operand regF() %{
   constraint(ALLOC_IN_RC(float_reg));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// Float register operands (legacy XMM0-XMM15 encodable registers)
operand legRegF() %{
   constraint(ALLOC_IN_RC(float_reg_legacy));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// Float register operands (AVX512VL register set)
operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// Double register operands
operand regD() %{
   constraint(ALLOC_IN_RC(double_reg));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}

// Double register operands (legacy register set)
operand legRegD() %{
   constraint(ALLOC_IN_RC(double_reg_legacy));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}

// Double register operands (AVX512VL register set)
operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}
 5613 
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER encodings below, index(0x4) means "no index register"
// (0x4 is RSP, which cannot be an index in x86 addressing).
//
// Direct Memory Operand
// operand direct(immP addr)
// %{
//   match(addr);

//   format %{ "[$addr]" %}
//   interface(MEMORY_INTER) %{
//     base(0xFFFFFFFF);
//     index(0x4);
//     scale(0x0);
//     disp($addr);
//   %}
// %}

// Indirect Memory Operand: [reg]
operand indirect(any_RegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand: [reg + imm8]
operand indOffset8(any_RegP reg, immL8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand: [reg + imm32]
operand indOffset32(any_RegP reg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand: [reg + lreg + imm32]
operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand (no offset): [reg + lreg]
operand indIndex(any_RegP reg, rRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register: [reg + lreg << scale]
operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + idx << scale] where idx is a 32-bit int known to be non-negative,
// so the int index can be used directly in 64-bit addressing.
operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // Require the ConvI2L input (in(3)->in(1)) to have a non-negative lower bound.
  predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP reg (LShiftL (ConvI2L idx) scale));

  op_cost(10);
  format %{"[$reg + pos $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp(0x0);
  %}
%}
 5737 
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Indirect Memory Plus Positive Index Register Plus Offset Operand
// The int index must be provably non-negative so it can be used
// directly in 64-bit addressing.
operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // in(2)->in(3) is the ConvI2L node; require a non-negative lower bound.
  predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (ConvI2L idx)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // in(2)->in(3)->in(1) is the ConvI2L input of the shift; require >= 0.
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}

// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
 5805 
// Narrow-oop variants of the memory operands above. All require
// CompressedOops::shift() == 0, i.e. DecodeN is a no-op bit-for-bit,
// so the narrow-oop register can be used directly as the base.

// Indirect Memory Operand: [reg]
operand indirectNarrow(rRegN reg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8Narrow(rRegN reg, immL8 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32Narrow(rRegN reg, immL32 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand (no offset)
operand indIndexNarrow(rRegN reg, rRegL lreg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Indirect Memory Plus Positive Index Register Plus Offset Operand
operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // The int index must be provably non-negative (see indPosIndexOffset).
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // The int index must be provably non-negative (see indPosIndexScaleOffset).
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
 5955 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
 6028 
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed); each entry pairs the x86 condition-code
// encoding with its mnemonic suffix.
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xc, "l");
    greater_equal(0xd, "ge");
    less_equal(0xe, "le");
    greater(0xf, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that don't require any fixup for the unordered case,
// If both inputs of the comparison are the same, ZF is always set so we
// don't need to use cmpOpUCF2 for eq/ne
operand cmpOpUCF() %{
  match(Bool);
  // n->in(1)->in(1) == n->in(1)->in(2): the two compare inputs are the
  // same node, the self-compare case described above.
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt ||
             n->in(1)->in(1) == n->in(1)->in(2)));
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0xb, "np");
    not_equal(0xa, "p");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  // Only eq/ne with distinct inputs: the unordered case needs a fixup here.
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq) &&
            n->in(1)->in(1) != n->in(1)->in(2));
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating point comparisons that set condition flags to test more directly,
// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert these
// latter conditions to ones that use unsigned tests before passing into an
// instruction because the preceding comparison might be based on a three way
// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
operand cmpOpUCFE()
%{
  match(Bool);
  // APX + AVX10.2 only; complements cmpOpUCF/cmpOpUCF2 whose predicates
  // exclude that configuration.
  predicate((UseAPX && VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq ||
             n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt));

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
 6157 
// Operands for bound floating pointer register arguments
// Pinned to XMM0 (single-register class).
operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
 6165 
// Vectors

// Dummy generic vector class. Should be used for all vector operands.
// Replaced with vec[SDXYZ] during post-selection pass.
operand vec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Dummy generic legacy vector class. Should be used for all legacy vector operands.
// Replaced with legVec[SDXYZ] during post-selection cleanup.
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand legVec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
 6287 
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

// The 'memory' opclass covers every addressing-mode operand, including the
// *Narrow variants used when a compressed oop base is decoded as part of the
// address.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6301 
 6302 //----------PIPELINE-----------------------------------------------------------
 6303 // Rules which define the behavior of the target architectures pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86 encoding)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU op, only ALU0 handles mul instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
           BR, FPU,
           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(rRegL dst)
%{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src)
%{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// // Long Store to Memory
// pipe_class ialu_mem_long_reg(memory mem, rRegL src)
// %{
//     instruction_count(2);
//     mem    : S3(read);
//     src    : S5(read);
//     D0     : S0(2);          // big decoder only; twice
//     ALU    : S4(2);     // any 2 alus
//     MEM    : S3(2);  // Both mems
// %}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
%{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Float reg-reg operation
pipe_class fpu_reg(regD dst)
%{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regD dst, regD src)
%{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
%{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regD dst, memory mem)
%{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
%{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regD src)
%{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any 2 decoders for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1)
%{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regD src1)
%{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regD dst)
%{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regD dst, regD src)
%{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp(label labl)
%{
    single_instruction;
    BR   : S3;
%}

// Conditional branch
pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
%{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
%{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow()
%{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty()
%{
    instruction_count(0);
%}

// Define the class for the Nop node
define
%{
   MachNop = empty;
%}

%}
 6764 
 6765 //----------INSTRUCTIONS-------------------------------------------------------
 6766 //
 6767 // match      -- States which machine-independent subtree may be replaced
 6768 //               by this instruction.
 6769 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6770 //               selection to identify a minimum cost tree of machine
 6771 //               instructions that matches a tree of machine-independent
 6772 //               instructions.
 6773 // format     -- A string providing the disassembly for this instruction.
 6774 //               The value of an instruction's operand may be inserted
 6775 //               by referring to it with a '$' prefix.
 6776 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6777 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6779 //               indicate the type of machine instruction, while secondary
 6780 //               and tertiary are often used for prefix options or addressing
 6781 //               modes.
 6782 // ins_encode -- A list of encode classes with parameters. The encode class
 6783 //               name must have been defined in an 'enc_class' specification
 6784 //               in the encode section of the architecture description.
 6785 
 6786 // ============================================================================
 6787 
// Matches the ideal Halt node.  Emits a stop() with the halt reason's
// message, but only when the Halt is actually reachable; an unreachable
// Halt emits nothing.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "stop\t# ShouldNotReachHere" %}
  ins_encode %{
    if (is_reachable()) {
      const char* str = __ code_string(_halt_reason);
      __ stop(str);
    }
  %}
  ins_pipe(pipe_slow);
%}
 6799 
 6800 // ============================================================================
 6801 
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Load Float
// Never emitted: post-selection cleanup eliminates this node, hence the
// ShouldNotReachHere() trap in the encoding.
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6812 
// Load Float
// Dummy move (legacy register class); removed during post-selection
// cleanup, so the encoding traps.
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6822 
// Load Float
// Dummy move; removed during post-selection cleanup, so the encoding traps.
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6832 
// Load Float
// Dummy move (legacy register class); removed during post-selection
// cleanup, so the encoding traps.
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6842 
// Load Double
// Dummy move; removed during post-selection cleanup, so the encoding traps.
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6852 
// Load Double
// Dummy move (legacy register class); removed during post-selection
// cleanup, so the encoding traps.
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6862 
// Load Double
// Dummy move; removed during post-selection cleanup, so the encoding traps.
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6872 
// Load Double
// Dummy move (legacy register class); removed during post-selection
// cleanup, so the encoding traps.
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
 6882 
 6883 //----------Load/Store/Move Instructions---------------------------------------
 6884 //----------Load Instructions--------------------------------------------------
 6885 
// Load Byte (8 bit signed)
instruct loadB(rRegI dst, memory mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "movsbl  $dst, $mem\t# byte" %}

  ins_encode %{
    // movsbl sign-extends the loaded byte to 32 bits.
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 6900 
// Load Byte (8 bit signed) into Long Register
// Folds the ConvI2L into the load: one sign-extending 64-bit load.
instruct loadB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(125);
  format %{ "movsbq  $dst, $mem\t# byte -> long" %}

  ins_encode %{
    __ movsbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 6915 
// Load Unsigned Byte (8 bit UNsigned)
instruct loadUB(rRegI dst, memory mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "movzbl  $dst, $mem\t# ubyte" %}

  ins_encode %{
    // movzbl zero-extends the loaded byte to 32 bits.
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 6930 
// Load Unsigned Byte (8 bit UNsigned) into Long Register
// Folds the ConvI2L into the load: one zero-extending 64-bit load.
instruct loadUB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(125);
  format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}

  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 6945 
// Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
// The loaded value is zero-extended from 8 bits, so only the low 8 bits
// of the mask matter; the andl clobbers flags (hence KILL cr).
instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
            "andl    $dst, right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbq(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
 6960 
// Load Short (16 bit signed)
instruct loadS(rRegI dst, memory mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "movswl $dst, $mem\t# short" %}

  ins_encode %{
    // movswl sign-extends the loaded 16-bit value to 32 bits.
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 6975 
// Load Short (16 bit signed) to Byte (8 bit signed)
// (LoadS << 24) >> 24 keeps only the sign-extended low byte, so a
// byte sign-extending load suffices.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 6987 
// Load Short (16 bit signed) into Long Register
// Folds the ConvI2L into the load: one sign-extending 64-bit load.
instruct loadS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(125);
  format %{ "movswq $dst, $mem\t# short -> long" %}

  ins_encode %{
    __ movswq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7002 
// Load Unsigned Short/Char (16 bit UNsigned)
instruct loadUS(rRegI dst, memory mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "movzwl  $dst, $mem\t# ushort/char" %}

  ins_encode %{
    // movzwl zero-extends the loaded 16-bit value to 32 bits.
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7017 
// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// (LoadUS << 24) >> 24 keeps only the sign-extended low byte, so a
// byte sign-extending load suffices.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7029 
// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
// Folds the ConvI2L into the load: one zero-extending 64-bit load.
instruct loadUS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(125);
  format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}

  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7044 
// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// Masking with 0xFF leaves only the low byte, so a zero-extending byte
// load replaces the load+and pair entirely.
instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));

  format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7055 
// Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
// The loaded value is zero-extended from 16 bits, so only the low 16 bits
// of the mask matter; the andl clobbers flags (hence KILL cr).
instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "andl    $dst, right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwq(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}
 7070 
// Load Integer
instruct loadI(rRegI dst, memory mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "movl    $dst, $mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7085 
// Load Integer (32 bit signed) to Byte (8 bit signed)
// (LoadI << 24) >> 24 keeps only the sign-extended low byte, so a
// byte sign-extending load suffices.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7097 
// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// LoadI & 0xFF keeps only the low byte, so a zero-extending byte load
// replaces the load+and pair.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7109 
// Load Integer (32 bit signed) to Short (16 bit signed)
// (LoadI << 16) >> 16 keeps only the sign-extended low 16 bits, so a
// short sign-extending load suffices.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "movswl  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7121 
// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// LoadI & 0xFFFF keeps only the low 16 bits, so a zero-extending short
// load replaces the load+and pair.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7133 
// Load Integer into Long Register
// Folds the ConvI2L into the load: movslq sign-extends to 64 bits.
instruct loadI2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));

  ins_cost(125);
  format %{ "movslq  $dst, $mem\t# int -> long" %}

  ins_encode %{
    __ movslq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7148 
// Load Integer with mask 0xFF into Long Register
// Masking with 0xFF leaves only the low byte, so a zero-extending byte
// load replaces load+and+sign-extend.
instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7159 
// Load Integer with mask 0xFFFF into Long Register
// Masking with 0xFFFF leaves only the low 16 bits, so a zero-extending
// short load replaces load+and+sign-extend.
instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7170 
// Load Integer with a 31-bit mask into Long Register
// A 31-bit mask clears the sign bit, so the masked value is non-negative
// and the implicit zero-extension of 32-bit ops equals sign-extension;
// the andl clobbers flags (hence KILL cr).
instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
            "andl    $dst, $mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7185 
// Load Unsigned Integer into Long Register
// (ConvI2L x) & 0xFFFFFFFF is exactly what a 32-bit movl produces: x86-64
// 32-bit ops implicitly zero the upper 32 bits of the destination.
instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(125);
  format %{ "movl    $dst, $mem\t# uint -> long" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
 7200 
// Load Long
instruct loadL(rRegL dst, memory mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(125);
  format %{ "movq    $dst, $mem\t# long" %}

  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem); // XXX
%}
 7215 
// Load Range
// Loads an array length (LoadRange) as a plain 32-bit int.
instruct loadRange(rRegI dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(125); // XXX
  format %{ "movl    $dst, $mem\t# range" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
 7228 
// Load Pointer
// Only matches when no GC barrier data is attached to the load; loads
// with barriers are presumably handled by GC-specific rules elsewhere
// (confirm against the GC .ad files).
instruct loadP(rRegP dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->barrier_data() == 0);

  ins_cost(125); // XXX
  format %{ "movq    $dst, $mem\t# ptr" %}
  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
 7242 
// Load Compressed Pointer
// 32-bit load of a narrow (compressed) oop; only matches when no GC
// barrier data is attached (barrier variants handled elsewhere).
instruct loadN(rRegN dst, memory mem)
%{
   predicate(n->as_Load()->barrier_data() == 0);
   match(Set dst (LoadN mem));

   ins_cost(125); // XXX
   format %{ "movl    $dst, $mem\t# compressed ptr" %}
   ins_encode %{
     __ movl($dst$$Register, $mem$$Address);
   %}
   ins_pipe(ialu_reg_mem); // XXX
%}
 7256 
 7257 
// Load Klass Pointer
// Full-width (64-bit) klass pointer load.
instruct loadKlass(rRegP dst, memory mem)
%{
  match(Set dst (LoadKlass mem));

  ins_cost(125); // XXX
  format %{ "movq    $dst, $mem\t# class" %}
  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
 7270 
// Load narrow Klass Pointer
// Plain 32-bit narrow-klass load; the compact-object-headers case, where
// the narrow klass lives shifted inside the mark word, is handled by
// loadNKlassCompactHeaders below.
instruct loadNKlass(rRegN dst, memory mem)
%{
  predicate(!UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));

  ins_cost(125); // XXX
  format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
 7284 
// Load narrow Klass Pointer with compact object headers: the narrow klass
// is stored shifted inside the mark word, so the load is followed by a
// right shift of markWord::klass_shift_at_offset bits. The shift sets
// flags, hence KILL cr.
instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
%{
  predicate(UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));
  effect(KILL cr);
  ins_cost(125);
  format %{
    "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
    "shrl    $dst, markWord::klass_shift_at_offset"
  %}
  ins_encode %{
    if (UseAPX) {
      // APX: eshrl fuses the load and shift into one instruction.
      // The trailing 'false' is presumably the no-flags (NF) selector --
      // TODO confirm against MacroAssembler::eshrl.
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    }
    else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
  %}
  ins_pipe(ialu_reg_mem);
%}
 7306 
// Load Float
instruct loadF(regF dst, memory mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(145); // XXX
  format %{ "movss   $dst, $mem\t# float" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Load Double
// movlpd variant, used when UseXmmLoadAndClearUpper is off; per the format,
// this emits movlpd, which writes only the low 64 bits of the XMM register.
instruct loadD_partial(regD dst, memory mem)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movlpd  $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// movsd variant, preferred when UseXmmLoadAndClearUpper is set.
instruct loadD(regD dst, memory mem)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movsd   $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 7346 
// Materialize the address of an AOT runtime constant into a register.
instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
%{
  match(Set dst con);

  format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}

  ins_encode %{
    __ load_aotrc_address($dst$$Register, (address)$con$$constant);
  %}

  ins_pipe(ialu_reg_fat);
%}
 7359 
// max = java.lang.Math.max(float a, float b)
// AVX10.2 form: a single eminmaxss instruction, no temporaries required.
instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxF a b));
  format %{ "maxF $dst, $a, $b" %}
  ins_encode %{
    __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
  %}
  ins_pipe( pipe_slow );
%}

// max = java.lang.Math.max(float a, float b)
// Generic AVX form for non-reduction contexts; needs three XMM temporaries.
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

// Reduction form: selected when this MaxF participates in a vector reduction;
// emitted via emit_fp_min_max, which also needs a GP temp and clobbers flags.
instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);

  format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
                    false /*min*/, true /*single*/);
  %}
  ins_pipe( pipe_slow );
%}
 7395 
// max = java.lang.Math.max(double a, double b)
// AVX10.2 form: a single eminmaxsd instruction, no temporaries required.
instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxD a b));
  format %{ "maxD $dst, $a, $b" %}
  ins_encode %{
    __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
  %}
  ins_pipe( pipe_slow );
%}

// max = java.lang.Math.max(double a, double b)
// Generic AVX form for non-reduction contexts; needs three XMM temporaries.
instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
  format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

// Reduction form: selected when this MaxD participates in a vector reduction;
// emitted via emit_fp_min_max, which also needs a GP temp and clobbers flags.
instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);

  format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
                    false /*min*/, false /*single*/);
  %}
  ins_pipe( pipe_slow );
%}
 7431 
// min = java.lang.Math.min(float a, float b)
// AVX10.2 form: a single eminmaxss with the MIN compare-sign selector.
instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinF a b));
  format %{ "minF $dst, $a, $b" %}
  ins_encode %{
    __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
  %}
  ins_pipe( pipe_slow );
%}
 7442 
// min = java.lang.Math.min(float a, float b)
// Generic AVX form for non-reduction contexts; needs three XMM temporaries.
instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

// Reduction form: selected when this MinF participates in a vector reduction;
// emitted via emit_fp_min_max, which also needs a GP temp and clobbers flags.
instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);

  format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
                    true /*min*/, true /*single*/);
  %}
  ins_pipe( pipe_slow );
%}
 7467 
// min = java.lang.Math.min(double a, double b)
// AVX10.2 form: a single eminmaxsd with the MIN compare-sign selector.
instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinD a b));
  format %{ "minD $dst, $a, $b" %}
  ins_encode %{
    __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
  %}
  ins_pipe( pipe_slow );
%}
 7478 
 7479 // min = java.lang.Math.min(double a, double b)
 7480 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7481   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7482   match(Set dst (MinD a b));
 7483   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7484     format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7485   ins_encode %{
 7486     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7487   %}
 7488   ins_pipe( pipe_slow );
 7489 %}
 7490 
 7491 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7492   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7493   match(Set dst (MinD a b));
 7494   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7495 
 7496   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7497   ins_encode %{
 7498     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7499                     true /*min*/, false /*single*/);
 7500   %}
 7501   ins_pipe( pipe_slow );
 7502 %}
 7503 
// Load Effective Address
// One rule per memory addressing-mode operand so the matcher can fold
// pointer arithmetic into a single leaq for each supported address shape.
instruct leaP8(rRegP dst, indOffset8 mem)
%{
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq    $dst, $mem\t# ptr 8" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP32(rRegP dst, indOffset32 mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr 32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScale(rRegP dst, indIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 7600 
// Load Effective Address which uses Narrow (32-bits) oop
// The first rule handles the shifted-compressed-oop case; the remaining
// narrow rules are only valid when CompressedOops::shift() == 0, i.e. the
// narrow base needs no scaling before the address arithmetic.
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
%{
  predicate(UseCompressedOops && (CompressedOops::shift() != 0));
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
 7705 
// Load integer constant into a register.
instruct loadConI(rRegI dst, immI src)
%{
  match(Set dst src);

  format %{ "movl    $dst, $src\t# int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

// Zero constant: xor is cheaper than mov of an immediate, but clobbers flags.
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// General 64-bit long constant (full mov64).
instruct loadConL(rRegL dst, immL src)
%{
  match(Set dst src);

  ins_cost(150);
  format %{ "movq    $dst, $src\t# long" %}
  ins_encode %{
    __ mov64($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Long zero: 32-bit xor zeroes the full 64-bit register; clobbers flags.
instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# long" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg); // XXX
%}

// Long constant that fits in unsigned 32 bits: movl zero-extends.
instruct loadConUL32(rRegL dst, immUL32 src)
%{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Long constant that fits in signed 32 bits: movq sign-extends the immediate.
instruct loadConL32(rRegL dst, immL32 src)
%{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq    $dst, $src\t# long (32-bit)" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
 7778 
// General pointer constant; emitted with relocation info so the GC/runtime
// can patch the embedded 64-bit immediate.
instruct loadConP(rRegP dst, immP con) %{
  match(Set dst con);

  format %{ "movq    $dst, $con\t# ptr" %}
  ins_encode %{
    __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

// Null pointer constant via xor; clobbers flags.
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# ptr" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Pointer constant that fits in positive 32 bits: movl zero-extends.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Float constant loaded from the constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Half-float constant loaded from the constant table.
instruct loadConH(regF dst, immH con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
 7834 
 7835 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7836   match(Set dst src);
 7837   effect(KILL cr);
 7838   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7839   ins_encode %{
 7840     __ xorq($dst$$Register, $dst$$Register);
 7841   %}
 7842   ins_pipe(ialu_reg);
 7843 %}
 7844 
// Non-null compressed oop constant; a null constant must have matched
// loadConN0 instead, hence the ShouldNotReachHere guard.
instruct loadConN(rRegN dst, immN src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == nullptr) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

// Non-null compressed klass constant; null is not a valid klass constant.
instruct loadConNKlass(rRegN dst, immNKlass src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# compressed klass ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == nullptr) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

// Float 0.0 via xorps — avoids a constant-table load.
instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps   $dst, $dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Use the same format since predicate() can not be used here.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double 0.0 via xorpd — avoids a constant-table load.
instruct loadConD0(regD dst, immD0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd   $dst, $dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
 7911 
// Load int from a stack slot.
instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# int stk" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load long from a stack slot.
instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# long stk" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load pointer from a stack slot.
instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load float from a stack slot (address built from rsp + displacement).
instruct loadSSF(regF dst, stackSlotF src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Use the same format since predicate() can not be used here.
instruct loadSSD(regD dst, stackSlotD src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# double stk" %}
  ins_encode  %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
 7972 
// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// One rule per AllocatePrefetchInstr setting (0=NTA, 1=T0, 2=T2, 3=PREFETCHW).

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
 8023 
//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, rRegI src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb    $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src)
%{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw    $mem, $src\t# char/short" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Integer
instruct storeI(memory mem, rRegI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Long
instruct storeL(memory mem, rRegL src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}

// Store Pointer
// Restricted to stores without GC barrier data; barrier-requiring stores
// are matched by GC-specific rules.
instruct storeP(memory mem, any_RegP src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store null pointer: when the compressed-oop base is null, r12 (the heap
// base register) holds zero, so storing r12 avoids an immediate operand.
instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Null Pointer, mark word, or other simple pointer constant.
instruct storeImmP(memory mem, immP31 src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 8118 
// Store Compressed Pointer
// Restricted to stores without GC barrier data.
instruct storeN(memory mem, rRegN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(memory mem, rRegN src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store compressed null: reuse r12 (zero when the heap base is null).
instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store compressed oop constant; a null constant degenerates to storing 0.
instruct storeImmN(memory mem, immN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == nullptr) {
      __ movl($mem$$Address, 0);
    } else {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store compressed klass constant.
instruct storeImmNKlass(memory mem, immNKlass src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 8187 
// Store Integer Immediate
// Zero case: reuse r12 (zero when the compressed-oop base is null) —
// a register store has a shorter encoding than an immediate store.
instruct storeImmI0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl    $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Long Immediate
// Zero case: same r12 trick as storeImmI0.
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Non-zero case: immediate must fit in signed 32 bits (immL32).
instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq    $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Short/Char Immediate
// Zero case: same r12 trick as storeImmI0.
instruct storeImmC0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    __ movw($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// 16-bit immediate store; gated on UseStoreImmI16.
instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw    $mem, $src\t# short/char" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
 8266 
 8267 // Store Byte Immediate
 8268 instruct storeImmB0(memory mem, immI_0 zero)
 8269 %{
 8270   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8271   match(Set mem (StoreB mem zero));
 8272 
 8273   ins_cost(125); // XXX
 8274   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8275   ins_encode %{
 8276     __ movb($mem$$Address, r12);
 8277   %}
 8278   ins_pipe(ialu_mem_reg);
 8279 %}
 8280 
 8281 instruct storeImmB(memory mem, immI8 src)
 8282 %{
 8283   match(Set mem (StoreB mem src));
 8284 
 8285   ins_cost(150); // XXX
 8286   format %{ "movb    $mem, $src\t# byte" %}
 8287   ins_encode %{
 8288     __ movb($mem$$Address, $src$$constant);
 8289   %}
 8290   ins_pipe(ialu_mem_imm);
 8291 %}
 8292 
// Store Float
// Store a float from an XMM register to memory.
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss   $mem, $src\t# float" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Store immediate Float value (it is faster than store from XMM register)
// Store of float 0.0 using r12 as a zero source; only valid when r12
// (compressed-oop heap base) is known to hold zero.
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store an arbitrary float constant by writing its 32-bit pattern
// (jint_cast) as an integer immediate.
instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl    $mem, $src\t# float" %}
  ins_encode %{
    __ movl($mem$$Address, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
 8331 
// Store Double
// Store a double from an XMM register to memory.
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd   $mem, $src\t# double" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Store immediate double 0.0 (it is faster than store from XMM register)
// Immediate-form variant; predicate is the exact complement of storeD0
// below, so exactly one of the two rules applies.
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq    $mem, $src\t# double 0." %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store of double 0.0 using r12 as a zero source; only valid when r12
// (compressed-oop heap base) is known to hold zero.
instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
 8371 
// Spill an int register to a stack slot.
instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# int stk" %}
  ins_encode %{
    __ movl($dst$$Address, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}

// Spill a long register to a stack slot.
instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# long stk" %}
  ins_encode %{
    __ movq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Spill a pointer register to a stack slot.
instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  ins_encode %{
    __ movq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Spill a float XMM register to a stack slot (addressed off rsp).
instruct storeSSF(stackSlotF dst, regF src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Spill a double XMM register to a stack slot (addressed off rsp).
instruct storeSSD(stackSlotD dst, regD src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# double stk" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 8431 
// Write back the cache line containing addr. Requires hardware support
// for data-cache-line flush; addr must be a plain base register with no
// index and zero displacement (asserted in the encoding).
instruct cacheWB(indirect addr)
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    __ cache_wb(Address($addr$$base$$Register, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Ordering fence emitted before a sequence of cache write-backs.
instruct cacheWBPreSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    __ cache_wbsync(true);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Ordering fence emitted after a sequence of cache write-backs.
instruct cacheWBPostSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    __ cache_wbsync(false);
  %}
  ins_pipe(pipe_slow); // XXX
%}
 8472 
//----------BSWAP Instructions-------------------------------------------------
// Reverse the byte order of a 32-bit value in place.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl  $dst" %}
  ins_encode %{
    __ bswapl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the byte order of a 64-bit value in place.
instruct bytes_reverse_long(rRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq  $dst" %}
  ins_encode %{
    __ bswapq($dst$$Register);
  %}
  ins_pipe( ialu_reg);
%}

// Byte-swap the low 16 bits, zero-extending the result: bswap the full
// 32-bit register, then a logical shift right by 16 leaves the swapped
// short in the low bits with zeros above.
instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "bswapl  $dst\n\t"
            "shrl    $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Byte-swap the low 16 bits, sign-extending the result: same as above
// but the arithmetic shift propagates the (swapped) sign bit.
instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "bswapl  $dst\n\t"
            "sar     $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
 8519 
//---------- Zeros Count Instructions ------------------------------------------

// Count leading zeros with the dedicated lzcnt instruction (register form).
instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// lzcnt directly from memory, folding the load into the instruction.
instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Fallback when lzcnt is unavailable: bsr yields the index of the highest
// set bit (and leaves dst undefined on zero input, so we substitute -1).
// The result is then 31 - index; a zero input thus yields -(-1) + 31 = 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
            "jnz     skip\n\t"
            "movl    $dst, -1\n"
      "skip:\n\t"
            "negl    $dst\n\t"
            "addl    $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit lzcnt (register form); result is an int.
instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit lzcnt directly from memory.
instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit fallback via bsrq; same scheme as the int version, producing
// 63 - index, and 64 for a zero input.
instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
            "jnz     skip\n\t"
            "movl    $dst, -1\n"
      "skip:\n\t"
            "negl    $dst\n\t"
            "addl    $dst, 63" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrq(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
 8619 
// Count trailing zeros with the dedicated tzcnt instruction (register form).
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// tzcnt directly from memory, folding the load into the instruction.
instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Fallback when tzcnt is unavailable: bsf yields the index of the lowest
// set bit (dst undefined on zero input, so substitute 32 for zero).
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz     done\n\t"
            "movl    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit tzcnt (register form); result is an int.
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// 64-bit tzcnt directly from memory.
instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit fallback via bsfq; zero input yields 64.
instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz     done\n\t"
            "movl    $dst, 64\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfq(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerLong);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
 8707 
//--------------- Reverse Operation Instructions ----------------
// Bit-reverse a 32-bit value without GFNI support; needs one integer temp
// (the XMM temps are passed as xnoreg since they are unused on this path).
instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
  ins_encode %{
    __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// GFNI variant of 32-bit bit-reverse; uses two XMM temps and one integer temp.
instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Bit-reverse a 64-bit value without GFNI support; needs two integer temps.
instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// GFNI variant of 64-bit bit-reverse; the second integer temp is not
// needed on this path (noreg).
instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
  %}
  ins_pipe( ialu_reg );
%}
 8752 
//---------- Population Count Instructions -------------------------------------

// Population count of an int, register form.
instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "popcnt  $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Population count of an int loaded from memory (load folded in).
instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "popcnt  $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr);

  format %{ "popcnt  $dst, $src" %}
  ins_encode %{
    __ popcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr);

  format %{ "popcnt  $dst, $mem" %}
  ins_encode %{
    __ popcntq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
 8804 
 8805 
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire barrier / load fence: empty encoding (presumably no instruction
// is needed on x86's memory model — confirm against the port's barrier
// conventions).
instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Acquire after a lock acquisition: the preceding CMPXCHG already
// provides the ordering, so nothing is emitted.
instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Release barrier / store fence: empty encoding.
instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Release before an unlock: the following FastUnlock provides the
// ordering, so nothing is emitted.
instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Full StoreLoad barrier, emitted as a locked add to the top of stack
// (see MacroAssembler::membar); clobbers flags.
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elide the volatile barrier when a following instruction already acts
// as a StoreLoad barrier (Matcher::post_store_load_barrier).
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// StoreStore barrier / store-store fence: empty encoding.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
 8892 
//----------Move Instructions--------------------------------------------------

// Reinterpret a long as a pointer; pure register move, elided when source
// and destination were allocated to the same register.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Reinterpret a pointer as a long; pure register move, elided when source
// and destination coincide.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert oop into int for vectors alignment masking
instruct convP2I(rRegI dst, rRegP src)
%{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl    $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// With a zero shift the narrow oop's bit pattern equals the low 32 bits
// of the raw address, so a plain 32-bit move suffices.
instruct convN2I(rRegI dst, rRegN src)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
 8946 
// Convert oop pointer into compressed form
// General case: the oop may be null, so the null-checking encode path is
// used. Copies src to dst first if they differ, then encodes in place.
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s);
    }
    __ encode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

// Encode an oop statically known to be non-null; takes the cheaper
// not-null encoding path.
instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

// Decode a narrow oop that may be null; copies src to dst first if they
// differ, then decodes in place.
instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s);
    }
    __ decode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

// Decode a narrow oop statically known to be non-null (or constant);
// uses the two-register or in-place not-null decode as appropriate.
instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ decode_heap_oop_not_null(d, s);
    } else {
      __ decode_heap_oop_not_null(d);
    }
  %}
  ins_pipe(ialu_reg_long);
%}

// Compress a (non-null) klass pointer; dst is a TEMP so it may not alias
// other live inputs during the multi-instruction sequence.
instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodePKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "encode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

// Decompress a (non-null) narrow klass pointer; dst is a TEMP as above.
instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeNKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "decode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}
 9029 
//----------Conditional Move---------------------------------------------------
// Jump
// dummy instruction for generating temp registers
// Table jump indexed by (switch_val << shift); predicate(false) keeps the
// matcher from selecting it directly — it exists so jumpXtnd_addr/jumpXtnd
// have a temp-register shape to share.
instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
  match(Jump (LShiftL switch_val shift));
  ins_cost(350);
  predicate(false);
  effect(TEMP dest);

  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val << $shift]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

// Table jump with an additional constant byte offset folded into the
// dispatch address.
instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

// Table jump indexed directly by switch_val (scale 1).
instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
  match(Jump switch_val);
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, Address::times_1);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}
 9093 
// Conditional move
// CMoveI between constant 1 and a value the predicate requires to be
// constant 0: materialize the 0/1 result with a single setcc on the
// negated condition instead of loading constants and cmov-ing.
instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Plain register cmov for signed comparisons (non-APX encoding).
instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// APX new-data-destination (three-operand) cmov for signed comparisons.
instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// setcc shortcut as in cmovI_imm_01, for unsigned comparisons.
instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Plain register cmov for unsigned comparisons (non-APX encoding).
instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// APX ndd cmov for unsigned comparisons.
instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// setcc shortcut for the UCF (unsigned, CF-only) flags variant.
instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// setcc shortcut for the UCFE flags variant.
instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9200 
// UCF register cmov, expressed as an expansion into the plain unsigned
// cmov rule rather than its own encoding.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// APX ndd cmov for the UCFE flags variant.
instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// UCF2 not-equal cmov: two cmovs, the extra one on parity — presumably to
// take the src value for the unordered float-compare outcome as well.
instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl  $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  effect(TEMP dst);

  ins_cost(200); // XXX
  format %{ "cmovpl  $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
 9251 
 9252 // Conditional move
 9253 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9254   predicate(!UseAPX);
 9255   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9256 
 9257   ins_cost(250); // XXX
 9258   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9259   ins_encode %{
 9260     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9261   %}
 9262   ins_pipe(pipe_cmov_mem);
 9263 %}
 9264 
 9265 // Conditional move
 9266 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9267 %{
 9268   predicate(UseAPX);
 9269   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9270 
 9271   ins_cost(250);
 9272   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9273   ins_encode %{
 9274     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9275   %}
 9276   ins_pipe(pipe_cmov_mem);
 9277 %}
 9278 
 9279 // Conditional move
 9280 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9281 %{
 9282   predicate(!UseAPX);
 9283   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9284 
 9285   ins_cost(250); // XXX
 9286   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9287   ins_encode %{
 9288     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9289   %}
 9290   ins_pipe(pipe_cmov_mem);
 9291 %}
 9292 
 9293 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9294   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9295 
 9296   ins_cost(250);
 9297   expand %{
 9298     cmovI_memU(cop, cr, dst, src);
 9299   %}
 9300 %}
 9301 
 9302 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9303 %{
 9304   predicate(UseAPX);
 9305   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9306 
 9307   ins_cost(250);
 9308   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9309   ins_encode %{
 9310     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9311   %}
 9312   ins_pipe(pipe_cmov_mem);
 9313 %}
 9314 
 9315 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9316 %{
 9317   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9318 
 9319   ins_cost(250);
 9320   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9321   ins_encode %{
 9322     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9323   %}
 9324   ins_pipe(pipe_cmov_mem);
 9325 %}
 9326 
// Conditional move, compressed pointer (narrow oop), register sources, signed
// compare.  Narrow oops are 32-bit, hence the cmovl encoding.
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move, compressed ptr, signed compare, APX NDD (new data
// destination) form: dst is a pure output selected from src1/src2.
instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move, compressed ptr, unsigned compare (legacy form).
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// UCF variant: expands into cmovN_regU.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}

// Conditional move, compressed ptr, unsigned compare, APX NDD form.
instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move, compressed ptr, UCFE flags, APX NDD form.
// NOTE(review): no UseAPX predicate — presumably implied by the UCFE operand
// classes; confirm.
instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Unordered != compare: an unordered FP compare sets the parity flag, so move
// on parity as well as on notEqual (two cmovs).
instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl  $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove (note the swapped src/dst in the match rule below).
instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpl  $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
 9432 
// Conditional move, pointer, register sources, signed compare.  Full-width
// pointers use the 64-bit cmovq encoding.
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);  // XXX
%}

// Conditional move, pointer, signed compare, APX NDD (new data destination)
// form: dst is a pure output selected from src1/src2.
instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move, pointer, unsigned compare (legacy form).
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// Conditional move, pointer, unsigned compare, APX NDD form.
instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// UCF variant: expands into cmovP_regU.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// Conditional move, pointer, UCFE flags, APX NDD form.
// NOTE(review): no UseAPX predicate — presumably implied by the UCFE operand
// classes; confirm.
instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Unordered != compare: an unordered FP compare sets the parity flag, so move
// on parity as well as on notEqual (two cmovs).
instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq  $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove (note the swapped src/dst in the match rule below).
instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq  $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
 9538 
// CMove between the long constants 1 (src, operand class immL1) and 0 (the
// predicate checks that the other CMove input is the constant 0): collapses
// the whole conditional move into a single setb of the negated condition.
instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Conditional move, long, register sources, signed compare (legacy
// two-operand form; APX builds use the NDD rule below).
instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);  // XXX
%}

// Conditional move, long, signed compare, APX NDD (new data destination)
// form: dst is a pure output selected from src1/src2.
instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move, long, memory operand, signed compare (legacy form).
instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);  // XXX
%}

// Conditional move, long, register/memory sources, signed compare, APX NDD form.
instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// 1/0 CMove reduced to setb — unsigned-compare variant of cmovL_imm_01.
instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Conditional move, long, register sources, unsigned compare (legacy form).
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// Conditional move, long, unsigned compare, APX NDD form.
instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// 1/0 CMove reduced to setb — unordered-compare (UCF) variant.
instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
 9658 
// 1/0 CMove reduced to setb — UCFE-flags variant (see cmovL_imm_01 for the
// pattern: predicate verifies the other CMove input is the constant 0).
instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// UCF variant: expands into cmovL_regU.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

// Conditional move, long, UCFE flags, APX NDD (new data destination) form.
// NOTE(review): no UseAPX predicate — presumably implied by the UCFE operand
// classes; confirm.
instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Unordered != compare: an unordered FP compare sets the parity flag, so move
// on parity as well as on notEqual (two cmovs).
instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq  $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove (note the swapped src/dst in the match rule below).
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq  $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move, long, memory operand, unsigned compare (legacy form).
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}

// UCF variant of the memory form: expands into cmovL_memU.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}

// Conditional move, long, memory operand, unsigned compare, APX NDD form.
instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move, long, memory operand, UCFE flags, APX NDD form.
// NOTE(review): no UseAPX predicate — same situation as cmovL_regUCFE_ndd.
instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
 9770 
// Conditional move, float.  There is no conditional-move instruction for XMM
// registers, so the CMove is implemented as a short forward branch (with the
// branch condition inverted relative to the CMove condition) around a movss.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# signed cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// Conditional move, float, unsigned compare: same branch-around-move scheme.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// UCF variant: expands into cmovF_regU.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}

// Conditional move, float, UCFE flags: same branch-around-move scheme.
instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
 9833 
// Conditional move, double.  As with float, no XMM conditional move exists,
// so emit an inverted-condition short branch around a movsd.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# signed cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// Conditional move, double, unsigned compare: same branch-around-move scheme.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// UCF variant: expands into cmovD_regU.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}

// Conditional move, double, UCFE flags: same branch-around-move scheme.
instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
 9896 
//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer add, two-operand legacy form: dst += src.  The flag(...) line
// declares which condition flags the instruction sets, so later flag-consuming
// patterns can reuse them instead of re-comparing.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Integer add, APX NDD (new data destination) form: dst = src1 + src2.
// Flag_ndd_demotable_opr1/opr2 — presumably marks operands that allow
// demotion to the shorter two-operand encoding when dst aliases one of them;
// confirm against the ADLC flag definitions.
// NOTE(review): trailing 'false' argument to eaddl is presumably the no_flags
// bit (flags ARE set here, matching the flag(...) declaration) — confirm
// against MacroAssembler::eaddl.
instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Integer add of an immediate, legacy form: dst += imm.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}

// Integer add of an immediate, APX NDD form: dst = src1 + imm.
instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

// Integer add of memory and immediate, APX NDD form: dst = [src1] + imm.
instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

// Integer add of a memory operand, legacy form: dst += [src].
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Integer add of register and memory, APX NDD form: dst = src1 + [src2].
instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Read-modify-write add to memory: [dst] += src.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Read-modify-write add of an immediate to memory: [dst] += imm.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);


  ins_cost(125); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10027 
// AddI with constant +1 matched to incl (guarded by UseIncDec).
// NOTE(review): unlike addI_rReg there is no flag(...) declaration here —
// incl does not update CF; presumably the omission is intentional, confirm.
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl    $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Increment, APX NDD (new data destination) form: dst = src + 1.
// Trailing 'false' to eincl is presumably the no_flags bit — confirm against
// MacroAssembler::eincl.
instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincl    $dst, $src\t# int ndd" %}
  ins_encode %{
    __ eincl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// Increment of a memory operand into a register, APX NDD form: dst = [src] + 1.
instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "eincl    $dst, $src\t# int ndd" %}
  ins_encode %{
    __ eincl($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

// Read-modify-write increment of memory: [dst] += 1.
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl    $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10081 
10082 // XXX why does that use AddI
10083 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10084 %{
10085   predicate(!UseAPX && UseIncDec);
10086   match(Set dst (AddI dst src));
10087   effect(KILL cr);
10088 
10089   format %{ "decl    $dst\t# int" %}
10090   ins_encode %{
10091     __ decrementl($dst$$Register);
10092   %}
10093   ins_pipe(ialu_reg);
10094 %}
10095 
10096 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10097 %{
10098   predicate(UseAPX && UseIncDec);
10099   match(Set dst (AddI src val));
10100   effect(KILL cr);
10101   flag(PD::Flag_ndd_demotable_opr1);
10102 
10103   format %{ "edecl    $dst, $src\t# int ndd" %}
10104   ins_encode %{
10105     __ edecl($dst$$Register, $src$$Register, false);
10106   %}
10107   ins_pipe(ialu_reg);
10108 %}
10109 
// Decrement an int loaded from memory: dst = [src] + (-1). APX NDD form;
// kills flags.
instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "edecl    $dst, $src\t# int ndd" %}
  ins_encode %{
    __ edecl($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10122 
10123 // XXX why does that use AddI
// Decrement an int directly in memory: [dst] = [dst] + (-1)
// (read-modify-write). Kills flags.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl    $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10137 
// Fuse (index << scale) + disp into a single LEA: dst = index*2^scale + disp.
// No flags are touched, so no rFlagsReg effect is needed.
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10150 
// Fuse base + index + disp into a single 3-operand LEA (flag-free add).
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10162 
// Fuse base + (index << scale) into a single LEA. The base register class
// excludes RBP/R13, whose encodings would force a displacement byte.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
  %}
  ins_pipe(ialu_reg_reg);
%}
10175 
// Fuse base + (index << scale) + disp into a single 3-operand LEA.
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10188 
// Long add, two-operand legacy form: dst += src. Sets all arithmetic flags
// (advertised so flag-consuming nodes can reuse them).
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10202 
// Long add, APX NDD form: dst = src1 + src2. Either source may be demoted to
// the legacy two-operand encoding when it equals dst. Sets all arithmetic flags.
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10216 
// Long add of a 32-bit sign-extended immediate, legacy form: dst += src.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10230 
// Long add of register and 32-bit immediate, APX NDD form: dst = src1 + src2.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10244 
// Long add of a loaded value and 32-bit immediate, APX NDD form:
// dst = [src1] + src2.
instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10258 
// Long add with memory source, legacy form: dst += [src].
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10273 
// Long add of register and memory source, APX NDD form: dst = src1 + [src2].
instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10288 
// Long add directly into memory: [dst] += src (read-modify-write).
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
10302 
// Long add of a 32-bit immediate directly into memory: [dst] += src.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10316 
// Increment a long in a register: dst = dst + 1 (legacy, non-APX form).
// Kills flags.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10329 
// Increment a long: dst = src + 1. APX NDD form; opr1 may be demoted to the
// legacy two-operand encoding when dst == src. Kills flags.
// Fix: the source of an AddL must come from the long register class (rRegL),
// not rRegI — the encoding reads the full 64-bit register (eincq), and the
// sibling rule decL_rReg_ndd already declares rRegL.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10343 
// Increment a long loaded from memory: dst = [src] + 1. APX NDD form;
// kills flags.
instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "eincq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10356 
// Increment a long directly in memory: [dst] = [dst] + 1 (read-modify-write).
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10370 
10371 // XXX why does that use AddL
// Decrement a long in a register: dst = dst + (-1), i.e. dst-- (legacy,
// non-APX form). Kills flags.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10384 
// Decrement a long: dst = src + (-1). APX NDD form; opr1 may be demoted to
// the legacy two-operand encoding when dst == src. Kills flags.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ edecq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10398 
// Decrement a long loaded from memory: dst = [src] + (-1). APX NDD form;
// kills flags.
instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "edecq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ edecq($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}
10411 
10412 // XXX why does that use AddL
// Decrement a long directly in memory: [dst] = [dst] + (-1)
// (read-modify-write). Kills flags.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10426 
// Fuse (index << scale) + disp into a single 64-bit LEA (flag-free).
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10439 
// Fuse base + index + disp into a single 3-operand 64-bit LEA.
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10451 
// Fuse base + (index << scale) into a single 64-bit LEA. Base register class
// excludes RBP/R13, whose encodings would force a displacement byte.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
  %}
  ins_pipe(ialu_reg_reg);
%}
10464 
// Fuse base + (index << scale) + disp into a single 3-operand 64-bit LEA.
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10477 
// Pointer add: dst += src (64-bit). Sets all arithmetic flags.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
10490 
// Pointer add of a 32-bit sign-extended immediate: dst += src.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10503 
10504 // XXX addP mem ops ????
10505 
// CheckCastPP is a type-system-only node: no code is emitted (size 0,
// empty encoding).
instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10515 
// CastPP is a type-system-only node: no code is emitted.
instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10525 
// CastII emits nothing in the normal case; the checked variant below takes
// over when VerifyConstraintCasts is enabled.
instruct castII(rRegI dst)
%{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10537 
// Debug variant of CastII: verifies at runtime that the value lies in the
// node's declared int range. Kills flags.
instruct castII_checked(rRegI dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));

  effect(KILL cr);
  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10550 
// CastLL emits nothing in the normal case; checked variants below take over
// when VerifyConstraintCasts is enabled.
instruct castLL(rRegL dst)
%{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastLL dst));

  size(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10562 
// Debug variant of CastLL for ranges whose bounds fit in 32-bit immediates
// (castLL_is_imm32) — no scratch register needed (noreg passed as tmp).
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr);
  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
  %}
  ins_pipe(pipe_slow);
%}
10575 
// Debug variant of CastLL for ranges needing full 64-bit bounds; requires a
// TEMP scratch register to materialize them. Kills flags.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr, TEMP tmp);
  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10588 
// CastFF is a type-system-only node: no code is emitted.
instruct castFF(regF dst)
%{
  match(Set dst (CastFF dst));

  size(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10599 
// CastHH (half-float cast node) is type-system-only: no code is emitted.
instruct castHH(regF dst)
%{
  match(Set dst (CastHH dst));

  size(0);
  format %{ "# castHH of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10610 
// CastDD is a type-system-only node: no code is emitted.
instruct castDD(regD dst)
%{
  match(Set dst (CastDD dst));

  size(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10621 
10622 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer CAS (strong and weak forms): LOCK CMPXCHGQ with the expected value
// in RAX; res := 1 on success, 0 on failure via setcc. RAX (oldval) is
// clobbered with the observed memory value. Only matches when no GC barrier
// is required (barrier_data() == 0).
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10643 
// Long CAS (strong and weak forms): LOCK CMPXCHGQ with the expected value in
// RAX; res := success flag via setcc. RAX (oldval) is clobbered.
instruct compareAndSwapL(rRegI res,
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10663 
// Int CAS (strong and weak forms): LOCK CMPXCHGL with the expected value in
// RAX; res := success flag via setcc. RAX (oldval) is clobbered.
instruct compareAndSwapI(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10683 
// Byte CAS (strong and weak forms): LOCK CMPXCHGB with the expected value in
// RAX; res := success flag via setcc. RAX (oldval) is clobbered.
instruct compareAndSwapB(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10703 
// Short CAS (strong and weak forms): LOCK CMPXCHGW with the expected value in
// RAX; res := success flag via setcc. RAX (oldval) is clobbered.
instruct compareAndSwapS(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10723 
// Narrow-oop (compressed pointer) CAS: LOCK CMPXCHGL with the expected value
// in RAX; res := success flag via setcc. RAX (oldval) is clobbered. Only
// matches when no GC barrier is required (barrier_data() == 0).
instruct compareAndSwapN(rRegI res,
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10743 
// Byte compare-and-exchange: like CAS, but the result is the value observed
// in memory (left in RAX/oldval by cmpxchg) rather than a success flag.
instruct compareAndExchangeB(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10760 
// Short compare-and-exchange: result is the observed memory value in
// RAX/oldval rather than a success flag.
instruct compareAndExchangeS(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10777 
// Int compare-and-exchange: result is the observed memory value in
// RAX/oldval rather than a success flag.
instruct compareAndExchangeI(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10794 
// Long compare-and-exchange: result is the observed memory value in
// RAX/oldval rather than a success flag.
instruct compareAndExchangeL(
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10811 
// Narrow-oop compare-and-exchange: result is the observed memory value in
// RAX/oldval. Only matches when no GC barrier is required.
instruct compareAndExchangeN(
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10828 
// Pointer compare-and-exchange: result is the observed memory value in
// RAX/oldval. Only matches when no GC barrier is required.
instruct compareAndExchangeP(
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10846 
// GetAndAddB whose result is unused: a plain LOCK ADDB suffices (no XADD,
// no register result).
instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10858 
// GetAndAddB of an immediate whose result is unused: LOCK ADDB imm8.
instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10870 
// GetAndAddB with result used: LOCK XADDB returns the previous memory value
// in newval.
instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "xaddb_lock  $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10882 
// GetAndAddS whose result is unused: a plain LOCK ADDW suffices.
instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10894 
// GetAndAddS of an immediate whose result is unused: LOCK ADDW imm16.
// Gated on UseStoreImmI16 (16-bit immediate stores can be slow on some CPUs).
instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10906 
// GetAndAddS with result used: LOCK XADDW returns the previous memory value
// in newval.
instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "xaddw_lock  $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10918 
// GetAndAddI whose result is unused: a plain LOCK ADDL suffices.
instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10930 
// GetAndAddI of an immediate whose result is unused: LOCK ADDL imm32.
instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10942 
// GetAndAddI with result used: LOCK XADDL returns the previous memory value
// in newval.
instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "xaddl_lock  $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10954 
// GetAndAddL whose result is unused: a plain LOCK ADDQ suffices.
instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10966 
// GetAndAddL of a 32-bit immediate whose result is unused: LOCK ADDQ imm32.
instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock   $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10978 
// GetAndAddL with result used: LOCK XADDQ returns the previous memory value
// in newval.
instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddL mem newval));
  effect(KILL cr);
  format %{ "xaddq_lock  $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddq($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10990 
// Atomic byte swap: XCHG carries an implicit lock, so no LOCK prefix and no
// flags effect are needed.
instruct xchgB( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB  $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10999 
// Atomic short swap: XCHG carries an implicit lock.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW  $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11008 
// Atomic int swap: XCHG carries an implicit lock.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL  $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11017 
11018 instruct xchgL( memory mem, rRegL newval) %{
11019   match(Set newval (GetAndSetL mem newval));
11020   format %{ "XCHGL  $newval,[$mem]" %}
11021   ins_encode %{
11022     __ xchgq($newval$$Register, $mem$$Address);
11023   %}
11024   ins_pipe( pipe_cmpxchg );
11025 %}
11026 
// Atomic exchange of a pointer; only when no GC barrier is attached
// (barrier_data() == 0) -- barrier-bearing forms are handled elsewhere.
instruct xchgP( memory mem, rRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  format %{ "XCHGQ  $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11036 
11037 instruct xchgN( memory mem, rRegN newval) %{
11038   predicate(n->as_LoadStore()->barrier_data() == 0);
11039   match(Set newval (GetAndSetN mem newval));
11040   format %{ "XCHGL  $newval,$mem]" %}
11041   ins_encode %{
11042     __ xchgl($newval$$Register, $mem$$Address);
11043   %}
11044   ins_pipe( pipe_cmpxchg );
11045 %}
11046 
//----------Abs Instructions-------------------------------------------

// Integer Absolute Instructions
// abs(x) computed branch-free as: dst = 0; dst -= src; if (dst < 0) dst = src.
instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl    $dst, $dst\t# abs int\n\t"
            "subl    $dst, $src\n\t"
            "cmovll  $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
    __ subl($dst$$Register, $src$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long Absolute Instructions
// Same xor/sub/cmov sequence as absI, using 64-bit sub/cmov.
// (xorl is sufficient to zero the full 64-bit register.)
instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AbsL src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl    $dst, $dst\t# abs long\n\t"
            "subq    $dst, $src\n\t"
            "cmovlq  $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
    __ subq($dst$$Register, $src$$Register);
    __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
11082 
//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
// Legacy two-operand form (dst -= src); used when APX NDD is unavailable.
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// APX NDD (new data destination) three-operand form: dst = src1 - src2.
instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// NDD form with an immediate subtrahend: dst = src1 - imm.
instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// NDD form with memory minuend and immediate subtrahend: dst = [src1] - imm.
instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Legacy form with memory subtrahend: dst -= [src].
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// NDD form with memory subtrahend: dst = src1 - [src2].
instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  ins_cost(150);
  format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// NDD form with memory minuend: dst = [src1] - src2.
instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Memory-destination subtract: [dst] -= src (read-modify-write).
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11200 
// Long Subtraction Instructions (64-bit analogues of the subI forms above).
// Legacy two-operand form (dst -= src); used when APX NDD is unavailable.
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// APX NDD three-operand form: dst = src1 - src2.
instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// NDD form with a 32-bit immediate subtrahend: dst = src1 - imm.
instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// NDD form with memory minuend and immediate subtrahend: dst = [src1] - imm.
instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Legacy form with memory subtrahend: dst -= [src].
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// NDD form with memory subtrahend: dst = src1 - [src2].
instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  ins_cost(150);
  format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// NDD form with memory minuend: dst = [src1] - src2.
instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Memory-destination subtract: [dst] -= src (read-modify-write).
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11315 
// Subtract from a pointer
// Matches ptr + (0 - int), i.e. subtracting an int from a pointer.
// XXX hmpf???
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# ptr - int" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11329 
// Integer negation: 0 - dst matched to a single NEG (legacy, non-APX form).
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD form: dst = -src.
instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegl    $dst, $src\t# int ndd" %}
  ins_encode %{
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// Same as negI_rReg, but matching the explicit NegI ideal node.
instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegI dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// NDD form of NegI: dst = -src.
instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegI src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegl    $dst, $src\t# int ndd" %}
  ins_encode %{
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// Memory-destination negate: [dst] = -[dst] (read-modify-write).
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11398 
// Long negation: 0 - dst matched to a single NEG (legacy, non-APX form).
instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq    $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD form: dst = -src.
instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11426 
11427 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11428 %{
11429   predicate(!UseAPX);
11430   match(Set dst (NegL dst));
11431   effect(KILL cr);
11432   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11433 
11434   format %{ "negq    $dst\t# int" %}
11435   ins_encode %{
11436     __ negq($dst$$Register);
11437   %}
11438   ins_pipe(ialu_reg);
11439 %}
11440 
// NDD form of NegL: dst = -src.
instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegL src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// Memory-destination negate: [dst] = -[dst] (read-modify-write).
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq    $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11467 
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register

// Legacy two-operand form: dst *= src.
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulI dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// APX NDD three-operand form: dst = src1 * src2.
instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand imul with immediate: dst = src * imm.
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Legacy form with memory operand: dst *= [src].
instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imull   $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

// NDD form with memory operand: dst = src1 * [src2].
instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(350);
  format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

// Three-operand imul with memory source and immediate: dst = [src] * imm.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11555 
// MulAddS2I: dst = dst*src1 + src2*src3, expanded into two multiplies and an
// add using the instructs defined elsewhere in this file; src2 is clobbered
// as the temporary for the second product.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_rReg(dst, src1, cr);
           mulI_rReg(src2, src3, cr);
           addI_rReg(dst, src2, cr); %}
%}
11565 
// Long Multiplication Instructions.
// Legacy two-operand form: dst *= src.
instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulL dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// APX NDD three-operand form: dst = src1 * src2.
instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand imul with 32-bit immediate: dst = src * imm.
instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Legacy form with memory operand: dst *= [src].
instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imulq   $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11621 
11622 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11623 %{
11624   predicate(UseAPX);
11625   match(Set dst (MulL src1 (LoadL src2)));
11626   effect(KILL cr);
11627   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11628 
11629   ins_cost(350);
11630   format %{ "eimulq   $dst, $src1, $src2 \t# long" %}
11631   ins_encode %{
11632     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11633   %}
11634   ins_pipe(ialu_reg_mem_alu0);
11635 %}
11636 
// Three-operand imul with memory source and 32-bit immediate: dst = [src] * imm.
instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL (LoadL src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11649 
// High 64 bits of a signed 128-bit product: one-operand imulq produces
// RDX:RAX = RAX * src; the result (RDX) is the high half.
instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (MulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
  ins_encode %{
    __ imulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned variant: one-operand mulq, high half in RDX.
instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (UMulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
  ins_encode %{
    __ mulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11675 
// Signed int division. The cdql_enc encoding guards the min_int / -1 case
// (which would otherwise trap with #DE overflow) before the idivl.
instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Signed long division; guards the min_long / -1 case before the idivq.
instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rax (DivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned int division; no overflow special case needed.
instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rax (UDivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
  ins_encode %{
    __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned long division.
instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rax (UDivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
  ins_encode %{
     __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11740 
// Integer DIVMOD with Register, both quotient and mod results
// (quotient in RAX, remainder in RDX from the single idivl).
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                             rFlagsReg cr)
%{
  match(DivModI rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}

// Long DIVMOD with Register, both quotient and mod results
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                             rFlagsReg cr)
%{
  match(DivModL rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}

// Unsigned integer DIVMOD with Register, both quotient and mod results
instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
                              no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(UDivModI rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
            "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
          %}
  ins_encode %{
    __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned long DIVMOD with Register, both quotient and mod results
instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
                              no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(UDivModL rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
            "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
          %}
  ins_encode %{
    __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11815 
// Signed int remainder: same guarded idivl sequence as divI, but the
// result taken is the remainder in RDX.
instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Signed long remainder (remainder in RDX).
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned int remainder.
instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rdx (UModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
  ins_encode %{
    __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned long remainder.
instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rdx (UModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
  ins_encode %{
    __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11880 
// Integer Shift Instructions
// Shift Left by one, two, three
instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by one, two, three
// APX NDD form: dst = src << shift.
instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
// APX NDD form: dst = src << shift.
instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// NDD form with memory source: dst = [src] << shift.
instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
// Memory-destination form: [dst] <<= shift (read-modify-write).
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
11965 
11966 // Shift Left by variable
11967 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11968 %{
11969   predicate(!VM_Version::supports_bmi2());
11970   match(Set dst (LShiftI dst shift));
11971   effect(KILL cr);
11972 
11973   format %{ "sall    $dst, $shift" %}
11974   ins_encode %{
11975     __ sall($dst$$Register);
11976   %}
11977   ins_pipe(ialu_reg_reg);
11978 %}
11979 
11980 // Shift Left by variable
11981 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11982 %{
11983   predicate(!VM_Version::supports_bmi2());
11984   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11985   effect(KILL cr);
11986 
11987   format %{ "sall    $dst, $shift" %}
11988   ins_encode %{
11989     __ sall($dst$$Address);
11990   %}
11991   ins_pipe(ialu_mem_reg);
11992 %}
11993 
11994 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11995 %{
11996   predicate(VM_Version::supports_bmi2());
11997   match(Set dst (LShiftI src shift));
11998 
11999   format %{ "shlxl   $dst, $src, $shift" %}
12000   ins_encode %{
12001     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12002   %}
12003   ins_pipe(ialu_reg_reg);
12004 %}
12005 
12006 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12007 %{
12008   predicate(VM_Version::supports_bmi2());
12009   match(Set dst (LShiftI (LoadI src) shift));
12010   ins_cost(175);
12011   format %{ "shlxl   $dst, $src, $shift" %}
12012   ins_encode %{
12013     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12014   %}
12015   ins_pipe(ialu_reg_mem);
12016 %}
12017 
12018 // Arithmetic Shift Right by 8-bit immediate
// Integer arithmetic shift-right by an 8-bit immediate, legacy two-operand
// form: dst >>= shift (sign-propagating).  sarl sets flags (KILL cr).
// NOTE(review): pipe class is ialu_mem_imm for a register op -- scheduling
// hint only, presumably historical.
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
// APX NDD form: dst = src >> shift without destroying src.
instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// APX NDD form with a memory source: dst = load(src) >> shift.
instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
// Read-modify-write directly in memory.
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by variable
// Count pinned to RCX; one-argument sarl() shifts by implicit CL.
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Arithmetic Shift Right by variable
// Memory read-modify-write with the count implicitly in CL.
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 sarx: three-operand arithmetic shift, flags untouched (no KILL cr).
instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI src shift));

  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 sarx with a memory source.
instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12124 
12125 // Logical Shift Right by 8-bit immediate
// Integer logical (unsigned) shift-right by an 8-bit immediate, legacy
// two-operand form: dst >>>= shift.  shrl sets flags (KILL cr).
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12138 
12139 // Logical Shift Right by 8-bit immediate
// APX NDD form: dst = src >>> shift without destroying src.
// Flag_ndd_demotable_opr1 presumably allows demotion to the legacy
// two-operand encoding when dst == src -- TODO confirm.
instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  // "\t# int (ndd)" matches the annotation spacing of the other NDD rules;
  // previously had a stray space ("\t # int (ndd)").
  format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12153 
// APX NDD form with a memory source: dst = load(src) >>> shift.
instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI (LoadI src) shift));
  effect(KILL cr);

  // "\t# int (ndd)" matches the annotation spacing of the other NDD rules;
  // previously had a stray space ("\t # int (ndd)").
  format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12166 
12167 // Logical Shift Right by 8-bit immediate
// Logical shift-right directly in memory: mem >>>= shift.
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Logical Shift Right by variable
// Count pinned to RCX; one-argument shrl() shifts by implicit CL.
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable
// Memory read-modify-write with the count implicitly in CL.
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 shrx: three-operand logical shift, flags untouched (no KILL cr).
instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI src shift));

  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 shrx with a memory source.
instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12231 
12232 // Long Shift Instructions
12233 // Shift Left by one, two, three
// Long shift-left by a small constant (immI2), legacy two-operand form:
// dst <<= shift.  salq sets flags (KILL cr).
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by one, two, three
// APX NDD form: dst = src << shift without destroying src.
instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
// Legacy two-operand form: dst <<= shift.
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
// APX NDD form: dst = src << shift without destroying src.
instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD form with a memory source: dst = load(src) << shift.
instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
// Read-modify-write directly in memory.
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Shift Left by variable
// Count pinned to RCX; one-argument salq() shifts by implicit CL.
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by variable
// Memory read-modify-write with the count implicitly in CL.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 shlx (64-bit): three-operand shift, flags untouched (no KILL cr).
instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL src shift));

  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 shlx (64-bit) with a memory source.
instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12368 
12369 // Arithmetic Shift Right by 8-bit immediate
// Long arithmetic shift-right.  The operand here is immI (not immI8), so
// the encoding masks the count to 6 bits (& 0x3F) to match x86 64-bit
// shift semantics.  Legacy two-operand form: dst >>= shift.
instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
// APX NDD form: dst = src >> shift without destroying src.
instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// APX NDD form with a memory source: dst = load(src) >> shift.
instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
// Read-modify-write directly in memory.
instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by variable
// Count pinned to RCX; one-argument sarq() shifts by implicit CL.
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Arithmetic Shift Right by variable
// Memory read-modify-write with the count implicitly in CL.
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 sarx (64-bit): three-operand shift, flags untouched (no KILL cr).
instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL src shift));

  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 sarx (64-bit) with a memory source.
instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12475 
12476 // Logical Shift Right by 8-bit immediate
// Long logical (unsigned) shift-right by an 8-bit immediate, legacy
// two-operand form: dst >>>= shift.  shrq sets flags (KILL cr).
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
// APX NDD form: dst = src >>> shift without destroying src.
instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD form with a memory source: dst = load(src) >>> shift.
instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
// Read-modify-write directly in memory.
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Logical Shift Right by variable
// Count pinned to RCX; one-argument shrq() shifts by implicit CL.
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable
// Memory read-modify-write with the count implicitly in CL.
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 shrx (64-bit): three-operand shift, flags untouched (no KILL cr).
instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL src shift));

  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 shrx (64-bit) with a memory source.
instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12582 
12583 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
12584 // This idiom is used by the compiler for the i2b bytecode.
// Matches (src << 24) >> 24 and strength-reduces it to a single movsbl
// (sign-extend the low byte), implementing the i2b conversion.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl  $dst, $src\t# i2b" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12595 
12596 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Matches (src << 16) >> 16 and strength-reduces it to a single movswl
// (sign-extend the low 16 bits), implementing the i2s conversion.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl  $dst, $src\t# i2s" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12608 
12609 // ROL/ROR instructions
12610 
12611 // Rotate left by constant.
// Pre-BMI2 rotate-left by constant: two-operand roll, destroys dst and
// writes the flags (KILL cr).
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12623 
// Rotate-left by constant via BMI2 rorx with the complemented count
// (rol(x, s) == ror(x, 32 - s)); rorx is three-operand and flag-free.
// The predicate must not exclude UseAPX: the legacy rule above requires
// !supports_bmi2() and the NDD rule below only matches a variable count in
// RCX, so restricting this rule to !UseAPX would leave constant int
// rotate-lefts with no matching rule on APX machines (C2 matcher failure).
// rolI_mem_immI8 below already uses this APX-agnostic predicate.
instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}
12635 
// Rotate-left of a loaded int by constant, via rorx with the complemented
// count (rol(x, s) == ror(x, 32 - s)).
instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft (LoadI src) shift));
  ins_cost(175);
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Left by variable
// Legacy form, count pinned to RCX; one-argument roll() rotates by
// implicit CL and writes the flags.
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12661 
12662 // Rotate Left by variable
// APX NDD rotate-left by variable count: dst = rol(src, CL) without
// destroying src.  The rcx_RegI operand pins the count to RCX; eroll()
// takes only dst and src because the count is the implicit CL register.
instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
  ins_encode %{
    __ eroll($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12676 
12677 // Rotate Right by constant.
// Pre-BMI2 rotate-right by constant: two-operand rorl, destroys dst and
// writes the flags (KILL cr).
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12689 
12690 // Rotate Right by constant.
// Rotate-right by constant via BMI2 rorx (three-operand, flag-free).
// The predicate must not exclude UseAPX: the legacy rule above requires
// !supports_bmi2() and the NDD rule below only matches a variable count in
// RCX, so restricting this rule to !UseAPX would leave constant int
// rotate-rights with no matching rule on APX machines (C2 matcher failure).
// rorI_mem_immI8 below already uses this APX-agnostic predicate.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12701 
// Rotate-right of a loaded int by constant, via rorx.
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight (LoadI src) shift));
  ins_cost(175);
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Right by variable
// Legacy form, count pinned to RCX; one-argument rorl() rotates by
// implicit CL and writes the flags.
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12726 
12727 // Rotate Right by variable
// APX NDD rotate-right by variable count: dst = ror(src, CL) without
// destroying src; the count is the implicit CL register (rcx_RegI).
instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
  ins_encode %{
    __ erorl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12741 
12742 // Rotate Left by constant.
// Pre-BMI2 long rotate-left by constant: two-operand rolq, destroys dst
// and writes the flags (KILL cr).
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12754 
// Long rotate-left by constant via BMI2 rorx with the complemented count
// (rol(x, s) == ror(x, 64 - s)); rorx is three-operand and flag-free.
// The predicate must not exclude UseAPX: the legacy rule above requires
// !supports_bmi2() and the NDD rule below only matches a variable count in
// RCX, so restricting this rule to !UseAPX would leave constant long
// rotate-lefts with no matching rule on APX machines (C2 matcher failure).
// rolL_mem_immI8 and rorL_immI8 already use this APX-agnostic predicate.
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}
12766 
// Rotate-left of a loaded long by constant, via rorx with the
// complemented count (rol(x, s) == ror(x, 64 - s)).
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft (LoadL src) shift));
  ins_cost(175);
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Left by variable
// Legacy form, count pinned to RCX; one-argument rolq() rotates by
// implicit CL and writes the flags.
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);

  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Left by variable
// APX NDD form: dst = rol(src, CL) without destroying src; the count is
// the implicit CL register (rcx_RegI).
instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
  ins_encode %{
    __ erolq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12808 
12809 // Rotate Right by constant.
// Pre-BMI2 long rotate-right by constant: two-operand rorq, destroys dst
// and writes the flags (KILL cr).
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Rotate Right by constant
// BMI2 rorx: three-operand, flag-free; applies whether or not APX is
// available (rorx already has a non-destructive destination).
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate-right of a loaded long by constant, via rorx.
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight (LoadL src) shift));
  ins_cost(175);
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Right by variable
// Legacy form, count pinned to RCX; one-argument rorq() rotates by
// implicit CL and writes the flags.
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Right by variable
// APX NDD form: dst = ror(src, CL) without destroying src; the count is
// the implicit CL register (rcx_RegI).
instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
  ins_encode %{
    __ erorq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12873 
12874 //----------------------------- CompressBits/ExpandBits ------------------------
12875 
// Long CompressBits via BMI2 pext (parallel bit extract).  No bmi2
// predicate here -- presumably the node is only generated when the
// platform supports it (match_rule_supported); TODO confirm.
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Long ExpandBits via BMI2 pdep (parallel bit deposit).
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// CompressBits with the mask loaded from memory.
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src (LoadL mask)));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// ExpandBits with the mask loaded from memory.
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src (LoadL mask)));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
12915 
12916 
12917 // Logical Instructions
12918 
12919 // Integer Logical Instructions
12920 
12921 // And Instructions
12922 // And Register with Register
// Integer bitwise AND, legacy two-operand form: dst &= src.  The flag()
// annotations record how andl affects EFLAGS (SF/ZF/PF set from the
// result, OF/CF cleared) so later flag users can reuse them.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12936 
12937 // And Register with Register using New Data Destination (NDD)
12938 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12939 %{
12940   predicate(UseAPX);
12941   match(Set dst (AndI src1 src2));
12942   effect(KILL cr);
12943   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12944 
12945   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12946   ins_encode %{
12947     __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12948 
12949   %}
12950   ins_pipe(ialu_reg_reg);
12951 %}
12952 
12953 // And Register with Immediate 255
// x & 0xFF is strength-reduced to a zero-extending byte move; no flags are
// modified, so no rFlagsReg effect is needed.
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 255 and promote to long
// Folds the ConvI2L away: movzbl already zeroes the upper 32 bits.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535
// x & 0xFFFF becomes a zero-extending word move; flags untouched.
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535 and promote to long
// Same movzwl trick with the int->long conversion folded away.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13000 
13001 // Can skip int2long conversions after AND with small bitmask
// (long)(x & (2^k - 1)) via BMI2 BZHI: zero all bits above position k.
// $tmp holds k = log2(mask + 1), which is exact because the mask operand is
// constrained to be a power of two minus one (immI_Pow2M1).
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi2());
  ins_cost(125);
  effect(TEMP tmp, KILL cr);
  match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
  ins_encode %{
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13015 
13016 // And Register with Immediate
// Legacy two-operand form: dst &= imm.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD form: dst = src1 & imm without clobbering src1.
instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD form with the load folded in: dst = mem & imm.
instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13058 
13059 // And Register with Memory
// Legacy two-operand form with folded load: dst &= mem. Higher ins_cost
// biases the matcher toward the pure register form when both apply.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// APX NDD form: dst = src1 & mem, src1 preserved.
instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13089 
13090 // And Memory with Register
// Read-modify-write byte AND directly in memory: mem = mem & src.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andb    $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Read-modify-write int AND directly in memory: mem = mem & src.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// And Memory with Immediate
// Read-modify-write int AND with an immediate: mem = mem & imm.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13133 
13134 // BMI1 instructions
// ANDN (BMI1): dst = ~src1 & mem — matches the (x ^ -1) & y idiom with the
// load folded into the instruction.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// ANDN (BMI1), register form: dst = ~src1 & src2.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI (BMI1): isolate lowest set bit — matches (0 - x) & x.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with folded load; both (LoadI src) occurrences must be the same load.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13192 
// BLSMSK (BMI1): mask up to and including lowest set bit — matches
// (x - 1) ^ x, here with the load folded in.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK (BMI1), register form: dst = (src - 1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
13224 
// BLSR (BMI1): clear lowest set bit — matches (src + (-1)) & src,
// i.e. (src - 1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  // Register-to-register ALU op: use ialu_reg, consistent with the other
  // BMI1 reg forms (blsiI_rReg_rReg, blsmskI_rReg_rReg). Previously this
  // was ialu_reg_mem, apparently swapped with the memory variant.
  ins_pipe(ialu_reg);
%}
13240 
// BLSR (BMI1) with folded load: dst = (mem - 1) & mem. Both (LoadI src)
// occurrences must be the same load for the match to apply.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  // Memory-source ALU op: use ialu_reg_mem, consistent with blsiI_rReg_mem
  // and blsmskI_rReg_mem. Previously this was ialu_reg, apparently swapped
  // with the register variant.
  ins_pipe(ialu_reg_mem);
%}
13257 
13258 // Or Instructions
13259 // Or Register with Register
// Legacy two-operand form: dst |= src.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Or Register with Register using New Data Destination (NDD)
// APX three-operand form: dst = src1 | src2, inputs preserved.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Or Register with Immediate
// Legacy two-operand form: dst |= imm.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD form: dst = src1 | imm.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Commuted operand order (imm | reg); OR is commutative, so the encode
// simply swaps the operands back.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD form with folded load: dst = mem | imm.
instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13345 
13346 // Or Register with Memory
// Legacy two-operand form with folded load: dst |= mem.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// APX NDD form: dst = src1 | mem, src1 preserved.
instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Or Memory with Register
// Read-modify-write byte OR directly in memory: mem = mem | src.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orb    $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Read-modify-write int OR directly in memory: mem = mem | src.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Or Memory with Immediate
// Read-modify-write int OR with an immediate: mem = mem | imm.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13420 
13421 // Xor Instructions
13422 // Xor Register with Register
// Legacy two-operand form: dst ^= src.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Xor Register with Register using New Data Destination (NDD)
// APX three-operand form: dst = src1 ^ src2, inputs preserved.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Xor Register with Immediate -1
// x ^ -1 is strength-reduced to NOT, which does not touch EFLAGS, hence no
// rFlagsReg effect.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst imm));

  format %{ "notl    $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD NOT: dst = ~src without clobbering src; flags untouched.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
  match(Set dst (XorI src imm));
  predicate(UseAPX);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl    $dst, $src" %}
  ins_encode %{
     __ enotl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Register with Immediate
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  match(Set dst (XorI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD form: dst = src1 ^ imm (imm != -1, see predicate).
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  match(Set dst (XorI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Memory operand with Immediate into Register (NDD)
instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI (LoadI src1) src2));
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13524 
13525 // Xor Register with Memory
// Legacy two-operand form with folded load: dst ^= mem.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// APX NDD form: dst = src1 ^ mem, src1 preserved.
instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Xor Memory with Register
// Read-modify-write byte XOR directly in memory: mem = mem ^ src.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorb    $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Read-modify-write int XOR directly in memory: mem = mem ^ src.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Xor Memory with Immediate
// Read-modify-write int XOR with an immediate: mem = mem ^ imm.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13599 
13600 
13601 // Long Logical Instructions
13602 
13603 // And Instructions
13604 // And Register with Register
// Legacy two-operand form: dst &= src (64-bit).
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// And Register with Register using New Data Destination (NDD)
// APX three-operand form: dst = src1 & src2 (64-bit), inputs preserved.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);

  %}
  ins_pipe(ialu_reg_reg);
%}

// And Register with Immediate 255
// x & 0xFF via zero-extending byte move; no flags modified.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // movzbl zeroes out the upper 32-bit and does not need REX.W
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535
// x & 0xFFFF via zero-extending word move; no flags modified.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // movzwl zeroes out the upper 32-bit and does not need REX.W
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13660 
13661 // And Register with Immediate
// Legacy two-operand form: dst &= imm. Immediate is limited to a
// sign-extended 32-bit value (immL32), as required by the andq encoding.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD form: dst = src1 & imm32, src1 preserved.
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD form with folded load: dst = mem & imm32.
instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Memory
// Legacy two-operand form with folded load: dst &= mem.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// APX NDD form: dst = src1 & mem, src1 preserved.
instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13734 
13735 // And Memory with Register
// Read-modify-write long AND directly in memory: mem = mem & src.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// And Memory with Immediate
// Read-modify-write long AND with a sign-extended 32-bit immediate.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// mem &= con where ~con is a single bit: emit BTR (bit test and reset)
// on that bit position instead of a full AND.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit immediate given that not(con) is a power of 2
  // because AND/OR works well enough for 8/32-bit values.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);

  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btrq    $dst, log2(not($con))\t# long" %}
  ins_encode %{
    __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
13781 
// BMI1 instructions
// These rules recognize the ideal-graph shapes C2 builds for the BMI1
// bit-manipulation idioms and collapse each into a single instruction.

// andnq: dst = ~src1 & src2, matched from (src1 ^ -1) & (load src2).
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// andnq: dst = ~src1 & src2 (register form).
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
  __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // NOTE(review): reg-reg form uses the ialu_reg_mem pipe class — confirm intentional.
  ins_pipe(ialu_reg_mem);
%}

// blsiq: isolate lowest set bit, dst = (-src) & src.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// blsiq, memory operand: both (LoadL src) occurrences are the same load node.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// blsmskq: mask up to and including lowest set bit, dst = (src - 1) ^ src.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// blsmskq, register form.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// blsrq: reset lowest set bit, dst = (src - 1) & src.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// blsrq, memory operand.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // NOTE(review): other memory forms use ialu_reg_mem — confirm intentional.
  ins_pipe(ialu_reg);
%}
13905 
// Or Instructions
// Or Register with Register
// Legacy two-operand form (dst |= src); selected only when APX NDD is unavailable.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrL dst src));
  effect(KILL cr);
  // OR sets SF/ZF/PF from the result and always clears OF/CF.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Or Register with Register using New Data Destination (NDD)
// APX three-operand form: eorq writes dst without clobbering src1/src2.
// Flag_ndd_demotable_opr1/2 presumably mark operands that allow demotion to
// the shorter legacy encoding when dst aliases them — confirm in the assembler.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);

  %}
  ins_pipe(ialu_reg_reg);
%}
13937 
// Use any_RegP to match R15 (TLS register) without spilling.
// dst |= (long)src. Legacy two-operand form; gated on !UseAPX so it pairs
// with orL_rReg_castP2X_ndd the same way every other legacy/NDD rule pair
// in this file is gated (e.g. orL_rReg / orL_rReg_ndd).
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13950 
// NDD variant of orL_rReg_castP2X: dst = src1 | (long)src2.
// Must be gated on UseAPX — eorq is an APX EVEX (NDD) encoding, and without
// this predicate the rule could be selected on hardware without APX.
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13962 
// Or Register with Immediate
// Legacy form: dst |= sign-extended 32-bit immediate.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// NDD form: dst = src1 | imm32.
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// NDD form with the immediate as the first ideal input. OrL is commutative,
// so the encoding uses $src2 as the register operand and $src1 as the constant.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Or Loaded Long with Immediate into a register (NDD form; the destination
// here is a register, unlike orL_mem_imm below which writes memory).
instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Or Register with Memory
// Legacy form: dst |= load(src).
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// NDD form: dst = src1 | load(src2).
instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Or Memory with Register
// Read-modify-write: dst(mem) |= src.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Or Memory with Immediate
// Read-modify-write: dst(mem) |= sign-extended imm32.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Set one bit in memory: dst(mem) |= con where con is a single-bit mask,
// emitted as btsq with the bit index log2(con).
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit power of 2 immediate
  // because AND/OR works well enough for 8/32-bit values.
  // (Bit indices <= 31 fit a sign-extendable imm32 OR and use orL_mem_imm.)
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);

  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq    $dst, log2($con)\t# long" %}
  ins_encode %{
    __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
14098 
// Xor Instructions
// Xor Register with Register
// Legacy two-operand form (dst ^= src).
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  // XOR sets SF/ZF/PF from the result and always clears OF/CF.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Xor Register with Register using New Data Destination (NDD)
// APX three-operand form: exorq writes dst without clobbering src1/src2.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Xor Register with Immediate -1
// XOR with -1 is bitwise NOT; NOT does not modify EFLAGS, hence no KILL cr.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  predicate(!UseAPX);
  match(Set dst (XorL dst imm));

  format %{ "notq   $dst" %}
  ins_encode %{
     __ notq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// NDD bitwise NOT: dst = ~src, flags untouched.
instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
%{
  predicate(UseAPX);
  match(Set dst (XorL src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq   $dst, $src" %}
  ins_encode %{
    __ enotq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Register with Immediate
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorL_rReg_im1 cost agnostic if immL32 src is -1.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// NDD form: dst = src1 ^ imm32.
instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorL_rReg_im1_ndd cost agnostic if immL32 src2 is -1.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Loaded Long with Immediate into a register (NDD form; the destination
// here is a register, unlike xorL_mem_imm below which writes memory).
instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Register with Memory
// Legacy form: dst ^= load(src).
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// NDD form: dst = src1 ^ load(src2).
instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Xor Memory with Register
// Read-modify-write: dst(mem) ^= src.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Xor Memory with Immediate
// Read-modify-write: dst(mem) ^= sign-extended imm32.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
14263 
// dst = (p < q) ? -1 : 0. setcc yields 0/1; negl turns 1 into -1.
// (Fixed the format string: the "\n\t" separator between the setcc and negl
// lines was missing, running the two lines together in the debug listing.)
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setcc   $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl    $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14280 
// dst = (dst < 0) ? -1 : 0, via arithmetic shift that spreads the sign bit.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* Better to save a register than avoid a branch */
// p = ((p < q) ? y : 0) + (p - q), i.e. p -= q, then p += y only when the
// subtraction went negative (p was less than q).
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(300);
  format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
            "jge     done\n\t"
            "addl    $p,$y\n"
            "done:   " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    // SubI already set the flags; skip the add when p - q >= 0.
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}

/* Better to save a register than avoid a branch */
// y = (p < q) ? y : 0 — keep y when the mask would be all-ones, else clear it.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
            "jlt     done\n\t"
            "xorl    $y, $y\n"
            "done:   " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    // p < q: mask is -1, y is unchanged; otherwise zero y.
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}
14341 
14342 
//---------- FP Instructions------------------------------------------------

// Really expensive, avoid
// Full compare into a general flags register: ucomiss leaves the unordered
// (NaN) case encoded in PF, so emit_cmpfp_fixup rewrites the saved flags
// when NaN was seen (see the pushfq/andq/popfq sequence in the listing).
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}

// rFlagsRegUCF variant: no NaN fixup emitted — presumably the flags consumer
// tolerates the raw ucomiss unordered result; confirm against the UCF users.
instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// UCFE variant using vucomxss instead of ucomiss.
instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "vucomxss $src1, $src2" %}
  ins_encode %{
    __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Memory-operand forms of the two variants above.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "vucomxss $src1, $src2" %}
  ins_encode %{
    __ vucomxss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Constant-operand forms; the float constant is materialized in the constant table.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vucomxss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Really expensive, avoid
// Double-precision analogue of cmpF_cc_reg, with the same NaN fixup.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}

// NOTE(review): the trailing "test" in the next two format strings looks like
// leftover debug text; it only affects the disassembly listing.
instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2 test" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "vucomxsd $src1, $src2 test" %}
  ins_encode %{
    __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Memory- and constant-operand forms for double compares.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "vucomxsd $src1, $src2" %}
  ins_encode %{
    __ vucomxsd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vucomxsd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14513 
// Compare into -1,0,1
// Three-way compare: dst = -1 / 0 / 1 for less / equal / greater, with the
// unordered (NaN) case mapped to -1; emit_cmpfp3 produces the flag-decoding
// sequence shown in the format listing.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1
// Same as above with the second operand loaded from memory.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1
// Same as above with the second operand taken from the constant table.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1
// Double-precision analogues of the three rules above.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14637 
14638 //----------Arithmetic Conversion Instructions---------------------------------
14639 
14640 instruct convF2D_reg_reg(regD dst, regF src)
14641 %{
14642   match(Set dst (ConvF2D src));
14643 
14644   format %{ "cvtss2sd $dst, $src" %}
14645   ins_encode %{
14646     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14647   %}
14648   ins_pipe(pipe_slow); // XXX
14649 %}
14650 
14651 instruct convF2D_reg_mem(regD dst, memory src)
14652 %{
14653   predicate(UseAVX == 0);
14654   match(Set dst (ConvF2D (LoadF src)));
14655 
14656   format %{ "cvtss2sd $dst, $src" %}
14657   ins_encode %{
14658     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14659   %}
14660   ins_pipe(pipe_slow); // XXX
14661 %}
14662 
14663 instruct convD2F_reg_reg(regF dst, regD src)
14664 %{
14665   match(Set dst (ConvD2F src));
14666 
14667   format %{ "cvtsd2ss $dst, $src" %}
14668   ins_encode %{
14669     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14670   %}
14671   ins_pipe(pipe_slow); // XXX
14672 %}
14673 
14674 instruct convD2F_reg_mem(regF dst, memory src)
14675 %{
14676   predicate(UseAVX == 0);
14677   match(Set dst (ConvD2F (LoadD src)));
14678 
14679   format %{ "cvtsd2ss $dst, $src" %}
14680   ins_encode %{
14681     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14682   %}
14683   ins_pipe(pipe_slow); // XXX
14684 %}
14685 
14686 // XXX do mem variants
14687 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14688 %{
14689   predicate(!VM_Version::supports_avx10_2());
14690   match(Set dst (ConvF2I src));
14691   effect(KILL cr);
14692   format %{ "convert_f2i $dst, $src" %}
14693   ins_encode %{
14694     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14695   %}
14696   ins_pipe(pipe_slow);
14697 %}
14698 
// ConvF2I on AVX10.2: single saturating truncating convert; unlike the
// fallback rule above, no fixup path is needed and no flags are clobbered.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14709 
// ConvF2I on AVX10.2 with the float load folded into the convert.
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisl($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14720 
// ConvF2L fallback (no AVX10.2): MacroAssembler::convertF2I with T_LONG;
// KILL cr because the helper's fixup path uses the flags.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14732 
// ConvF2L on AVX10.2: saturating truncating convert to 64-bit, flag-free.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14743 
// ConvF2L on AVX10.2 with the float load folded into the convert.
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisq($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14754 
// ConvD2I fallback (no AVX10.2): MacroAssembler::convertF2I with T_DOUBLE;
// KILL cr because the helper's fixup path uses the flags.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14766 
// ConvD2I on AVX10.2: saturating truncating convert, flag-free.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14777 
// ConvD2I on AVX10.2 with the double load folded into the convert.
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisl($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14788 
// ConvD2L fallback (no AVX10.2): MacroAssembler::convertF2I with
// T_LONG/T_DOUBLE; KILL cr for the helper's fixup path.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14800 
// ConvD2L on AVX10.2: saturating truncating convert to 64-bit, flag-free.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14811 
// ConvD2L on AVX10.2 with the double load folded into the convert.
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisq($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14822 
// RoundD (long Math.round(double)) via MacroAssembler::round_double.
// The helper requires rcx specifically (rcx_RegL operand class) plus a
// scratch long register; dst is also TEMP, and flags are clobbered.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14833 
// RoundF (int Math.round(float)) via MacroAssembler::round_float;
// same temp-register contract as round_double_reg above.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14844 
// ConvI2F (scalar path, selected when UseXmmI2F is off).
instruct convI2F_reg_reg(vlRegF dst, rRegI src)
%{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    if (UseAVX > 0) {
      // Zero dst first — cvtsi2ssl writes only the low lanes, so this
      // presumably breaks the false dependency on dst's stale upper bits.
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14859 
// ConvI2F with the int load folded into the convert; SSE-only targets.
instruct convI2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14871 
// ConvI2D (scalar path, selected when UseXmmI2D is off).
instruct convI2D_reg_reg(vlRegD dst, rRegI src)
%{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    if (UseAVX > 0) {
      // Zero dst first to avoid a false dependency on its stale upper
      // lanes (cvtsi2sdl is a partial write) — same idiom as convI2F.
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14886 
// ConvI2D with the int load folded into the convert; SSE-only targets.
instruct convI2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14898 
// ConvI2F alternative (UseXmmI2F): move the int into an XMM register and
// use the packed int->float convert on lane 0.
// NOTE(review): format text says "cvtdq2psl" but the emitted instruction is
// cvtdq2ps — debug-format mnemonic only, no behavioral effect.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14912 
// ConvI2D alternative (UseXmmI2D): movdl then packed int->double convert,
// mirroring convXI2F_reg above (format mnemonic "cvtdq2pdl" vs emitted
// cvtdq2pd is likewise debug-text only).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14926 
// ConvL2F: 64-bit integer to float (cvtsi2ssq).
instruct convL2F_reg_reg(vlRegF dst, rRegL src)
%{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    if (UseAVX > 0) {
      // Break the false dependency on dst's stale lanes before the
      // partial-write convert — same idiom as convI2F_reg_reg.
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14940 
// ConvL2F with the long load folded into the convert; SSE-only targets.
instruct convL2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14952 
// ConvL2D: 64-bit integer to double (cvtsi2sdq).
instruct convL2D_reg_reg(vlRegD dst, rRegL src)
%{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    if (UseAVX > 0) {
      // Break the false dependency on dst's stale lanes before the
      // partial-write convert — same idiom as the other int/long converts.
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14966 
// ConvL2D with the long load folded into the convert; SSE-only targets.
instruct convL2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14978 
// ConvI2L: sign-extend int to long with movslq.
instruct convI2L_reg_reg(rRegL dst, rRegI src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    __ movslq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14990 
14991 // Zero-extend convert int to long
// Zero-extend convert int to long
// Matches (ConvI2L src) & 0xFFFFFFFF. A 32-bit mov implicitly zeroes the
// upper 32 bits on x86-64, so the mov is elided entirely when the register
// allocator assigned dst and src the same register.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
15004 
15005 // Zero-extend convert int to long
// Zero-extend convert int to long
// Load form: a 32-bit load zero-extends to 64 bits by architecture, so the
// AndL with the 32-bit mask folds away.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
15016 
// (long & 0xFFFFFFFF): implemented as a 32-bit register move, which
// implicitly zeroes the upper 32 bits on x86-64.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15027 
// ConvL2I: truncate long to int by moving the low 32 bits.
instruct convL2I_reg_reg(rRegI dst, rRegL src)
%{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15038 
15039 
// MoveF2I (raw bit move, Float.floatToRawIntBits-style) from a float stack
// slot into a GPR via a plain 32-bit load.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe(ialu_reg_mem);
%}
15051 
// MoveI2F (raw bit move) from an int stack slot into an XMM register.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}
15063 
// MoveD2L (raw bit move) from a double stack slot into a GPR via a 64-bit load.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    __ movq($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe(ialu_reg_mem);
%}
15075 
// MoveL2D from a long stack slot when UseXmmLoadAndClearUpper is off;
// format advertises movlpd (partial, upper bits untouched) — the
// UseXmmLoadAndClearUpper variant below is taken otherwise.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}
15088 
// MoveL2D from a long stack slot when UseXmmLoadAndClearUpper is on
// (movsd load clears the destination's upper bits).
instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}
15101 
15102 
// MoveF2I (raw bit move) from an XMM register to an int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15114 
// MoveI2F (raw bit move) from a GPR to a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
15126 
// MoveD2L (raw bit move) from an XMM register to a long stack slot.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // Debug-format tag fixed: this is the D2L rule; it previously said
  // "MoveL2D_reg_stack" (copy-paste from the L2D rule), which mislabeled
  // the instruction in PrintOptoAssembly output.
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15138 
// MoveL2D (raw bit move) from a GPR to a double stack slot via a 64-bit store.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    __ movq(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
15150 
// MoveF2I register-to-register: direct XMM->GPR bit move (movd), cheaper
// than the stack round-trip variants above (ins_cost 85).
instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
15161 
// MoveD2L register-to-register: direct 64-bit XMM->GPR bit move.
instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    __ movdq($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
15172 
// MoveI2F register-to-register: direct 32-bit GPR->XMM bit move.
instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15183 
// MoveL2D register-to-register: direct 64-bit GPR->XMM bit move.
instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15194 
15195 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// Small (!is_large) variable-length ClearArray, pre-AVX512 (UseAVX <= 2).
// Fixed registers: cnt in rcx, base in rdi, zero value in rax — the layout
// rep stos requires. Delegates to MacroAssembler::clear_mem with
// is_large=false and no opmask register (knoreg).
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  // Debug-only listing of the expected code shape; actual code is emitted
  // by clear_mem below.
  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15255 
15256 // Small non-constant length ClearArray for AVX512 targets.
// Small (!is_large) variable-length ClearArray for AVX512 (UseAVX > 2):
// same shape as rep_stos but passes a real opmask TEMP (ktmp) to clear_mem
// and uses a legacy-encodable XMM temp (legRegD).
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  // Debug-only listing; actual code is emitted by clear_mem below.
  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15316 
15317 // Large non-constant length ClearArray for non-AVX512 targets.
// Large variable-length ClearArray, pre-AVX512: clear_mem is called with
// is_large=true, so the short-count fast path is skipped.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  // Debug-only listing; actual code is emitted by clear_mem below.
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15367 
15368 // Large non-constant length ClearArray for AVX512 targets.
// Large variable-length ClearArray for AVX512: is_large=true and a real
// opmask TEMP (ktmp) is handed to clear_mem.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  // Debug-only listing; actual code is emitted by clear_mem below.
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15418 
15419 // Small constant length ClearArray for AVX512 targets.
// Small constant length ClearArray for AVX512 targets.
// Count is a compile-time immediate (immL), so no fixed registers are
// needed; requires AVX512VL and MaxVectorSize >= 32 for the masked-vector
// clear_mem overload. Cheapest variant (ins_cost 100).
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
   __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15432 
// StrComp, both strings Latin-1 (LL), pre-AVX512VLBW path (no opmask).
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15448 
// StrComp LL on AVX512VLBW: same as string_compareL plus an opmask TEMP.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15464 
// StrComp, both strings UTF-16 (UU), pre-AVX512VLBW path (no opmask).
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15480 
// StrComp UU on AVX512VLBW: same as string_compareU plus an opmask TEMP.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15496 
// StrComp, str1 Latin-1 vs str2 UTF-16 (LU), pre-AVX512VLBW path.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15512 
// StrComp LU on AVX512VLBW: same as string_compareLU plus an opmask TEMP.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15528 
// StrComp, str1 UTF-16 vs str2 Latin-1 (UL). Note the operand registers are
// assigned opposite to the LU rule and the encode call swaps str1/str2 and
// cnt1/cnt2 — UL is implemented in terms of the same helper with the
// operands reversed.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15544 
// StrComp UL on AVX512VLBW: same swapped-operand scheme as string_compareUL,
// plus an opmask TEMP.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15560 
// fast search of substring with known size.
// LL (Latin1 needle in Latin1 haystack) IndexOf with a compile-time constant
// needle length (int_cnt2). Needles of >= 16 byte elements take the
// string_indexofC8 path, which avoids staging the needle through the stack.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
15589 
// fast search of substring with known size.
// UU (UTF16 needle in UTF16 haystack) IndexOf with a constant needle length.
// The C8 cutover is 8 elements here (vs 16 in the LL rule) — presumably
// because UTF16 elements are twice as wide; TODO confirm against masm code.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
15618 
// fast search of substring with known size.
// UL (Latin1 needle in UTF16 haystack) IndexOf with a constant needle
// length; same structure as the UU rule, dispatching on icnt2 >= 8.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
15647 
// LL IndexOf with a runtime (non-constant) needle length; the literal -1
// passed as int_cnt2 tells the masm routine the length is not known at
// compile time.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
15664 
// UU IndexOf with a runtime needle length (-1 sentinel, as in string_indexofL).
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
15681 
// UL IndexOf with a runtime needle length (-1 sentinel, as in string_indexofL).
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
15698 
// Single-character IndexOf over a UTF16 string (encoding U); three vector
// temps plus one GPR temp are consumed by the masm helper.
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15712 
// Single-character IndexOf over a Latin1 string (encoding L); mirrors
// string_indexof_char but dispatches to the Latin1 masm helper.
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15726 
// fast string equals
// Non-EVEX fallback: delegates to arrays_equals with is_array_equ=false
// (string form) and is_char=false; knoreg marks "no opmask register".
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15743 
// AVX-512 variant of string equals (requires avx512vlbw); identical to
// string_equals except an opmask temp (ktmp) is supplied.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                           legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15759 
// fast array equals
// byte[] equality (LL encoding), non-EVEX fallback. arrays_equals is called
// with is_array_equ=true so lengths are read from the array headers (no
// explicit length operand; tmp3 is a scratch counter).
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15776 
// AVX-512 variant of byte[] equality (requires avx512vlbw); adds the ktmp
// opmask temporary.
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15792 
// char[] equality (UU encoding), non-EVEX fallback; is_char=true selects the
// 16-bit element path in arrays_equals.
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15808 
// AVX-512 variant of char[] equality (requires avx512vlbw); adds the ktmp
// opmask temporary.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15824 
// Vectorized array hashCode (requires AVX2). result is both an input (the
// initial hash accumulator, see the Binary result basic_type operand) and the
// output; basic_type is an immediate encoding the element BasicType. The
// thirteen vector temps feed the unrolled masm implementation.
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
                         legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
                         legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
%{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
         TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
  ins_encode %{
    __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                       $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
                       $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
                       $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
  %}
  ins_pipe( pipe_slow );
%}
15850 
15851 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15852                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr,)
15853 %{
15854   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15855   match(Set result (CountPositives ary1 len));
15856   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15857 
15858   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15859   ins_encode %{
15860     __ count_positives($ary1$$Register, $len$$Register,
15861                        $result$$Register, $tmp3$$Register,
15862                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15863   %}
15864   ins_pipe( pipe_slow );
15865 %}
15866 
15867 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15868                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr,)
15869 %{
15870   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15871   match(Set result (CountPositives ary1 len));
15872   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15873 
15874   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15875   ins_encode %{
15876     __ count_positives($ary1$$Register, $len$$Register,
15877                        $result$$Register, $tmp3$$Register,
15878                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15879   %}
15880   ins_pipe( pipe_slow );
15881 %}
15882 
// fast char[] to byte[] compression
// Non-EVEX fallback (no avx512vlbw or no bmi2); result receives the masm
// routine's outcome and knoreg fills both opmask parameters.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15900 
// AVX-512 char[]->byte[] compression (requires avx512vlbw and bmi2); adds
// two opmask temporaries over the fallback rule.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
// fast byte[] to char[] inflation
// Non-EVEX fallback; produces no value (Universe dummy result), only the
// side effect of widening src into dst.
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15931 
// AVX-512 byte[]->char[] inflation (requires avx512vlbw and bmi2); one
// opmask temporary replaces knoreg.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15945 
// encode char[] to byte[] in ISO_8859_1
// Matches EncodeISOArray nodes whose is_ascii() flag is false; the final
// `false` argument selects the ISO-8859-1 (not ASCII) limit in the masm code.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}
15962 
// encode char[] to byte[] in ASCII
// Same node/shape as encode_iso_array but selected when is_ascii() is true;
// the trailing `true` switches the masm routine to the ASCII limit.
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}
15979 
15980 //----------Overflow Math Instructions-----------------------------------------
15981 
// Int add overflow check: performs the addl so the CPU's OF flag reflects
// signed overflow; op1 (rax) is clobbered with the sum (USE_KILL).
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15994 
// Int add overflow check, immediate operand variant of overflowAddI_rReg.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16007 
// Long add overflow check (addq); op1 (rax) is clobbered with the sum.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16019 
// Long add overflow check, 32-bit-immediate variant (immL32: addq only
// accepts sign-extended 32-bit immediates).
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16031 
// Int subtract overflow check: cmpl sets the same flags as subl without
// writing op1, so no register is killed.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16042 
// Int subtract overflow check, immediate variant of overflowSubI_rReg.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16053 
// Long subtract overflow check via non-destructive cmpq.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16064 
// Long subtract overflow check, 32-bit-immediate variant.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16075 
// Int negate overflow check: 0 - x matched as OverflowSubI with a zero
// left operand; negl clobbers op2 (rax) with the negated value.
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl    $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16087 
// Long negate overflow check (0 - x via negq); op2 (rax) is clobbered.
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
%{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq    $op2\t# overflow check long" %}
  ins_encode %{
    __ negq($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16099 
// Int multiply overflow check: imull sets OF/CF on signed overflow;
// op1 (rax) receives the product and is killed.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16111 
// Int multiply-by-immediate overflow check: the three-operand imull writes
// into a scratch TEMP, so neither source register is clobbered.
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16123 
// Long multiply overflow check; op1 (rax) receives the product and is killed.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16135 
// Long multiply-by-immediate overflow check; three-operand imulq into a
// scratch TEMP keeps both sources intact (immL32: sign-extended imm only).
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16147 
16148 
16149 //----------Control Flow Instructions------------------------------------------
16150 // Signed compare Instructions
16151 
16152 // XXX more variants!!
// XXX more variants!!
// Signed int compare, register-register.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16164 
// Signed int compare, register-immediate.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16175 
// Signed int compare against a memory operand (folds the LoadI);
// ins_cost 500 discourages the match when a register form is available.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16187 
// Int compare with zero: testl reg,reg sets the same flags as cmpl reg,0
// with a shorter encoding.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl   $src, $src" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16198 
// (src & con) compared to zero, fused into a single testl reg,imm.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  ins_encode %{
    __ testl($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16209 
// (src1 & src2) compared to zero, fused into a single testl reg,reg.
instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl   $src1, $src2" %}
  ins_encode %{
    __ testl($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16220 
// (src & load(mem)) compared to zero, fused into testl reg,mem.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  ins_encode %{
    __ testl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16231 
// Unsigned compare Instructions; really, same as signed except they
// produce an rFlagsRegU instead of rFlagsReg, so consumers use the
// unsigned condition codes (below/above instead of less/greater).
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned compare of a register against an immediate.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Unsigned compare with the second operand's load folded in.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Unsigned compare against zero via testl. $zero only constrains the
// match rule.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl   $src, $src\t# unsigned" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16278 
// Pointer compare (64-bit); pointer compares are unsigned, hence the
// rFlagsRegU result.
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

// Pointer compare with the second operand's load folded in. Only for
// loads without GC-barrier data (barrier_data() == 0).
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// XXX this is generalized by compP_rReg_mem???
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16319 
// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq   $src, $src\t# ptr" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
// Null-check a pointer in memory. The imm32 0xFFFFFFFF sign-extends to
// all-ones in testq, so ZF is set iff the full 64-bit word is zero.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// When the compressed-oops heap base is zero, r12 holds 0; compare it
// against memory to get a shorter null-check encoding.
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpq(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16361 
// Compare two compressed (narrow) oops.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}

// Compressed-oop compare with the load folded in (barrier-free loads
// only).
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Compare a narrow-oop register against a narrow-oop constant; needs
// the relocation-aware cmp_narrow_oop helper.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Compare an in-memory narrow oop against a narrow-oop constant.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Compare a narrow-klass register against a narrow-klass constant.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Compare an in-memory narrow klass against a constant; disabled for
// compact object headers (different klass field layout).
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
%{
  predicate(!UseCompactObjectHeaders);
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Narrow-oop null check in a register.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
  match(Set cr (CmpN src zero));

  format %{ "testl   $src, $src\t# compressed ptr" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Narrow-oop null check directly in memory (heap base != 0 case).
// NOTE(review): the format string advertises "testl ... 0xffffffff" but
// the encoding emits cmpl with imm -1, which sets ZF on a different
// condition than testl would — confirm against the Assembler semantics
// and the flags consumers before changing anything here.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Narrow-oop null check in memory when the heap base is zero: r12
// holds 0, so compare it against the memory word.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpl(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16461 
// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

// Long (64-bit) register-register compare.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

// Long compare against an immediate; immL32 restricts it to constants
// that fit the sign-extended imm32 field of cmpq.
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Long compare with the second operand's load folded in.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Long compare against zero via testq. $zero only constrains the
// match rule.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL src zero));

  format %{ "testq   $src, $src" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Set flags from (src & con) without materializing the AND result.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
%{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq   $src, $con\t# long" %}
  ins_encode %{
    __ testq($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Set flags from (src1 & src2) without materializing the AND result.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
%{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq   $src1, $src2\t# long" %}
  ins_encode %{
    __ testq($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Same as above, additionally folding a load of the second operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Variant matching a pointer reinterpreted as a long (CastP2X) ANDed
// with a loaded long.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16552 
// Manifest a CmpU result in an integer register.  Very painful.
// This is the test to avoid.
// dst = -1 if src1 <u src2, 0 if src1 == src2, 1 if src1 >u src2.
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // Was "# CmpL3" — copy-paste from cmpL3_reg_reg; this node is CmpU3.
  format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setcc   $dst \t# emits setne + movzbl or setzune for APX"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);                  // assume below: dst = -1
    __ jccb(Assembler::below, done);              // unsigned less-than -> keep -1
    __ setcc(Assembler::notZero, $dst$$Register); // equal -> 0, above -> 1
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16576 
// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// dst = -1 if src1 < src2, 0 if src1 == src2, 1 if src1 > src2
// (signed).
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setcc   $dst \t# emits setne + movzbl or setzune for APX"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    // Assume less: dst = -1; the jcc below keeps it on signed less.
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register); // equal -> 0, greater -> 1
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16600 
// Manifest a CmpUL result in an integer register.  Very painful.
// This is the test to avoid.
// dst = -1 if src1 <u src2, 0 if src1 == src2, 1 if src1 >u src2.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // Was "# CmpL3" — copy-paste from cmpL3_reg_reg; this node is CmpUL3.
  format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setcc   $dst \t# emits setne + movzbl or setzune for APX"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);                  // assume below: dst = -1
    __ jccb(Assembler::below, done);              // unsigned less-than -> keep -1
    __ setcc(Assembler::notZero, $dst$$Register); // equal -> 0, above -> 1
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16624 
// Unsigned long compare Instructions; really, same as signed long except they
// produce an rFlagsRegU instead of rFlagsReg.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare against an imm32-encodable constant.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Unsigned long compare with the second operand's load folded in.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Unsigned long compare against zero via testq.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq   $src, $src\t# unsigned" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16670 
// Compare an in-memory (sign-extended) byte against an 8-bit immediate.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb    $mem, $imm" %}
  ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Test bits of an in-memory unsigned byte; immU7 keeps the mask inside
// the byte's non-sign bits.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# ubyte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Test bits of an in-memory signed byte against an 8-bit mask.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# byte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16700 
//----------Max and Min--------------------------------------------------------
// Min Instructions

// cmov helper for MinI: no match rule, so the effect() declaration is
// required; invoked only from the expand rules below.
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// APX new-data-destination form of the cmov helper above.
instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// MinI via compare + conditional move (branchless).
instruct minI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}

// MinI for APX targets using the three-operand ecmov.
instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}
16755 
// cmov helper for MaxI: no match rule, so the effect() declaration is
// required; invoked only from the expand rules below.
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// APX new-data-destination form of the cmov helper above.
instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// MaxI via compare + conditional move (branchless).
instruct maxI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}

// MaxI for APX targets using the three-operand ecmov.
instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, cr);
  %}
%}
16807 
16808 // ============================================================================
16809 // Branch Instructions
16810 
16811 // Jump Direct - Label defines a relative address from JMP+1
16812 instruct jmpDir(label labl)
16813 %{
16814   match(Goto);
16815   effect(USE labl);
16816 
16817   ins_cost(300);
16818   format %{ "jmp     $labl" %}
16819   size(5);
16820   ins_encode %{
16821     Label* L = $labl$$label;
16822     __ jmp(*L, false); // Always long jump
16823   %}
16824   ins_pipe(pipe_jmp);
16825 %}
16826 
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Same encoding as jmpCon, but matches the CountedLoopEnd back branch.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16858 
// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u   $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional jump on flags from a float/double compare where the
// "unordered is same as" flavor needs no parity fixup.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u   $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// eq/ne conditional jump on UCF flags: NaN sets PF, so equality needs
// an extra parity branch (NaN compares unequal).
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u    $labl\n\t"
      $$emit$$"j$cop,u   $labl"
    } else {
      $$emit$$"jp,u    done\n\t"
      $$emit$$"j$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (parity) counts as not-equal: either jump takes the branch.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must NOT take the equal branch: skip over it on parity.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - using signed and unsigned comparison
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,su   $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16934 
16935 // ============================================================================
16936 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
16937 // superklass array for an instance of the superklass.  Set a hidden
16938 // internal cache on a hit (cache is checked with exposed code in
16939 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
16940 // encoding ALSO sets flags.
16941 
16942 instruct partialSubtypeCheck(rdi_RegP result,
16943                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16944                              rFlagsReg cr)
16945 %{
16946   match(Set result (PartialSubtypeCheck sub super));
16947   predicate(!UseSecondarySupersTable);
16948   effect(KILL rcx, KILL cr);
16949 
16950   ins_cost(1100);  // slightly larger than the next version
16951   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16952             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16953             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16954             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16955             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16956             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16957             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
16958     "miss:\t" %}
16959 
16960   ins_encode %{
16961     Label miss;
16962     // NB: Callers may assume that, when $result is a valid register,
16963     // check_klass_subtype_slow_path_linear sets it to a nonzero
16964     // value.
16965     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16966                                             $rcx$$Register, $result$$Register,
16967                                             nullptr, &miss,
16968                                             /*set_cond_codes:*/ true);
16969     __ xorptr($result$$Register, $result$$Register);
16970     __ bind(miss);
16971   %}
16972 
16973   ins_pipe(pipe_slow);
16974 %}
16975 
16976 // ============================================================================
16977 // Two versions of hashtable-based partialSubtypeCheck, both used when
16978 // we need to search for a super class in the secondary supers array.
16979 // The first is used when we don't know _a priori_ the class being
16980 // searched for. The second, far more common, is used when we do know:
16981 // this is used for instanceof, checkcast, and any case where C2 can
16982 // determine it by constant propagation.
16983 
16984 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16985                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16986                                        rFlagsReg cr)
16987 %{
16988   match(Set result (PartialSubtypeCheck sub super));
16989   predicate(UseSecondarySupersTable);
16990   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16991 
16992   ins_cost(1000);
16993   format %{ "partialSubtypeCheck $result, $sub, $super" %}
16994 
16995   ins_encode %{
16996     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
16997 					 $temp3$$Register, $temp4$$Register, $result$$Register);
16998   %}
16999 
17000   ins_pipe(pipe_slow);
17001 %}
17002 
// Hashed secondary-supers check against a compile-time-constant super
// class: the klass's hash slot is computed here and either inlined or
// dispatched to a per-slot stub.
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(700);  // smaller than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}

  ins_encode %{
    u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                       $temp3$$Register, $temp4$$Register, $result$$Register,
                                       super_klass_slot);
    } else {
      // Out-of-line: the stub expects the same fixed register layout.
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
    }
  %}

  ins_pipe(pipe_slow);
%}
17027 
17028 // ============================================================================
17029 // Branch Instructions -- short offset versions
17030 //
17031 // These instructions are used to replace jumps of a long offset (the default
17032 // match) with jumps of a shorter offset.  These instructions are all tagged
17033 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17034 // match rules in general matching.  Instead, the ADLC generates a conversion
17035 // method in the MachNode which can be used to do in-place replacement of the
17036 // long variant with the shorter variant.  The compiler will determine if a
17037 // branch can be taken by the is_short_branch_offset() predicate in the machine
17038 // specific code section of the file.
17039 
17040 // Jump Direct - Label defines a relative address from JMP+1
17041 instruct jmpDir_short(label labl) %{
17042   match(Goto);
17043   effect(USE labl);
17044 
17045   ins_cost(300);
17046   format %{ "jmp,s   $labl" %}
17047   size(2);
17048   ins_encode %{
17049     Label* L = $labl$$label;
17050     __ jmpb(*L);
17051   %}
17052   ins_pipe(pipe_jmp);
17053   ins_short_branch(1);
17054 %}
17055 
17056 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17057 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17058   match(If cop cr);
17059   effect(USE labl);
17060 
17061   ins_cost(300);
17062   format %{ "j$cop,s   $labl" %}
17063   size(2);
17064   ins_encode %{
17065     Label* L = $labl$$label;
17066     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17067   %}
17068   ins_pipe(pipe_jcc);
17069   ins_short_branch(1);
17070 %}
17071 
17072 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17073 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17074   match(CountedLoopEnd cop cr);
17075   effect(USE labl);
17076 
17077   ins_cost(300);
17078   format %{ "j$cop,s   $labl\t# loop end" %}
17079   size(2);
17080   ins_encode %{
17081     Label* L = $labl$$label;
17082     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17083   %}
17084   ins_pipe(pipe_jcc);
17085   ins_short_branch(1);
17086 %}
17087 
17088 // Jump Direct Conditional - using unsigned comparison
17089 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17090   match(If cop cmp);
17091   effect(USE labl);
17092 
17093   ins_cost(300);
17094   format %{ "j$cop,us  $labl" %}
17095   size(2);
17096   ins_encode %{
17097     Label* L = $labl$$label;
17098     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17099   %}
17100   ins_pipe(pipe_jcc);
17101   ins_short_branch(1);
17102 %}
17103 
// Short form of jmpConUCF (float/double flags, no parity fixup needed).
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Short form of jmpConUCF2: eq/ne on float/double flags with the
// parity (unordered) fixup, emitted entirely with short branches.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s  $labl\n\t"
      $$emit$$"j$cop,u,s  $labl"
    } else {
      $$emit$$"jp,u,s  done\n\t"
      $$emit$$"j$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (parity) counts as not-equal.
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must not take the equal branch.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Jump Direct Conditional - using signed and unsigned comparison
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,sus  $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17168 
17169 // ============================================================================
17170 // inlined locking and unlocking
17171 
// Inline fast-path monitor enter.  Delegates to MacroAssembler::fast_lock,
// which sets the flags register to indicate success/failure of the fast
// path.  The box register (rbx) is consumed (USE_KILL); rax and tmp are
// scratch.  r15 holds the current JavaThread on x86_64.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17182 
// Inline fast-path monitor exit, mirroring cmpFastLock above.  The box
// comes in rax and is consumed (USE_KILL); tmp is scratch.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);
  ins_cost(300);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17193 
17194 
17195 // ============================================================================
17196 // Safepoint Instructions
// Safepoint poll: a load-test of the polling page whose address is held
// in a register (thread-local polling).  When a safepoint is pending the
// page is protected and the access traps into the VM.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl   rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  ins_encode %{
    // Record a poll relocation so the runtime can identify this site.
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    // The runtime pattern-matches this exact instruction shape.
    assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
17213 
// Broadcast a long scalar into an AVX-512 opmask register (MaskAll),
// with the mask length taken from this node's vector length.
instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
17223 
// MaskAll from an int source when more than 32 mask bits are needed:
// sign-extend the int into a temp long register first, then broadcast
// into the opmask register.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    // Widen int -> long so all mask_len bits are defined.
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
17236 
17237 // ============================================================================
17238 // Procedure Call/Return Instructions
17239 // Call Java Static Instruction
17240 // Note: If this code changes, the corresponding ret_addr_offset() and
17241 //       compute_padding() functions will have to be adjusted.
// Direct (static/final/special) Java call: plain relative call (E8 cd).
// clear_avx emits a vzeroupper-style cleanup before the call; the
// alignment keeps the call site patchable (see ret_addr_offset /
// compute_padding notes above).
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17253 
17254 // Call Java Dynamic Instruction
17255 // Note: If this code changes, the corresponding ret_addr_offset() and
17256 //       compute_padding() functions will have to be adjusted.
// Dynamic (inline-cache) Java call: rax is pre-loaded with
// Universe::non_oop_word() as the IC holder placeholder before the call,
// per the format string; the Java_Dynamic_Call encoding emits that
// sequence.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17269 
17270 // Call Runtime Instruction
// Call into the VM runtime (may safepoint, unlike the leaf calls below).
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17281 
17282 // Call runtime without safepoint
// Leaf runtime call: no safepoint, no Java-visible state transition.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17293 
17294 // Call runtime without safepoint and with vector arguments
// Leaf runtime call that passes vector arguments; note it deliberately
// omits clear_avx so upper YMM/ZMM lanes of the arguments survive.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17305 
17306 // Call runtime without safepoint
// Leaf runtime call known not to use floating point arguments/results.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17317 
17318 // Return Instruction
17319 // Remove the return address & jump to it.
17320 // Notice: We always emit a nop after a ret to make sure there is room
17321 // for safepoint patching
// Method return: pop the return address and jump to it (ret with no
// stack-argument cleanup; see the note above about the trailing nop for
// safepoint patching room).
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    __ ret(0);
  %}
  ins_pipe(pipe_jmp);
%}
17332 
17333 // Tail Call; Jump from runtime stub to Java code.
17334 // Also known as an 'interprocedural jump'.
17335 // Target of jump will eventually return to caller.
17336 // TailJump below removes the return address.
17337 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17338 // emitted just above the TailCall which has reset rbp to the caller state.
// Interprocedural tail call: indirect jump to jump_target with the
// callee Method* pre-loaded in rbx (method_ptr operand pins it there).
// rbp is excluded from jump_target per the comment above this rule.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  ins_cost(300);
  format %{ "jmp     $jump_target\t# rbx holds method" %}
  ins_encode %{
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17350 
17351 // Tail Jump; remove the return address; jump to target.
17352 // TailCall above leaves the return address around.
// Tail jump: discard the return address (popped into rdx) and jump to
// jump_target; the exception oop is pinned in rax by the ex_oop operand.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(300);
  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  ins_encode %{
    // Return address goes to rdx; presumably the handler expects it
    // there — see the runtime's exception-jump convention.
    __ popq(as_Register(RDX_enc));
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
17366 
17367 // Forward exception.
// Jump to the shared forward-exception stub (no scratch register needed
// for the jump, hence noreg).
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "jmp     forward_exception_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17378 
17379 // Create exception oop: created by stack-crawling runtime code.
17380 // Created exception is now available to this handler, and is setup
17381 // just prior to jumping to this handler.  No code emitted.
// Zero-size marker: tells the register allocator the incoming exception
// oop is already in rax when the handler is entered; emits nothing.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
17392 
17393 // Rethrow exception:
17394 // The exception oop will come in the first argument position.
17395 // Then JUMP (not call) to the rethrow stub code.
// Rethrow: jump (not call) to the rethrow stub; the exception oop is in
// the first argument register per the comment above.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp     rethrow_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17407 
17408 // ============================================================================
17409 // This name is KNOWN by the ADLC and cannot be changed.
17410 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17411 // for this guy.
// ThreadLocal: the current JavaThread lives permanently in r15 on
// x86_64, so this emits no code — it only binds dst to r15.
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  size(0);
  format %{ "# TLS is in R15" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
17421 
// ---- Scalar float add (AddF) ----
// UseAVX == 0 rules use the destructive two-operand SSE form (addss),
// so dst is both input and output; UseAVX > 0 rules use the
// three-operand AVX form (vaddss).  *_mem variants fold a LoadF into
// the instruction; *_imm variants read the constant from the per-nmethod
// constant table via $constantaddress.

// dst += src (SSE, register operand)
instruct addF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst src));

  format %{ "addss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst += [mem] (SSE, folded load)
instruct addF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst += constant-table entry (SSE)
instruct addF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst con));
  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 + src2 (AVX, non-destructive)
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 + [mem] (AVX, folded load)
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src + constant-table entry (AVX)
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17492 
// ---- Scalar double add (AddD) ----
// Same scheme as the AddF rules above: SSE destructive addsd for
// UseAVX == 0, three-operand vaddsd for UseAVX > 0; memory and
// constant-table variants fold the load.

// dst += src (SSE)
instruct addD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst src));

  format %{ "addsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst += [mem] (SSE)
instruct addD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst += constant-table entry (SSE)
instruct addD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst con));
  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 + src2 (AVX)
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 + [mem] (AVX)
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src + constant-table entry (AVX)
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17563 
// ---- Scalar float subtract (SubF) ----
// SSE destructive subss for UseAVX == 0, three-operand vsubss for
// UseAVX > 0; memory and constant-table variants fold the load.

// dst -= src (SSE)
instruct subF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst src));

  format %{ "subss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst -= [mem] (SSE)
instruct subF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst -= constant-table entry (SSE)
instruct subF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst con));
  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 - src2 (AVX)
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 - [mem] (AVX)
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src - constant-table entry (AVX)
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17634 
// ---- Scalar double subtract (SubD) ----
// Same scheme as SubF: SSE destructive subsd vs AVX vsubsd, with
// folded-load and constant-table variants.

// dst -= src (SSE)
instruct subD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst src));

  format %{ "subsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst -= [mem] (SSE)
instruct subD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst -= constant-table entry (SSE)
instruct subD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst con));
  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 - src2 (AVX)
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 - [mem] (AVX)
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src - constant-table entry (AVX)
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17705 
// ---- Scalar float multiply (MulF) ----
// SSE destructive mulss for UseAVX == 0, three-operand vmulss for
// UseAVX > 0; memory and constant-table variants fold the load.

// dst *= src (SSE)
instruct mulF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst src));

  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst *= [mem] (SSE)
instruct mulF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst *= constant-table entry (SSE)
instruct mulF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst con));
  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 * src2 (AVX)
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 * [mem] (AVX)
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src * constant-table entry (AVX)
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17776 
// ---- Scalar double multiply (MulD) ----
// Same scheme as MulF: SSE destructive mulsd vs AVX vmulsd, with
// folded-load and constant-table variants.

// dst *= src (SSE)
instruct mulD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst src));

  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst *= [mem] (SSE)
instruct mulD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst *= constant-table entry (SSE)
instruct mulD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst con));
  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 * src2 (AVX)
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 * [mem] (AVX)
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src * constant-table entry (AVX)
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17847 
// ---- Scalar float divide (DivF) ----
// SSE destructive divss for UseAVX == 0, three-operand vdivss for
// UseAVX > 0; memory and constant-table variants fold the load.

// dst /= src (SSE)
instruct divF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst src));

  format %{ "divss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst /= [mem] (SSE)
instruct divF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst /= constant-table entry (SSE)
instruct divF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst con));
  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 / src2 (AVX)
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 / [mem] (AVX)
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src / constant-table entry (AVX)
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17918 
// ---- Scalar double divide (DivD) ----
// Same scheme as DivF: SSE destructive divsd vs AVX vdivsd, with
// folded-load and constant-table variants.

// dst /= src (SSE)
instruct divD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst src));

  format %{ "divsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst /= [mem] (SSE)
instruct divD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst /= constant-table entry (SSE)
instruct divD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst con));
  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 / src2 (AVX)
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src1 / [mem] (AVX)
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// dst = src / constant-table entry (AVX)
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17989 
// ---- Scalar float/double absolute value (AbsF/AbsD) ----
// Implemented by AND-ing with a sign-bit mask constant held at a
// well-known external address (float_signmask / double_signmask):
// clearing the sign bit yields |x|.  SSE rules are destructive on dst;
// AVX rules use the non-destructive three-operand form.

// |dst| via andps with 0x7fffffff mask (SSE)
instruct absF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// dst = |src| via vandps (AVX)
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// |dst| via andpd with 0x7fffffffffffffff mask (SSE)
instruct absD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// dst = |src| via vandpd (AVX)
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
18039 
// ---- Scalar float/double negate (NegF/NegD) ----
// Implemented by XOR-ing with a sign-flip mask constant
// (float_signflip / double_signflip): toggling only the sign bit
// negates the value, including for NaN/inf/zero.  SSE rules are
// destructive on dst; AVX rules use macro-assembler helpers that take
// a separate source.

// dst = -dst via xorps with 0x80000000 mask (SSE)
instruct negF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// dst = -src (AVX helper)
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// dst = -dst via xorpd with 0x8000000000000000 mask (SSE)
instruct negD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// dst = -src (AVX helper)
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18087 
18088 // sqrtss instruction needs destination register to be pre initialized for best performance
18089 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
// Scalar float square root; dst doubles as the source register (see the
// pre-initialization note above — avoids a partial-register dependency
// on the untouched upper lanes of dst).
instruct sqrtF_reg(regF dst) %{
  match(Set dst (SqrtF dst));
  format %{ "sqrtss  $dst, $dst" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// sqrtsd instruction needs destination register to be pre initialized for best performance
// Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
// Scalar double square root; same dst-as-source scheme as sqrtF_reg.
instruct sqrtD_reg(regD dst) %{
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd  $dst, $dst" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18109 
18110 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18111   effect(TEMP tmp);
18112   match(Set dst (ConvF2HF src));
18113   ins_cost(125);
18114   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18115   ins_encode %{
18116     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18117   %}
18118   ins_pipe( pipe_slow );
18119 %}
18120 
18121 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18122   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18123   effect(TEMP ktmp, TEMP rtmp);
18124   match(Set mem (StoreC mem (ConvF2HF src)));
18125   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18126   ins_encode %{
18127     __ movl($rtmp$$Register, 0x1);
18128     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18129     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18130   %}
18131   ins_pipe( pipe_slow );
18132 %}
18133 
18134 instruct vconvF2HF(vec dst, vec src) %{
18135   match(Set dst (VectorCastF2HF src));
18136   format %{ "vector_conv_F2HF $dst $src" %}
18137   ins_encode %{
18138     int vlen_enc = vector_length_encoding(this, $src);
18139     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18140   %}
18141   ins_pipe( pipe_slow );
18142 %}
18143 
// Vector float -> half-float cast stored directly to memory. Only used when
// the stored vector is at least 16 bytes (see predicate); smaller results go
// through the register form.
instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  predicate(n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem (VectorCastF2HF src)));
  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18154 
// Convert a half-float (FP16 bits in a GPR) to a float in an XMM register.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));
  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
18163 
// Vector half-float -> float cast with the source loaded from memory.
instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));
  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    // Encoding derived from this node (the widened destination vector).
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18173 
// Vector half-float -> float cast, register to register.
instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));
  ins_cost(125);
  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18184 
18185 // ---------------------------------------- VectorReinterpret ------------------------------------
18186 instruct reinterpret_mask(kReg dst) %{
18187   predicate(n->bottom_type()->isa_vectmask() &&
18188             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18189   match(Set dst (VectorReinterpret dst));
18190   ins_cost(125);
18191   format %{ "vector_reinterpret $dst\t!" %}
18192   ins_encode %{
18193     // empty
18194   %}
18195   ins_pipe( pipe_slow );
18196 %}
18197 
// Reinterpret a short-element mask as a byte-element mask (lane counts differ
// but total bit size matches, as asserted below). The mask is expanded into
// vector lanes and re-extracted at byte granularity via a temporary vector.
instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst cover the same number of bits
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
     // Byte sizes of the source (short-lane) and destination (byte-lane) masks.
     int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
     int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz , "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     // Expand mask bits into short lanes, then collapse back at byte width.
     __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18217 
// Reinterpret an int/float-element mask as a byte-element mask. Same scheme as
// reinterpret_mask_W2B: expand the mask into 32-bit lanes, then re-extract it
// at byte granularity through a temporary vector register.
instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst cover the same number of bits
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
     // T_INT and T_FLOAT lanes share the same 4-byte element size.
     int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
     int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz , "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18238 
// Reinterpret a long/double-element mask as a byte-element mask. Same scheme
// as reinterpret_mask_W2B but expanding into 64-bit lanes.
instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // src and dst cover the same number of bits
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
     // T_LONG and T_DOUBLE lanes share the same 8-byte element size.
     int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
     int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz , "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18259 
// Reinterpret a vector as a vector of the same size in bytes: a pure
// type-level change, so no instructions are emitted (dst and src coincide).
instruct reinterpret(vec dst) %{
  predicate(!n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}
18271 
// SSE-only (UseAVX == 0) reinterpret of a narrow vector (4 or 8 bytes) into a
// wider one (<= 16 bytes). Loads a mask that keeps only the low src bytes and
// ANDs it with the source, so the upper bytes of dst are zeroed.
instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");

    int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_vlen_in_bytes == 4) {
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
    } else {
      assert(src_vlen_in_bytes == 8, "");
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
    }
    // Keep the low src bytes; upper bytes of dst become zero.
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18294 
// AVX reinterpret of a 4-byte vector into a wider one: a single masked AND
// zeroes everything above the low 32 bits.
instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}
18308 
18309 
// AVX reinterpret of an 8/16/32-byte vector into a wider one. The move
// instructions chosen below zero the destination's upper bits implicitly,
// so no explicit masking is needed (unlike the 4-byte case above).
instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_encode %{
    // Dispatch on the source size in bytes.
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18328 
// Reinterpret a vector into a narrower one: copy only the low dst-sized part
// of the source; the move width is selected from the destination size.
instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_encode %{
    // Dispatch on the destination size in bytes.
    switch (Matcher::vector_length_in_bytes(this)) {
      case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
      case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18346 
18347 // ----------------------------------------------------------------------------------------------------
18348 
// Round a double according to the immediate rounding mode (SSE4.1 roundsd).
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  format %{ "roundsd $dst,$src" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
      // Legacy-SSE roundsd merges into dst's upper bits: clear dst first to
      // break the false dependence on its previous contents.
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}
18362 
// Round a compile-time double constant: the operand is materialized in the
// constant table and roundsd reads it from memory.
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe(pipe_slow);
%}
18373 
// Round packed doubles for vectors shorter than 8 elements (AVX vroundpd).
// The 8-element case needs the AVX-512 form (vround8D_reg below).
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18385 
// Round 8 packed doubles (512-bit) using the EVEX vrndscalepd instruction.
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
18396 
// Memory-operand variant of vroundD_reg: round packed doubles loaded
// directly from memory (vectors shorter than 8 elements).
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18408 
// Memory-operand variant of vround8D_reg: round 8 packed doubles loaded
// directly from memory (512-bit EVEX form).
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
18419 
// Spin-wait hint (Thread.onSpinWait): emits the x86 pause instruction.
instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
18433 
// a * b + c
// Scalar double fused multiply-add; accumulates into $c in place.
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD  c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18445 
// a * b + c
// Scalar float fused multiply-add; accumulates into $c in place.
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF  c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18457 
18458 // ====================VECTOR INSTRUCTIONS=====================================
18459 
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// If one survives to code emission it is a compiler bug, hence the trap.
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18469 
// Counterpart of MoveVec2Leg: dummy legacy-vec to vec move, also removed
// during post-selection cleanup and never emitted.
instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18478 
18479 // ============================================================================
18480 
// Load vectors generic operand pattern
// The macro-assembler helper picks the move width from the vector byte length.
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "load_vector $dst,$mem" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe( pipe_slow );
%}
18492 
// Store vectors generic operand pattern.
// Selects a store instruction by the source vector's byte length.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "store_vector $mem,$src\n\t" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
      case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
      case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
      case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
      case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18510 
18511 // ---------------------------------------- Gather ------------------------------------
18512 
18513 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18514 
// AVX2 gather for non-subword elements (<= 32-byte vectors, no AVX512VL).
// The all-ones vector mask enables every lane; vgather consumes (clears) the
// mask as it loads, which is why $mask is a TEMP rather than an input.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Set all mask lanes (compare-equal of a register with itself yields all 1s).
    __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
    // Materialize the base address; gather addresses are base + scaled index.
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18531 
18532 
// AVX-512 gather for non-subword elements (AVX512VL, or full 64-byte vectors).
// kxnor of a register with itself produces an all-ones opmask (all lanes on).
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18548 
// Masked AVX-512 gather for non-subword elements.
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, hence the mask operand is copied to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    // Zero dst so lanes whose mask bit is clear read as zero.
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18569 
// Gather of byte/short elements for vectors of at most 8 bytes: handled by a
// scalar-load helper (x86 has no hardware subword gather).
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18583 
// Gather of byte/short elements for vectors larger than 8 bytes; the helper
// loops over the indices, using the listed temporaries as scratch state.
// idx_base is copied because the helper advances its index pointer.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    // noreg for the mask arguments: this is the unmasked variant.
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18601 
// Masked subword gather, <= 8-byte vectors, AVX-512BW path: the kReg mask is
// transferred to a GPR and consumed bit-by-bit by the scalar helper.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // mask_idx starts at bit 0 of the mask word.
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    // Move the opmask into a GPR for scalar bit tests in the helper.
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18617 
// Masked subword gather, > 8-byte vectors, AVX-512BW path: loop-based helper
// driven by the mask bits held in a GPR; idx_base is copied since the helper
// advances its index pointer.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // mask_idx starts at bit 0 of the mask word.
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    // Move the opmask into a GPR for scalar bit tests in the helper.
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18637 
// Masked subword gather, <= 8-byte vectors, pre-AVX512VLBW path: the vector
// mask is condensed to a GPR bitmask with vpmovmskb; for shorts, pext with
// the 0x55555555 pattern drops the duplicate per-byte bits (one bit per lane).
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      // Each short lane contributed two identical mask bits: keep every other one.
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    // mask_idx is reused as the running bit index, starting at 0.
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18657 
// Masked subword gather, > 8-byte vectors, pre-AVX512VLBW path: same mask
// condensation as the LE8B AVX2 variant, then the loop-based gather helper.
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    // The helper advances its index pointer, so work on a copy of idx_base.
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      // Each short lane contributed two identical mask bits: keep every other one.
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    // mask_idx is reused as the running bit index, starting at 0.
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18681 
18682 // ====================Scatter=======================================
18683 
18684 // Scatter INT, LONG, FLOAT, DOUBLE
18685 
// AVX-512 scatter of non-subword elements with all lanes enabled (all-ones
// opmask loaded from the constant table).
// NOTE(review): the format string says "k2" but the opmask temp is $ktmp —
// likely a stale debug string; harmless but worth updating separately.
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18704 
// Masked AVX-512 scatter of non-subword elements.
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, hence the mask operand is copied to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18722 
18723 // ====================REPLICATE=======================================
18724 
// Replicate byte scalar to be vector
// Three paths: EVEX broadcast from GPR, AVX2 broadcast via XMM, or a legacy
// SSE shuffle sequence (unpack + pshuflw + optional punpcklqdq).
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (UseAVX >= 2) {
      int vlen_enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
       assert(UseAVX < 2, "");
      // Legacy SSE: duplicate the byte across a word, shuffle across the low
      // quadword, then mirror into the high quadword for 16-byte vectors.
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 16) {
        assert(vlen == 16, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18754 
// Replicate a byte loaded from memory across the vector (AVX2+ broadcast).
instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18765 
18766 // ====================ReplicateS=======================================
18767 
// Replicate a short scalar across the vector; same three-path structure as
// vReplB_reg (EVEX GPR broadcast / AVX2 XMM broadcast / legacy SSE shuffles).
instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (UseAVX >= 2) {
      if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
        __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      assert(UseAVX < 2, "");
      // Legacy SSE: shuffle the short across the low quadword, then mirror
      // into the high quadword for 8-element vectors.
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 8) {
        assert(vlen == 8, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18795 
// Replicate a half-float (FP16) immediate across the vector: materialize the
// 16-bit constant in a GPR, then broadcast it word-wise.
// NOTE(review): there is no predicate on this rule; selection is presumably
// gated by match-rule support checks elsewhere — the assert below enforces
// the AVX512-FP16 requirement at encode time. Confirm against the matcher.
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
    __ movl($rtmp$$Register, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18809 
// Replicate a half-float (FP16) scalar held in an XMM register: move the
// 16-bit value through a GPR (vmovw), then broadcast it word-wise.
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmovw($rtmp$$Register, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18822 
// Replicate a short loaded from memory across every lane. Requires AVX2 for
// the memory-operand form of vpbroadcastw.
instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18833 
18834 // ====================ReplicateI=======================================
18835 
// Replicate an int scalar held in a GPR across all lanes.
// Three tiers: EVEX GPR-broadcast, AVX2 register broadcast, SSE pshufd.
instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      // EVEX form can broadcast directly from the GPR.
      __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      // SSE: splat dword 0 across all four dword positions.
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}
18855 
// Replicate an int loaded from memory across all lanes. Uses AVX2's integer
// broadcast when available, AVX1's float broadcast (same bit pattern)
// otherwise, and an SSE load+shuffle as the fallback.
instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_avx()) {
      // AVX1 has no integer broadcast; vbroadcastss moves the same 32 bits.
      __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $mem$$Address);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}
18873 
// Replicate a small-integral immediate by materializing a pre-replicated
// constant in the constant table and loading it as a vector. The replication
// count written into the table depends on the smallest load granularity the
// CPU supports (4/8/16 bytes), divided by the element size.
instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                                                   type2aelembytes(Matcher::vector_element_basic_type(this))));
    BasicType bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}
18888 
18889 // Replicate scalar zero to be vector
// Replicate scalar zero to be vector
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      // EVEX without AVX512VL: use the length-aware vpxor encoding.
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      // NOTE(review): relies on pxor being VEX-encoded when AVX is enabled so
      // the destination's upper vector bits are zeroed too — confirm in
      // MacroAssembler::pxor.
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18904 
// Replicate the all-ones pattern (-1) for sub-long integral vectors.
instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
18915 
18916 // ====================ReplicateL=======================================
18917 
18918 // Replicate long (8 byte) scalar to be vector
// Replicate long (8 byte) scalar to be vector
// Tiers: EVEX GPR-broadcast, AVX2 register broadcast, SSE unpack.
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      // SSE: duplicate the low quadword into both halves of the XMM register.
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
18938 
// Replicate a long loaded from memory across all lanes. AVX2 broadcast,
// SSE3 movddup, or SSE2 load + unpack as fallback.
instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_sse3()) {
      // movddup loads 8 bytes and duplicates them into both halves.
      __ movddup($dst$$XMMRegister, $mem$$Address);
    } else {
      __ movq($dst$$XMMRegister, $mem$$Address);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
18956 
18957 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// One copy is enough with SSE3 (movddup can duplicate), otherwise two copies
// are written so a plain 16-byte load yields the replicated pair.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}
18969 
// Replicate zero into a long vector (same EVEX/legacy split as ReplI_zero).
instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      // EVEX without AVX512VL: use the length-aware vpxor encoding.
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18984 
// Replicate the all-ones pattern (-1L) into a long vector.
instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
18995 
18996 // ====================ReplicateF=======================================
18997 
// Replicate a float scalar (in an XMM register) across all lanes, AVX path.
instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 4) {
      // 128 bits or less: a single in-lane permute splats element 0.
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      // AVX1, 256-bit: splat within the low lane, then copy it to the high lane.
      assert(vlen == 8, "sanity");
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
19017 
// Replicate a float scalar across all lanes, SSE-only path (no AVX):
// pshufd 0x00 splats dword 0 across the 128-bit register.
instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
19027 
// Replicate a float loaded from memory; vbroadcastss with a memory operand
// is available from AVX1 on, hence the UseAVX > 0 predicate.
instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
19038 
19039 // Replicate float scalar immediate to be vector by loading from const table.
// Replicate float scalar immediate to be vector by loading from const table.
// Replication count in the table matches the minimum broadcastable load size
// for the available ISA (AVX: 1, SSE3: 2, SSE2: 4 copies).
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
                                                           VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}
19052 
// Replicate +0.0f into a float vector. Note the DQ-flavored VL check here
// (avx512vldq) versus the plain VL check in the integer zero rules — xorps
// is a D/Q-class instruction under EVEX.
instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19067 
19068 // ====================ReplicateD=======================================
19069 
19070 // Replicate double (8 bytes) scalar to be vector
// Replicate double (8 bytes) scalar to be vector
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 2) {
      // 128 bits: movddup duplicates the low double into both halves.
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      // AVX1, 256-bit: duplicate into the low lane, then copy to the high lane.
      assert(vlen == 4, "sanity");
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
19090 
// Replicate a double on pre-SSE3 hardware: pshufd 0x44 copies dwords {1,0}
// into both quadword positions, i.e. duplicates the low 64 bits.
instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}
19100 
// Replicate a double loaded from memory: vbroadcastsd for >=256-bit vectors,
// movddup (SSE3) for the 128-bit case.
instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    if (Matcher::vector_length(this) >= 4) {
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    }
  %}
  ins_pipe( pipe_slow );
%}
19115 
19116 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
// Replicate double (8 byte) scalar immediate to be vector by loading from const table.
// One table copy suffices with SSE3 (movddup duplicates), otherwise two.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}
19128 
// Replicate +0.0 into a double vector (same EVEX/legacy split as ReplF_zero;
// xorps zeroes the same bits as xorpd and has a shorter encoding).
instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19143 
19144 // ====================VECTOR INSERT=======================================
19145 
// Insert an integral scalar (GPR) into lane $idx of a vector of at most
// 16 bytes. Works in place on $dst (dst is also an input).
instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19163 
19164 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19165   predicate(Matcher::vector_length_in_bytes(n) == 32);
19166   match(Set dst (VectorInsert (Binary src val) idx));
19167   effect(TEMP vtmp);
19168   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19169   ins_encode %{
19170     int vlen_enc = Assembler::AVX_256bit;
19171     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19172     int elem_per_lane = 16/type2aelembytes(elem_bt);
19173     int log2epr = log2(elem_per_lane);
19174 
19175     assert(is_integral_type(elem_bt), "sanity");
19176     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19177 
19178     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19179     uint y_idx = ($idx$$constant >> log2epr) & 1;
19180     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19181     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19182     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19183   %}
19184   ins_pipe( pipe_slow );
19185 %}
19186 
// Insert an integral scalar (GPR) into lane $idx of a 64-byte (512-bit)
// vector: extract the 128-bit sub-lane holding the element, insert, reinsert.
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx = element within the 128-bit sub-lane, y_idx = which of 4 lanes.
    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19210 
// Insert a long scalar into a 2-element (128-bit) long vector, in place.
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19224 
19225 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19226   predicate(Matcher::vector_length(n) == 4);
19227   match(Set dst (VectorInsert (Binary src val) idx));
19228   effect(TEMP vtmp);
19229   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19230   ins_encode %{
19231     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19232     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19233 
19234     uint x_idx = $idx$$constant & right_n_bits(1);
19235     uint y_idx = ($idx$$constant >> 1) & 1;
19236     int vlen_enc = Assembler::AVX_256bit;
19237     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19238     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19239     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19240   %}
19241   ins_pipe( pipe_slow );
19242 %}
19243 
// Insert a long scalar into an 8-element (512-bit) long vector via the
// extract/insert-128-bit-sub-lane pattern.
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx = which long within the 128-bit sub-lane, y_idx = which of 4 lanes.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19261 
// Insert a float scalar into a vector of fewer than 8 floats (<=128 bits),
// in place. insertps immediate bits [5:4] select the destination element.
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}
19277 
19278 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19279   predicate(Matcher::vector_length(n) >= 8);
19280   match(Set dst (VectorInsert (Binary src val) idx));
19281   effect(TEMP vtmp);
19282   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19283   ins_encode %{
19284     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19285     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19286 
19287     int vlen = Matcher::vector_length(this);
19288     uint x_idx = $idx$$constant & right_n_bits(2);
19289     if (vlen == 8) {
19290       uint y_idx = ($idx$$constant >> 2) & 1;
19291       int vlen_enc = Assembler::AVX_256bit;
19292       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19293       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19294       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19295     } else {
19296       assert(vlen == 16, "sanity");
19297       uint y_idx = ($idx$$constant >> 2) & 3;
19298       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19299       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19300       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19301     }
19302   %}
19303   ins_pipe( pipe_slow );
19304 %}
19305 
// Insert a double scalar into a 2-element (128-bit) double vector, in place.
// The double is moved through a GPR so pinsrq can place its 64-bit pattern.
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19321 
19322 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19323   predicate(Matcher::vector_length(n) == 4);
19324   match(Set dst (VectorInsert (Binary src val) idx));
19325   effect(TEMP vtmp, TEMP tmp);
19326   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19327   ins_encode %{
19328     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19329     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19330 
19331     uint x_idx = $idx$$constant & right_n_bits(1);
19332     uint y_idx = ($idx$$constant >> 1) & 1;
19333     int vlen_enc = Assembler::AVX_256bit;
19334     __ movq($tmp$$Register, $val$$XMMRegister);
19335     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19336     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19337     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19338   %}
19339   ins_pipe( pipe_slow );
19340 %}
19341 
// Insert a double scalar into an 8-element (512-bit) double vector via a GPR
// and the 128-bit sub-lane extract/insert pattern.
// NOTE(review): sibling insert rules declare the index as immU8; this one
// uses immI — looks inconsistent, confirm whether immU8 was intended.
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx = which double within the 128-bit sub-lane, y_idx = which of 4.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19360 
19361 // ====================REDUCTION ARITHMETIC=======================================
19362 
19363 // =======================Int Reduction==========================================
19364 
// Reduce an int vector (src2) with a scalar accumulator (src1) into a GPR.
// One rule covers add/mul/and/or/xor/min/max and unsigned min/max; the
// actual operation is dispatched in MacroAssembler::reduceI on the ideal
// opcode of the matched node.
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19385 
19386 // =======================Long Reduction==========================================
19387 
// Reduce a long vector with a scalar accumulator into a GPR, for CPUs
// without AVX512DQ. Uses legVec operands to keep the allocation within the
// legacy (xmm0-15) register range required by non-EVEX encodings.
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19408 
// Reduce a long vector with a scalar accumulator into a GPR, AVX512DQ
// variant: plain vec operands allow the full xmm0-31 register range.
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19429 
19430 // =======================Float Reduction==========================================
19431 
// Strictly-ordered float add/mul reduction for vectors of up to 4 floats.
// dst is both the incoming accumulator and the result (TEMP dst).
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19445 
// Strictly-ordered float add/mul reduction for 8-element (256-bit) vectors;
// needs two vector temps for the cross-lane reduction steps.
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19459 
// Strictly-ordered float add/mul reduction for 16-element (512-bit) vectors;
// legVec operands constrain allocation to registers usable by all encodings.
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19473 
19474 
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // 2-element case: small enough that no vector temp is needed.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19491 
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // 4-element case: one vector temp for the pairwise reduction steps.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19508 
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // Note: $src1 is not read by the encoding; vtmp1/vtmp2 are scratch XMMs.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19525 
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // 16-element variant; legVec operands restrict allocation to the legacy
  // register file. $src1 is not read by the encoding.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19542 
19543 // =======================Double Reduction==========================================
19544 
// Strictly-ordered double add/mul reduction over a 2-element vector.
// dst is matched as both the accumulator input and the result, so it is also
// listed as TEMP; vtmp is a scratch XMM register for the helper.
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19558 
// Strictly-ordered double add/mul reduction over a 4-element vector.
// dst carries the accumulator input; vtmp1/vtmp2 are scratch XMM registers.
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19572 
// Strictly-ordered double add/mul reduction over an 8-element vector.
// legVec operands restrict allocation to the legacy register file.
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19586 
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // Note: $src1 is not read by the encoding; only the 2-element vector $src2
  // is passed to the macro-assembler helper.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19603 
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // Note: $src1 is not read by the encoding; vtmp is a scratch XMM register.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19620 
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  // 8-element variant; legVec operands restrict allocation to the legacy
  // register file. $src1 is not read by the encoding.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19637 
19638 // =======================Byte Reduction==========================================
19639 
// Byte reductions (add/logical/min/max, signed and unsigned) into a GPR.
// This variant is selected when AVX512BW is NOT available; src1 is the scalar
// accumulator input, vtmp1/vtmp2 are scratch XMM registers.
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19659 
// Byte reductions into a GPR when AVX512BW IS available; identical shape to
// reductionB but uses unrestricted vec operands (full EVEX register file).
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19679 
19680 // =======================Short Reduction==========================================
19681 
// Short/char reductions (add/mul/logical/min/max, signed and unsigned) into a
// GPR; src1 is the scalar accumulator input, vtmp1/vtmp2 are scratch XMMs.
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19702 
19703 // =======================Mul Reduction==========================================
19704 
// Byte multiply reduction for vectors of up to 32 elements; the 64-element
// case is handled by mul_reduction64B below.
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19718 
// Byte multiply reduction for 64-element vectors; uses legVec operands
// (legacy register file) unlike the <=32 variant above.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19732 
19733 //--------------------Min/Max Float Reduction --------------------
19734 // Float Min Reduction
// Float min/max reduction, 2-element vector, pre-AVX10.2 path.
// The predicate requires the scalar input to be the operation's identity
// (+Inf for Min, -Inf for Max), so only $src2 needs to be reduced; the
// immF $src1 is therefore not read by the encoding.
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    // Third argument (false) marks this as the identity-input variant; the
    // _av rules pass true. See reduceFloatMinMax in the macro assembler.
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19755 
// Float min/max reduction, 4+ element vectors, pre-AVX10.2 path.
// Requires the scalar input to be the operation's identity (see predicate);
// the immF $src1 is not read by the encoding. Needs one more temp (xmm_0)
// than the 2-element rule.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19776 
// Float min/max reduction, 2-element vector, pre-AVX10.2, accumulator
// variant: dst carries a live accumulated value (matched as first input),
// so the helper is called with the flag set to true.
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19795 
19796 
// Float min/max reduction, 4+ element vectors, pre-AVX10.2, accumulator
// variant (dst is both input accumulator and result; helper flag is true).
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19815 
// Float min/max reduction, 2-element vector, AVX10.2 path. The newer ISA
// needs only one temp; the unused temp slots are passed as xnoreg.
// The scalar input must be the operation's identity (see predicate) and is
// not read by the encoding.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19833 
// Float min/max reduction, 4+ element vectors, AVX10.2 path; two temps,
// remaining temp slots passed as xnoreg. Scalar input must be the identity
// (see predicate) and is not read by the encoding.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19851 
// Float min/max reduction, 2-element vector, AVX10.2 accumulator variant:
// dst carries a live accumulated value (helper flag is true).
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19867 
// Float min/max reduction, 4+ element vectors, AVX10.2 accumulator variant:
// dst carries a live accumulated value (helper flag is true).
// Note: the format string previously said "vector_minmax2F_reduction",
// copied from the 2-element rule; this rule handles vlen >= 4, matching
// minmax_reductionF_av on the pre-AVX10.2 path.
instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19883 
19884 //--------------------Min Double Reduction --------------------
// Double min/max reduction, 2-element vector, pre-AVX10.2 path.
// The predicate requires the scalar input to be the operation's identity
// (+Inf for Min, -Inf for Max); the immD $src1 is not read by the encoding.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    // false = identity-input variant; the _av rules pass true.
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19905 
// Double min/max reduction, 4+ element vectors, pre-AVX10.2 path.
// Scalar input must be the identity (see predicate); $src1 is not read by
// the encoding. Needs one more temp (tmp5) than the 2-element rule.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19926 
19927 
// Double min/max reduction, 2-element vector, pre-AVX10.2, accumulator
// variant: dst carries a live accumulated value (helper flag is true).
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19946 
// Double min/max reduction, 4+ element vectors, pre-AVX10.2, accumulator
// variant (dst is both input accumulator and result; helper flag is true).
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19965 
// Double min/max reduction, 2-element vector, AVX10.2 path; a single temp
// suffices, unused temp slots are passed as xnoreg. Scalar input must be
// the identity (see predicate) and is not read by the encoding.
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
                          xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19983 
// Double min/max reduction, 4+ element vectors, AVX10.2 path; two temps,
// remaining temp slots passed as xnoreg. Scalar input must be the identity
// (see predicate) and is not read by the encoding.
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20001 
20002 
// Double min/max reduction, 2-element vector, AVX10.2 accumulator variant:
// dst carries a live accumulated value (helper flag is true).
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20018 
// Double min/max reduction, 4+ element vectors, AVX10.2 accumulator variant:
// dst carries a live accumulated value (helper flag is true).
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20034 
20035 // ====================VECTOR ARITHMETIC=======================================
20036 
20037 // --------------------------------- ADD --------------------------------------
20038 
20039 // Bytes vector add
// Byte vector add, SSE-only path (UseAVX == 0): two-operand destructive
// paddb, so dst is both source and destination.
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packedB" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20049 
// Byte vector add, AVX path: three-operand non-destructive vpaddb with the
// vector-length prefix derived from this node.
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20060 
// Byte vector add with a memory operand folded into the instruction.
// Restricted to vectors wider than 8 bytes (the operand in(1) check).
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20072 
20073 // Shorts/Chars vector add
// Short/char vector add, SSE-only path: two-operand destructive paddw.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packedS" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20083 
// Short/char vector add, AVX path: three-operand non-destructive vpaddw.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20094 
20095 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20096   predicate((UseAVX > 0) &&
20097             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20098   match(Set dst (AddVS src (LoadVector mem)));
20099   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20100   ins_encode %{
20101     int vlen_enc = vector_length_encoding(this);
20102     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20103   %}
20104   ins_pipe( pipe_slow );
20105 %}
20106 
// Integers vector add
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packedI" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


// AVX form with the second operand folded from memory; only for input
// vectors wider than 8 bytes.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20141 
// Longs vector add
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq   $dst,$src\t! add packedL" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the second operand folded from memory; only for input
// vectors wider than 8 bytes.
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20175 
// Floats vector add
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packedF" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the second operand folded from memory; only for input
// vectors wider than 8 bytes.
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20209 
// Doubles vector add
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd   $dst,$src\t! add packedD" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the second operand folded from memory; only for input
// vectors wider than 8 bytes.
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20243 
20244 // --------------------------------- SUB --------------------------------------
20245 
// Bytes vector sub
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packedB" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the subtrahend folded from memory; only for input
// vectors wider than 8 bytes.
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20279 
// Shorts/Chars vector sub
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw   $dst,$src\t! sub packedS" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the subtrahend folded from memory; only for input
// vectors wider than 8 bytes.
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20314 
// Integers vector sub
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd   $dst,$src\t! sub packedI" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the subtrahend folded from memory; only for input
// vectors wider than 8 bytes.
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20348 
// Longs vector sub
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq   $dst,$src\t! sub packedL" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


// AVX form with the subtrahend folded from memory; only for input
// vectors wider than 8 bytes.
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20383 
// Floats vector sub
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps   $dst,$src\t! sub packedF" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the subtrahend folded from memory; only for input
// vectors wider than 8 bytes.
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20417 
// Doubles vector sub
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd   $dst,$src\t! sub packedD" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the subtrahend folded from memory; only for input
// vectors wider than 8 bytes.
instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20451 
20452 // --------------------------------- MUL --------------------------------------
20453 
// Byte vector mul
// There is no byte-wide SIMD multiply on x86, so all byte multiplies are
// synthesized from 16-bit (word) multiplies.
//
// <= 8-byte case: sign-extend both inputs to words, multiply, mask each
// word back down to its low byte, then narrow with packuswb.
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    // Keep only the low byte of each 16-bit product before narrowing.
    __ psllw($dst$$XMMRegister, 8);
    __ psrlw($dst$$XMMRegister, 8);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE (UseAVX == 0) full-width case: multiply odd- and even-indexed bytes
// separately as words, mask each partial result to the correct byte lane,
// then OR the halves together.
instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    // Odd-index elements
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ psrlw($dst$$XMMRegister, 8);
    __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psrlw($xtmp$$XMMRegister, 8);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    // Even-index elements
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psllw($xtmp$$XMMRegister, 8);
    __ psrlw($xtmp$$XMMRegister, 8);
    // Combine
    __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX version of the same odd/even decomposition, using three-operand forms
// so the sources are not clobbered.
instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Odd-index elements
    __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
    __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
    // Even-index elements
    __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    // Combine
    __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20518 
// Shorts/Chars vector mul
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw  $dst,$src\t! mul packedS" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the second operand folded from memory; only for input
// vectors wider than 8 bytes.
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20552 
// Integers vector mul
// SSE (UseAVX == 0) two-operand form; pmulld is SSE4.1, hence the assert.
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld  $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the second operand folded from memory; only for input
// vectors wider than 8 bytes.
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20587 
// Longs vector mul
// Single-instruction 64x64->64 multiply via EVEX vpmullq. Requires AVX512DQ;
// sub-512-bit lengths additionally need AVX512VL (supports_avx512vldq covers
// the combined check).
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above with the second operand folded from memory; the memory form
// additionally requires the input vector to be wider than 8 bytes.
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_cost(500);
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20619 
// Synthesized 64-bit multiply for targets without vpmullq (SSE case).
// Decomposes each 64x64 product into 32-bit partial products:
//   lo(a)*hi(b) + hi(a)*lo(b), shifted up 32 bits, plus lo(a)*lo(b).
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only the lower 32 bits of each
    // partial product are needed.
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX version of the synthesized 64-bit multiply, used when the
// AVX512(VL)DQ vpmullq form above is not available for this length.
instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only the lower 32 bits of each
    // partial product are needed.
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20666 
// Cheap long multiply when the ideal graph proves both inputs fit in
// 32 unsigned bits: a single vpmuludq. Lower ins_cost makes the matcher
// prefer these over the generic MulVL rules.
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Signed counterpart: both inputs are sign-extended 32-bit values, so a
// single vpmuldq produces the exact 64-bit product.
instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20690 
// Floats vector mul
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps   $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the second operand folded from memory; only for input
// vectors wider than 8 bytes.
instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20724 
// Doubles vector mul
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd   $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the second operand folded from memory; only for input
// vectors wider than 8 bytes.
instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20758 
20759 // --------------------------------- DIV --------------------------------------
20760 
// Floats vector div
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps   $dst,$src\t! div packedF" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the divisor folded from memory; only for input
// vectors wider than 8 bytes.
instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20794 
// Doubles vector div
// SSE (UseAVX == 0) two-operand form: dst is both an input and the result.
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd   $dst,$src\t! div packedD" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form; vlen_enc selects the 128/256/512-bit encoding.
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the divisor folded from memory; only for input
// vectors wider than 8 bytes.
instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20828 
20829 // ------------------------------ MinMax ---------------------------------------
20830 
// Byte, Short, Int vector Min/Max
// One rule covers both MinV and MaxV: the macro-assembler dispatches on
// this->ideal_Opcode(). SSE form is in-place (dst is an input).
instruct minmax_reg_sse(vec dst, vec src) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  format %{ "vector_minmax  $dst,$src\t!  " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20847 
// AVX three-operand Min/Max for byte/short/int elements; the ideal opcode
// (MinV vs MaxV) and element type select the concrete instruction inside
// vpminmax.
instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX > 0);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20863 
20864 // Long vector Min/Max
// 128-bit long Min/Max without AVX. Long min/max has no direct SSE
// instruction, so pminmax synthesizes it using xmm0 (rxmm0 tmp) as the
// implicit blend mask register required by blendvpd.
// Fix: the MaxV rule previously matched (MaxV src dst), inverting the
// operand order relative to the MinV rule and every sibling min/max rule.
// Max is commutative so the result was the same, but the rule is now
// written consistently as (MaxV dst src) with dst as the
// read-modify-write input, matching the encoding's in-place use of dst.
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // pminmax dispatches on the ideal opcode (Op_MinV vs Op_MaxV).
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20883 
// Long Min/Max on AVX/AVX2 (up to 32 bytes) when AVX512VL is absent.
// legVec operands keep allocation to registers encodable without EVEX —
// presumably because vpminmax falls back to a compare+blend sequence here;
// TODO(review): confirm against vpminmax in the macro assembler.
instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20901 
// Long Min/Max using EVEX encodings: either a full 512-bit vector, or any
// narrower vector when AVX512VL is available (vpminsq/vpmaxsq are native here,
// so no TEMP registers are needed, unlike the AVX fallback above).
instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  // Fixed: "src2" was missing the '$' sigil, so ADLC printed the literal text
  // instead of substituting the operand in the debug format.
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20920 
20921 // Float/Double vector Min/Max
// Float/Double vector Min/Max on AVX10.2: the hardware VMINMAXPS/PD family
// implements the required NaN/-0.0 semantics directly, so no temporaries or
// blend sequences are needed (contrast with minmaxFP_reg below). k0 = no mask.
instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  format %{ "vector_minmaxFP  $dst, $a, $b" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20936 
20937 // Float/Double vector Min/Max
// Float/Double vector Min/Max for <= 256-bit vectors on AVX without AVX10.2.
// Needs three TEMP vectors because plain vminps/vmaxps do not honor Java's
// NaN and -0.0 vs +0.0 ordering; the macro-assembler builds a compare/blend
// sequence using tmp/atmp/btmp (see MacroAssembler::vminmax_fp).
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
            UseAVX > 0);
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vminmax_fp(opcode, elem_bt,
                  $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20959 
// Float/Double vector Min/Max for 512-bit vectors (EVEX) without AVX10.2.
// Same NaN/-0.0-correct blend strategy as minmaxFP_reg, but uses an opmask
// register (ktmp) for the blend instead of a third vector temporary.
instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20980 
20981 // ------------------------------ Unsigned vector Min/Max ----------------------
20982 
// Unsigned integral Min/Max, register-register form. Handles every integral
// element type directly except T_LONG without AVX512VL (that case needs the
// temp-based expansion in vector_uminmaxq_reg below).
instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20997 
// Unsigned integral Min/Max with the second operand folded from memory
// (LoadVector). Same predicate/constraints as the register form above.
instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21012 
// Unsigned long Min/Max when AVX512VL is unavailable: no single instruction
// exists, so vpuminmaxq expands to a multi-step sequence using two TEMP vectors.
instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21026 
// Masked unsigned Min/Max (AVX-512 opmask): dst is both the first input and
// the merge source — lanes where $mask is clear keep dst's old value
// (merge-masking; the 'true' argument to evmasked_op selects merge semantics —
// confirm against MacroAssembler::evmasked_op).
instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21040 
// Masked unsigned Min/Max with the second operand folded from memory.
// Mirrors vector_uminmax_reg_masked but passes an Address for src2.
instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21054 
21055 // --------------------------------- Signum/CopySign ---------------------------
21056 
// Scalar float signum: dst is updated in place; zero/one hold the 0.0f and
// 1.0f constants the helper compares/blends against. The expansion clobbers
// the condition flags, hence KILL cr.
instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21067 
// Scalar double signum — identical structure to signumF_reg; the float/double
// distinction is carried by the ideal opcode passed into signum_fp.
instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21078 
// Vector signum (float or double lanes) for <= 256-bit vectors without
// AVX512VL; uses one vector TEMP for the intermediate compare/blend masks.
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21093 
// Vector signum, EVEX flavor: AVX512VL at any length, or plain AVX-512 at
// 512 bits. Replaces the vector TEMP of the AVX path with an opmask TEMP.
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21108 
21109 // ---------------------------------------
21110 // For copySign use 0xE4 as writemask for vpternlog
21111 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21112 // C (xmm2) is set to 0x7FFFFFFF
21113 // Wherever xmm2 is 0, we want to pick from B (sign)
21114 // Wherever xmm2 is 1, we want to pick from A (src)
21115 //
21116 // A B C Result
21117 // 0 0 0 0
21118 // 0 0 1 0
21119 // 0 1 0 1
21120 // 0 1 1 0
21121 // 1 0 0 0
21122 // 1 0 1 1
21123 // 1 1 0 1
21124 // 1 1 1 1
21125 //
21126 // Result going from high bit to low bit is 0x11100100 = 0xe4
21127 // ---------------------------------------
21128 
// Scalar float copySign via vpternlogd: tmp1 is loaded with 0x7FFFFFFF (all
// bits except the sign bit) and imm8 0xE4 selects dst's magnitude bits and
// src's sign bit — see the truth-table comment above.
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    // Build the 0x7FFFFFFF magnitude mask in a GPR, then move it to xmm.
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21140 
// Scalar double copySign — 64-bit analogue of copySignF_reg (mask
// 0x7FFFFFFFFFFFFFFF, vpternlogq with the same 0xE4 selector). The immD zero
// operand exists only to shape the match pattern; it is never read.
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21153 
21154 //----------------------------- CompressBits/ExpandBits ------------------------
21155 
// Integer.compress intrinsic: single BMI2 PEXT instruction.
// The bottom_type()->isa_int() predicate keeps this to the int flavor of
// the shared CompressBits ideal node (the long flavor matches elsewhere).
instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21165 
// Integer.expand intrinsic: single BMI2 PDEP instruction (inverse of PEXT).
instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21175 
// Integer.compress with the mask operand folded from memory (LoadI).
instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21185 
// Integer.expand with the mask operand folded from memory (LoadI).
instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21195 
21196 // --------------------------------- Sqrt --------------------------------------
21197 
// Packed float sqrt, register source; vlen_enc selects 128/256/512-bit form.
instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21208 
// Packed float sqrt with memory source. The > 8-byte predicate keeps the
// memory form off sub-xmm-width vectors, where a full-width load from the
// folded address would read past the logical vector.
instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21220 
21221 // Floating point vector sqrt
// Packed double sqrt, register source — double analogue of vsqrtF_reg.
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21232 
// Packed double sqrt with memory source; same >8-byte width guard as vsqrtF_mem.
instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21244 
21245 // ------------------------------ Shift ---------------------------------------
21246 
21247 // Left and right shift count vectors are the same on x86
21248 // (only lowest bits of xmm reg are used for count).
// Materialize a scalar shift count into an xmm register: packed-shift
// instructions take their count from the low bits of an xmm, so left and
// right counts share one rule (both LShiftCntV and RShiftCntV match here).
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl    $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21258 
21259 // Byte vector shift
// Byte vector shift, <= 8 lanes, uniform (non-variable) count, SSE path.
// x86 has no byte-granularity packed shifts, so: widen bytes to words
// (sign-extending only for the arithmetic shift), shift as words, mask to the
// low byte of each word, and pack back down to bytes.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    // Only the logical right shift zero-extends; L/R arithmetic shifts sign-extend.
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21279 
// 16-byte vector shift, uniform count, SSE/AVX1 path (UseAVX <= 1): the
// widened data needs 256 bits, so the low and high 8 bytes are widened and
// shifted separately (tmp1 = low half, tmp2 = high half via pshufd 0xE),
// then masked and packed back into one 16-byte result.
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    // 0xE moves the upper 64 bits of src into the low half of tmp2.
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21304 
// 16-byte vector shift, uniform count, AVX2 path (UseAVX > 1): widen all 16
// bytes to one 256-bit word vector, shift once, mask, then fold the high
// 128-bit lane back down with vextracti128/vpackuswb.
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    // Final pack runs at 128-bit (vlen 0): result is 16 bytes.
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
21325 
// 32-byte vector shift, uniform count, AVX2: widen each 128-bit half to a
// 256-bit word vector, shift, mask, pack, then vpermq 0xD8 repairs the
// lane interleaving that the in-lane vpackuswb produces.
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    // 0xD8 = 0b11011000: reorder qwords 0,2,1,3 to undo in-lane packing.
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21350 
// 64-byte vector shift, uniform count, AVX-512 (UseAVX > 2): widen each
// 256-bit half to a 512-bit word vector, shift, mask with a broadcast
// 0x00FF pattern, pack, then a full cross-lane vpermq (table from
// vector_byte_perm_mask) restores byte order.
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst  (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    // Load the short->byte mask once and broadcast it across all 512 bits.
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21378 
21379 // Shorts vector logical right shift produces incorrect Java result
21380 // for negative data because java code convert short value into int with
21381 // sign extension before a shift. But char vectors are fine since chars are
21382 // unsigned values.
21383 // Shorts/Chars vector left shift
// Short/char vector shift with a uniform count. AVX uses the non-destructive
// three-operand form; SSE first copies src into dst with the narrowest move
// that covers the vector (4/8/16 bytes), then shifts dst in place.
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        // 2 shorts = 4 bytes: a float-sized move suffices.
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        // 4 shorts = 8 bytes: double-sized move.
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
21413 
21414 // Integers vector left shift
// Int vector shift with a uniform count; same structure as vshiftS but with
// dword-granularity shifts (vshiftd) and only the 2- and 4-lane SSE cases.
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        // 2 ints = 8 bytes: double-sized move covers the vector.
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
21441 
21442 // Integers vector left constant shift
// Int vector shift by a compile-time constant: the ShiftCntV load is folded
// away and the immediate goes straight into the shift instruction.
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        // 2 ints = 8 bytes.
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
21467 
21468 // Longs vector shift
// Long vector left/logical-right shift, uniform count. Arithmetic right shift
// of longs is NOT matched here — it needs special handling (see
// vshiftL_arith_reg below, since SSE/AVX2 lack a native psraq).
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      // Without AVX only a 2-long (128-bit) vector is possible.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
21488 
21489 // Longs vector constant shift
// Long vector left/logical-right shift by a compile-time constant.
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21507 
21508 // -------------------ArithmeticRightShift -----------------------------------
21509 // Long vector arithmetic right shift
// Long vector arithmetic right shift without AVX-512 (no native psraq):
// synthesized from a logical shift plus sign-bit fixup. With sign mask
// m = 0x8000...0000 >> s and logical result r = x >>> s, the arithmetic
// result is (r ^ m) - m, which re-extends the sign bits.
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      // 128-bit SSE variant.
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
      __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
      __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
      __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
    } else {
      // 256-bit variant needs AVX2 for vpsrlq/vpsubq on ymm.
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      int vlen_enc = Assembler::AVX_256bit;
      __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
      __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21537 
// Long vector arithmetic right shift on AVX-512: a single native evpsraq,
// replacing the multi-instruction fixup sequence needed on older hardware.
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21548 
21549 // ------------------- Variable Shift -----------------------------
21550 // Byte variable shift
// Byte vector variable shift (per-lane counts), <= 8 lanes, no AVX512BW:
// varshiftbw widens to words and applies per-lane word shifts, then the
// result is packed back down to bytes.
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Variable per-lane shifts (vpsllvd etc.) require AVX2.
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
21570 
// Byte vector variable shift, 16 lanes, no AVX512BW: processes the vector as
// two 8-byte halves (high halves extracted via vpshufd 0xE), shifts each as
// words via varshiftbw, then packs both word results back to 16 bytes.
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Shift lower half and get word result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);

    // Shift upper half and get word result in vtmp1
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);

    // Merge and down convert the two word results to byte in dst
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
21598 
// Variable (per-element) byte shift for 32-byte vectors on AVX2 targets
// without AVX512BW. Each 128-bit lane is processed as two byte->word
// widened halves via varshiftbw, packed back to bytes, and the two lane
// results are merged with vinserti128.
instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  // Note: "\n\t!" matches the sibling variable-shift formats (the '!' was missing here).
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Process higher 128 bits and get result in vtmp3
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21634 
// Variable (per-element) byte shift for vectors of up to 32 bytes when
// AVX512BW is available: a single evarshiftb call handles the whole vector.
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21653 
// Variable (per-element) byte shift for 64-byte vectors with AVX512BW:
// the 512-bit vector is processed as two 256-bit halves, each via
// evarshiftb, then re-assembled with vinserti64x4.
instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_256bit;
    // Lower 256 bits into dst
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    // Upper 256 bits shifted into vtmp1, then merged into the high half of dst
    __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21676 
21677 // Short variable shift
// Variable (per-element) short shift for vectors of up to 8 shorts on AVX2
// targets without AVX512BW. Shorts are widened to ints, shifted with the
// variable int shift, masked back to 16 significant bits, and packed down.
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  // Renamed from "vector_var_shift_left_short": this instruct matches left,
  // right and unsigned-right shifts; also document the TEMP register.
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    // Sign-extend for arithmetic shifts so the high int bits carry the sign.
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Use vlen_enc instead of the magic literal 1 (== Assembler::AVX_256bit).
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    // Keep only the low 16 bits of each int lane before packing.
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
21702 
// Variable (per-element) short shift for 16-short vectors on AVX2 targets
// without AVX512BW. Both 8-short halves are widened to ints, shifted,
// masked back to 16 significant bits, packed, and the packed lanes are
// put back in order with vpermq.
instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  // Renamed from "vector_var_shift_left_short": this instruct matches left,
  // right and unsigned-right shifts; also document the TEMP registers.
  format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    // Sign-extend for arithmetic shifts so the high int bits carry the sign.
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21738 
// Variable (per-element) short shift when AVX512BW is available: a single
// varshiftw call handles any vector length. Without AVX512VL the encoding
// is forced to 512-bit, since the BW word-shift forms need VL for narrower
// vectors.
instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21758 
// Integer variable shift
// Variable (per-element) int shift: handled directly by varshiftd on AVX2+.
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21775 
// Long variable shift
// Variable (per-element) long shift, left and unsigned-right only.
// Arithmetic (signed) right shift is matched by the vshiftL_arith_* rules
// below, since it needs extra handling pre-AVX512.
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21791 
// Long variable arithmetic right shift
// Variable arithmetic (signed) right shift of longs on AVX2 (UseAVX == 2):
// needs a temporary register, supplied to the varshiftq overload below.
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21808 
// Variable arithmetic (signed) right shift of longs on AVX-512 (UseAVX > 2):
// a single varshiftq call, no temporary needed.
instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  // Fixed typo in the format string: "varfshift" -> "varshift".
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21821 
21822 // --------------------------------- AND --------------------------------------
21823 
// Bitwise AND of vectors, SSE form (UseAVX == 0): destructive two-operand
// pand, dst is both a source and the destination.
instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21833 
// Bitwise AND of vectors, AVX three-operand form: dst = src1 & src2.
instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21844 
// Bitwise AND with a memory operand folded into the instruction; only for
// vectors wider than 8 bytes (narrower loads are not matched here).
instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21856 
21857 // --------------------------------- OR ---------------------------------------
21858 
// Bitwise OR of vectors, SSE form (UseAVX == 0): destructive two-operand
// por, dst is both a source and the destination.
instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21868 
// Bitwise OR of vectors, AVX three-operand form: dst = src1 | src2.
instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21879 
// Bitwise OR with a memory operand folded into the instruction; only for
// vectors wider than 8 bytes.
instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21891 
21892 // --------------------------------- XOR --------------------------------------
21893 
// Bitwise XOR of vectors, SSE form (UseAVX == 0): destructive two-operand
// pxor, dst is both a source and the destination.
instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21903 
// Bitwise XOR of vectors, AVX three-operand form: dst = src1 ^ src2.
instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21914 
// Bitwise XOR with a memory operand folded into the instruction; only for
// vectors wider than 8 bytes.
instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21926 
21927 // --------------------------------- VectorCast --------------------------------------
21928 
// Cast a byte vector to any wider element type. The byte->double case is
// excluded unless AVX512VL is present; without it that case is handled by
// vcastBtoD below using legacy (legVec) registers.
instruct vcastBtoX(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21940 
// Cast byte vector to double without AVX512VL: restricted to legacy
// (legVec) registers, complementing the vcastBtoX rule above.
instruct vcastBtoD(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21951 
// Narrowing cast short->byte for source vectors of up to 8 shorts when the
// AVX512 VL+BW down-convert (evpmovwb) is unavailable: mask each short to
// its low byte, then pack words down to bytes.
instruct castStoX(vec dst, vec src) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) <= 8 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
21966 
// Narrowing cast short->byte for 16-short (256-bit) sources without
// AVX512 VL+BW: mask to low bytes, then pack the two 128-bit halves.
instruct vcastStoX(vec dst, vec src, vec vtmp) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) == 16 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  effect(TEMP dst, TEMP vtmp);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // Encoding is based on the SOURCE width, since dst is narrower.
    int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
21984 
// Cast short vector to any other element type when either AVX512 VL+BW is
// available (covers the narrowing short->byte case) or the cast is
// widening (dst element at least as wide as src element).
instruct vcastStoX_evex(vec dst, vec src) %{
  predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int vlen_enc = vector_length_encoding(this);
    switch (to_elem_bt) {
      case T_BYTE:
        // Narrowing: evpmovwb needs VL for sub-512-bit encodings.
        if (!VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_INT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        // Widen to int first, then convert int -> float.
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE: {
        // Intermediate int vector is half the width of the double result.
        int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
22023 
// Narrowing cast int->byte/short for sources of up to 16 bytes on
// AVX <= 2: mask each int to the target width, then pack down (twice for
// bytes, once for shorts).
instruct castItoX(vec dst, vec src) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22048 
// Narrowing cast int->byte/short for 32-byte (256-bit) sources on
// AVX <= 2: mask to the target width, pack the two 128-bit halves
// together, and for bytes pack once more.
instruct vcastItoX(vec dst, vec src, vec vtmp) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22076 
// Cast int vector to any other element type on AVX-512 targets, or any
// widening int cast on older targets. The narrowing byte/short cases rely
// on AVX512 down-converts (evpmovdb/evpmovdw).
instruct vcastItoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    switch (dst_elem_bt) {
      case T_BYTE:
        // Down-converts need VL for sub-512-bit encodings.
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_SHORT:
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_FLOAT:
        __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_DOUBLE:
        __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
22116 
// Narrowing cast long->byte/short on AVX <= 2 (no AVX512 down-converts):
// gather the low dword of each long, mask to the target width, pack down.
instruct vcastLtoBS(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
            UseAVX <= 2);
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x  $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen = Matcher::vector_length_in_bytes(this, $src);
    BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
    AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
                                                      : ExternalAddress(vector_int_to_short_mask());
    if (vlen <= 16) {
      // Shuffle control 8 selects the low dword of each of the two longs.
      __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(vlen <= 32, "required");
      // 256-bit source: permute within and across lanes to collect the
      // low dwords of all four longs into the lower half, then narrow.
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
    // One more packing step for the byte-sized result.
    if (to_elem_bt == T_BYTE) {
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}
22146 
// Cast long vector to any other element type on AVX-512 targets, and the
// long->int/float/double cases on older targets (byte/short narrowing
// without AVX-512 is handled by vcastLtoBS above).
instruct vcastLtoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT ||
             Matcher::vector_element_basic_type(n) == T_DOUBLE));
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x  $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this, $src);
    int vlen_enc = vector_length_encoding(this, $src);
    switch (to_elem_bt) {
      case T_BYTE:
        // AVX512 down-converts need VL for sub-512-bit encodings.
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_SHORT:
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_INT:
        if (vlen == 8) {
          // Single long: its low dword is already in place; just move if needed.
          if ($dst$$XMMRegister != $src$$XMMRegister) {
            __ movflt($dst$$XMMRegister, $src$$XMMRegister);
          }
        } else if (vlen == 16) {
          // Two longs: shuffle their low dwords into the first two lanes.
          __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
        } else if (vlen == 32) {
          if (UseAVX > 2) {
            if (!VM_Version::supports_avx512vl()) {
              vlen_enc = Assembler::AVX_512bit;
            }
            __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
          } else {
            // AVX2 fallback: permute low dwords within and across lanes.
            __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
            __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
          }
        } else { // vlen == 64
          __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        }
        break;
      case T_FLOAT:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;

      default: assert(false, "%s", type2name(to_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22206 
// Widening cast float->double: a single vcvtps2pd.
instruct vcastFtoD_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2d  $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22217 
22218 
// Cast float vector to an integral type of at most 4 bytes per element on
// AVX targets without AVX512VL or AVX10.2, for sub-512-bit sources.
// Delegates to the vector_castF2X_avx macro-assembler stub; KILLs flags.
instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register needed to load greater than
    // 32 bit addresses for register indirect addressing mode since stub constants
    // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently.
    // However, targets are free to increase this limit, but having a large code cache size
    // greater than 2G looks unreasonable in practical scenario, on the hind side with given
    // cap we save a temporary register allocation which in limiting case can prevent
    // spilling in high register pressure blocks.
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22244 
// Cast float vector to any integral type on AVX-512 targets (with VL, or
// for full 512-bit sources) when AVX10.2 is not available. The long case
// uses a separate stub and sizes the encoding by the destination; other
// cases size it by the source. KILLs flags, uses two mask registers.
instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      // Widening to long: encoding follows the (wider) destination.
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      // Same-width or narrowing: encoding follows the source.
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22268 
// Vector cast float -> integral on AVX10.2 targets: a single macro-assembler
// call, no temporaries or sign-flip constants needed.
instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // F2L widens lanes, so the destination determines the vector length;
    // otherwise the (same-or-wider) source does.
    int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
    __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22281 
// Memory-operand variant of the AVX10.2 float -> integral cast; folds the
// vector load into the conversion instruction.
instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X (LoadVector src)));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // The source is a memory operand with no node to query, so for non-long
    // targets compute the source size as lane-count * sizeof(jfloat)
    // (src and dst have the same lane count).
    int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
    __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22295 
// Vector cast double -> float: direct narrowing conversion, encoding sized
// from the (wider) double source.
instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x  $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22306 
// Vector cast double -> integral on pre-AVX512VL hardware (sub-64-byte
// vectors only).  Needs five vector temporaries for the scalar fix-up of
// out-of-range/NaN lanes.
instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    // D2X narrows, so the source vector determines the encoding width.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22324 
// Vector cast double -> integral using EVEX encodings (AVX512VL or 512-bit
// source), on targets without AVX10.2.
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // With AVX512DQ the macro assembler uses 64-bit conversions and needs the
    // 64-bit sign-flip mask; without it the 32-bit mask is used.
    AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
                              ExternalAddress(vector_float_signflip());
    __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                           $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22342 
// Vector cast double -> integral on AVX10.2 targets: single instruction
// sequence, no temporaries required.
instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    // D2X narrows (or keeps lane width for T_LONG), so the source determines
    // the encoding width.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22355 
// Memory-operand variant of the AVX10.2 double -> integral cast; folds the
// vector load into the conversion instruction.
instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X (LoadVector src)));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    // Memory source has no node to query; its size is lane-count *
    // sizeof(jdouble) (src and dst have the same lane count).
    int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22369 
// Unsigned (zero-extending) vector casts from byte/short/int sources to a
// wider integral type; one rule covers all three source widths.
instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // Widening cast: the destination vector determines the encoding width.
    int vlen_enc = vector_length_encoding(this);
    __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
22385 
// RoundVF (float -> int with Math.round semantics) for pre-AVX512VL hardware,
// sub-64-byte vectors.  Temporarily installs a custom MXCSR rounding mode.
instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // MXCSR image loaded for the duration of the rounding sequence; the value
    // differs when EnableX86ECoreOpts is set (0x3FBF vs 0x3F80).
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
                              ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                              $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22402 
// RoundVF for AVX512VL targets (or 512-bit vectors); uses mask registers
// instead of vector temporaries for the out-of-range fix-up.
instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
             Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // MXCSR image loaded for the duration of the rounding sequence; the value
    // differs when EnableX86ECoreOpts is set (0x3FBF vs 0x3F80).
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
22419 
// RoundVD (double -> long with Math.round semantics); EVEX-only, with the
// 64-bit sign-flip stub constant.
instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // MXCSR image loaded for the duration of the rounding sequence; the value
    // differs when EnableX86ECoreOpts is set (0x3FBF vs 0x3F80).
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
22434 
22435 // --------------------------------- VectorMaskCmp --------------------------------------
22436 
// Float/double vector compare producing a vector result (not a mask register)
// for 8..32-byte vectors.  Result lanes are all-ones/all-zeros as produced by
// vcmpps/vcmppd directly.
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    // Translate the ideal BoolTest condition into an AVX FP predicate imm8.
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22455 
// 512-bit float/double compare that must produce a vector result: compare
// into a temporary mask register, then expand the mask to all-ones/all-zeros
// lanes via a masked move from the all-bits-set constant.
instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_vectmask() == nullptr &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      // merge=false: lanes whose mask bit is clear are zeroed.
      __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    } else {
      __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
22477 
// Float/double compare whose ideal result type is a vector mask: the compare
// writes the destination mask register directly, no vector materialization.
instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22496 
// Signed integral compare for conditions SSE/AVX can express directly
// (eq/lt/gt); 4..32-byte vectors, vector result.  No temporary needed
// (xnoreg passed to vpcmpCCW).
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22516 
// Signed integral compare for the negated conditions (ne/le/ge): implemented
// by vpcmpCCW with an extra vector temporary to invert the direct compare.
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22537 
// Unsigned integral compare for 4..32-byte vectors: pre-AVX512 hardware has
// no unsigned vector compares, so flip the sign bit of both operands (XOR
// with a broadcast high-bit constant) and do a signed compare instead.
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    // 64-bit constant with the sign bit of each lane set, kept in the
    // constant table and broadcast across the vector below.
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    // vbroadcastsd needs a 256-bit destination; use vmovddup for 128-bit.
    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    // Bias both operands, then signed compare gives the unsigned ordering.
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22564 
// 512-bit integral compare producing a vector result: compare into a
// temporary mask register, then expand it to all-ones/all-zeros lanes via a
// masked load of the all-bits-set constant.  Only int/long element types are
// expected here.
instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_vectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    // evpcmpd/q take a 'signed' flag, hence the !is_unsigned below.
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    bool merge = false;
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22598 
22599 
// Integral compare whose ideal result type is a vector mask: the compare
// writes the destination mask register directly.  Covers all four integral
// element widths; signedness comes from the BoolTest predicate.
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    // evpcmp* take a 'signed' flag, hence the !is_unsigned below.
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Compare into dst, unmasked (k0 as the write mask).
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22637 
22638 // Extract
22639 
// Extract a byte/short/int lane from a vector of at most 16 bytes into a GPR;
// the source fits in one 128-bit lane so no lane selection is needed.
instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22654 
// Extract a byte/short/int lane from a 256/512-bit vector: first isolate the
// 128-bit lane containing the element into vtmp, then extract from it.
instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    // get_lane may return src itself when idx falls in lane 0, else vtmp.
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22672 
// Extract a long lane from a <=2-element (128-bit) vector into a GPR.
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    // pextrq requires SSE4.1.
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22685 
// Extract a long lane from a 4- or 8-element (256/512-bit) vector: isolate
// the containing 128-bit lane first, then extract.
instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    // get_lane may return src itself when idx falls in lane 0, else vtmp.
    XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22700 
// Extract a float lane from a <=4-element (128-bit) vector into an XMM
// register; vtmp assists the in-register shuffle.
instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22713 
// Extract a float lane from an 8- or 16-element (256/512-bit) vector:
// isolate the containing 128-bit lane first, then extract.
instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    // get_lane may return src itself when idx falls in lane 0, else vtmp.
    XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22728 
// Extract a double lane from a 2-element (128-bit) vector into an XMM
// register.
instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22740 
// Extract a double lane from a 4- or 8-element (256/512-bit) vector:
// isolate the containing 128-bit lane first, then extract.
instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    // get_lane may return src itself when idx falls in lane 0, else vtmp.
    XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22755 
22756 // --------------------------------- Vector Blend --------------------------------------
22757 
// SSE-only vector blend.  The SSE4.1 pblendvb instruction implicitly reads
// its mask from xmm0, so the tmp operand is pinned to rxmm0 and the mask is
// copied there first when necessary.
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}
22773 
// AVX vector blend for integral element types, <=32-byte vectors, vector
// (not kReg) mask.  Disabled under EnableX86ECoreOpts, which prefers the
// and/andn/or sequence in vblendvp below.
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22787 
// AVX vector blend for floating-point element types, <=32-byte vectors,
// vector (not kReg) mask.  Disabled under EnableX86ECoreOpts.
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22801 
// E-core-optimized vector blend: computes dst = (src2 & mask) | (src1 & ~mask)
// with andn/and/or instead of vpblendvb/vblendvps.
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);  // vtmp = ~mask & src1
    __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);  // dst  =  mask & src2
    __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);  // dst |= vtmp
  %}
  ins_pipe( pipe_slow );
%}
22817 
22818 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22819   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22820             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22821   match(Set dst (VectorBlend (Binary src1 src2) mask));
22822   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22823   effect(TEMP ktmp);
22824   ins_encode %{
22825      int vlen_enc = Assembler::AVX_512bit;
22826      BasicType elem_bt = Matcher::vector_element_basic_type(this);
22827     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22828     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22829   %}
22830   ins_pipe( pipe_slow );
22831 %}
22832 
22833 
22834 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22835   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22836             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22837              VM_Version::supports_avx512bw()));
22838   match(Set dst (VectorBlend (Binary src1 src2) mask));
22839   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22840   ins_encode %{
22841     int vlen_enc = vector_length_encoding(this);
22842     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22843     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22844   %}
22845   ins_pipe( pipe_slow );
22846 %}
22847 
22848 // --------------------------------- ABS --------------------------------------
22849 // a = |a|
// Absolute value of packed bytes; SSSE3 pabsb for 128-bit vectors, VEX/EVEX
// vpabsb for wider ones.
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB  src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22864 
// Absolute value of packed shorts; SSSE3 pabsw for 128-bit vectors, VEX/EVEX
// vpabsw for wider ones.
instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS  src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22879 
// Absolute value of packed ints; SSSE3 pabsd for 128-bit vectors, VEX/EVEX
// vpabsd for wider ones.
instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI  src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22894 
instruct vabsL_reg(vec dst, vec src) %{
  // Absolute value of packed longs. evpabsq is AVX-512 only (UseAVX > 2);
  // without AVX512VL the sub-512-bit encodings are unavailable, so widen
  // the encoding to 512 bits (upper lanes are don't-care).
  match(Set dst (AbsVL  src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22908 
22909 // --------------------------------- ABSNEG --------------------------------------
22910 
instruct vabsnegF(vec dst, vec src) %{
  // Combined abs/neg for packed floats; the ideal opcode (AbsVF vs NegVF)
  // selects the sign-mask operation inside the macro-assembler helper.
  // The 4-element case is handled by the 1-operand rule vabsneg4F below.
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      // 64-bit vector: no AVX length encoding needed.
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22930 
instruct vabsneg4F(vec dst) %{
  // In-place abs/neg for exactly 4 packed floats (128-bit); dst doubles as src.
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22943 
instruct vabsnegD(vec dst, vec src) %{
  // Combined abs/neg for packed doubles; ideal opcode picks the operation.
  match(Set dst (AbsVD  src));
  match(Set dst (NegVD  src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      // 128-bit vector: no AVX length encoding needed.
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22960 
22961 //------------------------------------- VectorTest --------------------------------------------
22962 
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  // VectorTest for sub-128-bit vectors; needs a vector TEMP because the
  // partial-width test widens/masks the operands before setting flags.
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}
22975 
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  // VectorTest for >=128-bit vectors; full-width test, so no TEMP vector
  // is needed (xnoreg is passed for the scratch slot).
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16  $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}
22987 
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  // "All true" (BoolTest::overflow) test of an opmask with <=8 significant
  // bits (or exactly 8 without AVX512DQ's ktestb): materialize the mask in a
  // GPR, keep only the low 'masklen' bits, and compare against the all-ones
  // pattern so the flags reflect all-lanes-set.
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    // Clear bits above the mask length, then compare to (1 << masklen) - 1.
    __ andl($tmp$$Register, (1 << masklen) - 1);
    __ cmpl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}
23003 
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  // "Any true" (BoolTest::ne) test of an opmask with <=8 significant bits:
  // the andl against the low 'masklen' bits sets ZF, which is all the
  // consumer needs (nonzero => some lane set).
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}
23018 
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  // Opmask test via kortest for masks of >=16 bits, or exactly 8 bits when
  // AVX512DQ provides the byte-width kortestb form.
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8  $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    // kortest of src1 with itself sets ZF/CF from the mask contents.
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
23030 
23031 //------------------------------------- LoadMask --------------------------------------------
23032 
instruct loadMask(legVec dst, legVec src) %{
  // VectorLoadMask into a regular vector register (non-vectmask type) on
  // targets without AVX512VL+BW.
  predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}
23045 
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  // VectorLoadMask into an opmask register on AVX-512 targets lacking
  // AVX512VL+BW; always operates at 512-bit width ('true' = novlbw path).
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
23057 
instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
  // VectorLoadMask into an opmask register when AVX512VL+BW is available;
  // the encoding width is taken from the input vector (in(1)).
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23070 
23071 //------------------------------------- StoreMask --------------------------------------------
23072 
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  // VectorStoreMask for byte elements: a boolean lane is 0 or -1, so
  // abs() normalizes each byte to 0/1.
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23090 
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  // VectorStoreMask for short elements: narrow 16-bit mask lanes to bytes
  // (pack), then normalize -1 to 1 with byte abs.
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      // abs first (-1 -> 1), then pack against zero to narrow to bytes.
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      // 256-bit src: fold the high 128-bit lane down, pack, then abs.
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23113 
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  // VectorStoreMask for int elements (pre-AVX-512): narrow 32-bit mask
  // lanes to bytes via two pack steps, normalizing -1 to 1 with abs.
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      assert(UseSSE >= 3, "required");
      // abs (-1 -> 1), then pack dword->word->byte against zero.
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      // 256-bit src: fold the high lane down, pack twice, then byte abs.
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23139 
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  // VectorStoreMask for 2 long elements (pre-AVX-512): pshufd selects the
  // low dword of each 64-bit lane, then abs + two packs narrow to bytes.
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    // 0x8 gathers dwords 0 and 2 (one per 64-bit lane) into the low half.
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23155 
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  // VectorStoreMask for 4 long elements (256-bit src, pre-AVX-512): gather
  // one dword per 64-bit lane across both 128-bit lanes, then pack down to
  // bytes and normalize with abs.
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    // 0x88 selects the even dwords of each lane (low dword of each long).
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    // Merge high-lane results into elements 2-3 of dst.
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23173 
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  // AVX-512 VectorStoreMask for int elements: evpmovdb truncates each dword
  // lane to a byte, then byte abs normalizes -1 to 1. Without AVX512VL the
  // source encoding must be widened to 512 bits.
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23189 
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  // AVX-512 VectorStoreMask for long elements: evpmovqb truncates each
  // qword lane to a byte, then byte abs normalizes -1 to 1. Without
  // AVX512VL the source encoding must be widened to 512 bits.
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23205 
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  // VectorStoreMask from an opmask register on targets without AVX512VL+BW:
  // expand the opmask to a 512-bit int vector via a masked load of constant
  // bits, then truncate dwords to bytes.
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
23219 
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  // VectorStoreMask from an opmask register with AVX512VL+BW: evpmovm2b
  // expands mask bits to 0/-1 bytes, then byte abs normalizes to 0/1.
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23232 
instruct vmaskcast_evex(kReg dst) %{
  // VectorMaskCast on an opmask register is a pure type change:
  // zero-cost, no code emitted.
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}
23242 
instruct vmaskcast(vec dst) %{
  // VectorMaskCast between vector masks of the same byte length is a pure
  // type change: zero-cost, no code emitted.
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}
23253 
instruct vmaskcast_avx(vec dst, vec src) %{
  // VectorMaskCast between masks of different byte lengths: the lane width
  // changes, so the mask must be widened/narrowed by the helper.
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}
23266 
23267 //-------------------------------- Load Iota Indices ----------------------------------
23268 
instruct loadIotaIndices(vec dst, immI_0 src) %{
  // Load the iota (0,1,2,...) index vector from the constant table.
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
     BasicType bt = Matcher::vector_element_basic_type(this);
     __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}
23279 
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  // PopulateIndex with an int start and stride 1 (src2 asserted == 1):
  // dst[i] = src1 + i, built as broadcast(src1) + iota.
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
     assert($src2$$constant == 1, "required");
     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
     int vlen_enc = vector_length_encoding(this);
     BasicType elem_bt = Matcher::vector_element_basic_type(this);
     __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
     __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
     __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23295 
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  // PopulateIndex with a long start and stride 1 (src2 asserted == 1):
  // same broadcast(src1) + iota scheme as the int variant above.
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
     assert($src2$$constant == 1, "required");
     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
     int vlen_enc = vector_length_encoding(this);
     BasicType elem_bt = Matcher::vector_element_basic_type(this);
     __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
     __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
     __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23311 
23312 //-------------------------------- Rearrange ----------------------------------
23313 
23314 // LoadShuffle/Rearrange for Byte
instruct rearrangeB(vec dst, vec shuffle) %{
  // Byte rearrange for <32 elements: in-place SSSE3/SSE4 pshufb (single
  // 128-bit lane, so no cross-lane handling is needed).
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23326 
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  // Byte rearrange for 32 elements without AVX512_VBMI: vpshufb cannot
  // cross 128-bit lanes, so shuffle both the original and a lane-swapped
  // copy, then blend the two results under a mask derived from the indices.
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
23348 
23349 
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  // Byte rearrange for >32 elements without AVX512_VBMI (no vpermb):
  // delegated to the macro-assembler multi-step byte rearrange, which
  // needs three vector temps, an opmask temp and a GPR temp.
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23364 
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  // Byte rearrange for >=32 elements with AVX512_VBMI: a single
  // cross-lane vpermb does the whole permutation.
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23376 
23377 // LoadShuffle/Rearrange for Short
23378 
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  // VectorLoadShuffle for shorts without AVX512BW: there is no 16-bit
  // permute, so convert each short index i into the byte-index pair
  // (2i, 2i+1) usable by pshufb/vpshufb.
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from short shuffle mask
    // only byte shuffle instruction available on these platforms
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
23419 
instruct rearrangeS(vec dst, vec shuffle) %{
  // Short rearrange for <=8 elements without AVX512BW: the shuffle was
  // pre-expanded to byte indices (see loadShuffleS), so pshufb suffices.
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23431 
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  // Short rearrange for 16 elements without AVX512BW: same lane-swap +
  // double-shuffle + blend scheme as rearrangeB_avx, operating on the
  // byte-expanded shuffle indices.
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
23453 
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  // Short rearrange with AVX512BW: cross-lane vpermw. Without AVX512VL
  // the sub-512-bit encodings are unavailable, so widen to 512 bits.
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23468 
23469 // LoadShuffle/Rearrange for Integer and Float
23470 
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  // VectorLoadShuffle for 4 int/float elements on SSE-only targets:
  // expand each int index i into the byte quadruple (4i, 4i+1, 4i+2, 4i+3)
  // so the rearrange can be done with pshufb.
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from int shuffle mask
    // only byte shuffle instruction available on these platforms

    // Duplicate and multiply each shuffle by 4
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23500 
instruct rearrangeI(vec dst, vec shuffle) %{
  // Int/float rearrange on SSE-only targets: the shuffle was pre-expanded
  // to byte indices (see loadShuffleI), so an in-place pshufb suffices.
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23512 
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  // Int/float rearrange on AVX targets: delegated to the macro-assembler
  // helper, which selects the appropriate permute for the element type.
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23525 
23526 // LoadShuffle/Rearrange for Long and Double
23527 
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  // VectorLoadShuffle for long/double without AVX512VL: there is no
  // suitable 64-bit permute, so expand each long index i into the dword
  // index pair (2i, 2i+1) usable by vpermd (see rearrangeL below).
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from long shuffle mask
    // only double word shuffle instruction available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23553 
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  // Long/double rearrange without AVX512VL: the shuffle was pre-expanded
  // to dword index pairs (see loadShuffleL), so cross-lane vpermd works.
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23567 
// Rearrange long/double elements directly with variable-index vpermq
// (AVX512, with VL for sub-512-bit vectors).
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Variable-index vpermq has no 128-bit encoding; widen to 256-bit
    // (upper lanes are don't-care for a 128-bit vector).
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23584 
23585 // --------------------------------- FMA --------------------------------------
23586 // a * b + c
23587 
// Packed single-precision fused multiply-add, register form: c = a * b + c.
// Note the result is accumulated into operand c (matches the FmaVF shape).
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF  c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23599 
// Packed single-precision FMA with a memory operand for b: c = a * [b] + c.
// Predicate restricts to vectors wider than 8 bytes, so the memory operand
// is a full vector load.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF  c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23612 
// Packed double-precision fused multiply-add, register form: c = a * b + c.
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD  c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23624 
// Packed double-precision FMA with a memory operand for b: c = a * [b] + c.
// Mirrors vfmaF_mem; >8-byte predicate ensures a full vector load.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD  c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23637 
23638 // --------------------------------- Vector Multiply Add --------------------------------------
23639 
// Multiply adjacent short pairs and add into int lanes (pmaddwd).
// SSE form is destructive: dst is both the first source and the result.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23649 
// Multiply adjacent short pairs and add into int lanes, three-operand AVX
// form (vpmaddwd), non-destructive sources.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23660 
23661 // --------------------------------- Vector Multiply Add Add ----------------------------------
23662 
// Fused multiply-add-accumulate via AVX512-VNNI: dst += pmaddwd(src1, src2)
// in a single evpdpwssd. The low ins_cost makes this rule win over the
// separate vpmaddwd + vpaddd selection when VNNI is available.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}
23675 
23676 // --------------------------------- PopCount --------------------------------------
23677 
// Element-wise population count for int/long vectors using the EVEX
// popcount path; the predicate checks that the required AVX512 popcount
// support is present for this element type.
instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    // Unmasked operation: k0 with merge == true selects no write-masking.
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23691 
// Masked element-wise population count for int/long vectors.
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    // Preload dst with src so that, under merge-masking, lanes cleared in
    // $mask keep the original src value.
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23705 
// Element-wise population count fallback when the EVEX popcount
// instructions are not available for this element type; uses a multi-step
// AVX sequence that needs two vector temps and a scratch GPR.
instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23721 
23722 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23723 
// Count trailing zeros per element (int/long) on the EVEX path; predicate
// reuses the non-subword CLZ availability check since the implementation is
// built on leading-zero counting.
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    // xnoreg/k0 arguments: the extra temps and mask are unused on this path.
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23739 
// Count trailing zeros per element for short vectors (needs AVX512CD, plus
// VL for sub-512-bit vectors); uses three vector temps.
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23756 
// Count trailing zeros per element for byte vectors (needs AVX512VL+BW);
// the heaviest variant: four vector temps, a mask temp and a scratch GPR.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23772 
// Count trailing zeros per element, pre-AVX512VL fallback for vectors
// narrower than 64 bytes.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23786 
23787 
23788 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23789 
// Bitwise ternary logic (vpternlogd): dst = truth_table(dst, src2, src3)
// where $func is the 8-bit truth-table immediate.
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
23800 
// Bitwise ternary logic with the third operand taken from memory; >8-byte
// predicate ensures the memory operand is a full vector load.
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
23812 
23813 // --------------------------------- Rotation Operations ----------------------------------
// Vector rotate (left or right) by an 8-bit immediate shift count; the
// direction is recovered from the ideal opcode inside the macro assembler.
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode      = this->ideal_Opcode();
    int vector_len  = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
23826 
// Vector rotate (left or right) with a per-element variable shift vector.
instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode      = this->ideal_Opcode();
    int vector_len  = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
23839 
23840 // ---------------------------------- Masked Operations ------------------------------------
// Masked vector load, AVX path: the mask lives in a vector register (not a
// kReg vectmask), used for non-subword element types.
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23852 
23853 
// Masked vector load, EVEX path: the mask is a true AVX512 opmask (kReg).
instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    // false => load direction (register <- memory) in evmovdqu.
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
23865 
// Masked vector store, AVX path (vector-register mask). Vector length and
// element type are taken from the source node because 'this' is the store.
instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23878 
// Masked vector store, EVEX path (kReg opmask); type/length come from the
// source node, mirroring vmasked_store_avx_non_subword.
instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    // true => store direction (memory <- register) in evmovdqu.
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23891 
// Debug-mode alignment check: stop the VM if any of the $mask bits of the
// vector address are set (i.e. the access is misaligned).
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
23906 
// Masked vector comparison: returns -1 if all lanes selected by $mask are
// equal between src1 and src2, otherwise the index of the first mismatching
// lane.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // ktmp2 = ~mask: lanes not under test are treated as "equal".
    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    // Preload the all-match result.
    __ mov64($dst$$Register, -1L);
    // ktmp1 = per-lane equality, computed only under $mask.
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    // kortest sets CF iff (ktmp1 | ktmp2) is all ones, i.e. every masked
    // lane compared equal -> keep -1 and exit.
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    // Otherwise the first zero bit of ktmp1 is the first mismatch index.
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
  %}
  ins_pipe( pipe_slow );
%}
23931 
23932 
// Generate an opmask with the low $len bits set, from a runtime length.
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
23942 
// Generate an opmask with the low $len bits set, from a compile-time
// constant length. Assumes 1 <= $len <= 64 (a shift by 64 - 0 would be
// undefined), which holds for the vector lengths C2 generates.
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  // Include $dst in the format for consistency with vmask_gen above.
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
  effect(TEMP temp);
  ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
    __ kmovql($dst$$KRegister, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
23953 
// VectorMaskToLong, EVEX path: mask is a kReg vectmask; result is a long
// with one bit per lane.
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    // dst doubles as the scratch register on this path.
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23970 
// VectorMaskToLong, boolean-vector path: mask is held in a vector register
// (no kReg vectmask available).
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23986 
// VectorMaskToLong, AVX path: matches through an explicit VectorStoreMask so
// the store-mask conversion is folded into the mask-to-long operation.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24002 
// VectorMaskTrueCount, EVEX path: count set lanes of a kReg vectmask.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24019 
// VectorMaskTrueCount, boolean-vector path (mask in a vector register).
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24035 
// VectorMaskTrueCount, AVX path: folds an explicit VectorStoreMask into the
// count, mirroring vmask_tolong_avx.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24051 
// VectorMaskFirstTrue / VectorMaskLastTrue, EVEX path; the specific
// operation is dispatched on the ideal opcode inside vector_mask_operation.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24069 
// VectorMaskFirstTrue / VectorMaskLastTrue, boolean-vector path.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24086 
// VectorMaskFirstTrue / VectorMaskLastTrue, AVX path: folds an explicit
// VectorStoreMask into the operation.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24103 
24104 // --------------------------------- Compress/Expand Operations ---------------------------
// Vector compress/expand fallback for AVX2 (no AVX512VL, <= 32-byte
// vectors); emulated with a permutation table, hence the many temps.
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt  = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24120 
// Vector compress/expand, native AVX512 path (vpcompress/vpexpand family).
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt  = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
24134 
// Compress a kReg mask itself (CompressM): pack the set bits of $mask into
// the low positions of $dst.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
24146 
24147 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24148 
// Reverse the bit order within each element, non-GFNI fallback sequence.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24162 
// Reverse the bit order within each element using GFNI; the constant
// 0x8040201008040201 is the bit-reversal matrix for vgf2p8affineqb.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt  = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
24177 
// Reverse the byte order within each element (simple shuffle path; the
// 64-byte/no-BW case is handled by vreverse_byte64_reg below).
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24190 
// Reverse the byte order within each element for 64-byte vectors without
// AVX512BW (no 512-bit byte shuffle available), via a multi-step sequence.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24204 
24205 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24206 
// Count leading zeros per element for non-subword types (per the helper
// predicate's name, int/long) with EVEX encoding. No temps are needed here:
// xnoreg/k0/noreg are passed for all scratch operands. The trailing 'true'
// is presumably the merge flag of the helper — confirm against
// vector_count_leading_zeros_evex.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
     int vlen_enc = vector_length_encoding(this, $src);
     BasicType bt = Matcher::vector_element_basic_type(this, $src);
     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24220 
// Mask-predicated variant of the EVEX int/long CLZ above. dst is preloaded
// from src with evmovdquq first — presumably so that masked-off lanes carry
// the source value under merge masking; confirm against the helper.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24235 
// Count leading zeros per T_SHORT element via the EVEX helper. Requires
// AVX512CD, plus either AVX512VL or a full 512-bit vector (no VL needed at
// full width). Two XMM temporaries are clobbered.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24251 
// Count leading zeros per T_BYTE element (requires AVX512VL+BW). The byte
// path is the most temp-hungry: three XMM temps, one mask-register temp and
// one GPR temp are clobbered by the helper.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24266 
// AVX (non-EVEX) fallback for per-T_INT CLZ on sub-512-bit vectors when
// AVX512VL is absent. The int flavor needs no GPR temp (noreg is passed),
// unlike the generic AVX rule below.
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24281 
// AVX (non-EVEX) fallback for per-element CLZ on all non-T_INT element types
// for sub-512-bit vectors when AVX512VL is absent. Unlike the T_INT rule
// above, this one also clobbers a GPR temp.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24296 
24297 // ---------------------------------- Vector Masked Operations ------------------------------------
24298 
// Masked vector add, reg-reg form, for all element types (byte..double).
// dst doubles as the first source; evmasked_op dispatches on the ideal
// opcode. The 'true' argument is presumably the merge flag (masked-off
// lanes keep dst) — confirm against evmasked_op.
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24316 
// Masked vector add with the second operand folded from memory
// (LoadVector); otherwise identical to vadd_reg_masked.
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24334 
// Masked bitwise XOR, reg-reg form (XorV is type-agnostic, so one match
// rule suffices). dst doubles as the first source.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24347 
// Masked bitwise XOR with the second operand folded from memory.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24360 
// Masked bitwise OR, reg-reg form. dst doubles as the first source.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24373 
// Masked bitwise OR with the second operand folded from memory.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24386 
// Masked bitwise AND, reg-reg form. dst doubles as the first source.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24399 
// Masked bitwise AND with the second operand folded from memory.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24412 
// Masked vector subtract, reg-reg form, for all element types
// (byte..double). dst doubles as the first source (minuend).
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24430 
// Masked vector subtract with the subtrahend folded from memory.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24448 
// Masked vector multiply, reg-reg form. Note: no MulVB rule — byte
// multiply is not matched here (short through double only).
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24465 
// Masked vector multiply with the second operand folded from memory
// (short through double, matching vmul_reg_masked).
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24482 
// Masked square root (float/double). A unary op: dst is passed for both
// source operand slots of the generic binary evmasked_op dispatcher.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24496 
// Masked vector divide, reg-reg form (floating-point only — there is no
// integer vector divide). dst doubles as the dividend.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24510 
// Masked vector divide (float/double) with the divisor folded from memory.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24524 
24525 
// Masked rotate by immediate count. Handles BOTH rotate-left and
// rotate-right: the ideal opcode passed to evmasked_op selects direction.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24539 
// Masked rotate by a per-lane vector count. Covers both rotate directions;
// the ideal opcode selects left vs right.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24553 
// Masked left shift by an immediate count (short/int/long). The count is
// wrapped in LShiftCntV by the ideal graph; its constant is passed directly.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24568 
// Masked left shift where every lane uses the SAME count held in a vector
// register (predicate excludes variable per-lane shifts). The extra trailing
// 'false' to evmasked_op distinguishes this from the var-shift rule below,
// which passes 'true' — presumably an is_var_shift flag; confirm against
// evmasked_op.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24584 
// Masked variable left shift: each lane shifts by its own count from src2
// (predicate requires is_var_shift). The trailing 'true' marks the
// variable-shift form, in contrast to vlshift_reg_masked above.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24600 
// Masked arithmetic (signed) right shift by an immediate count
// (short/int/long).
instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24615 
// Masked arithmetic right shift with a uniform vector-register count
// (non-variable shift; see vrshiftv_reg_masked for the per-lane form).
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24631 
// Masked variable arithmetic right shift: per-lane counts from src2.
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24647 
// Masked logical (unsigned) right shift by an immediate count
// (short/int/long). Note the count node is RShiftCntV, shared with the
// signed form; URShiftV* distinguishes the operation.
instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24662 
// Masked logical right shift with a uniform vector-register count
// (non-variable shift; see vurshiftv_reg_masked for the per-lane form).
instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24678 
// Masked variable logical right shift: per-lane counts from src2.
instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24694 
// Masked element-wise maximum, reg-reg form (MaxV is type-agnostic).
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24707 
// Masked element-wise maximum with the second operand folded from memory.
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24720 
// Masked element-wise minimum, reg-reg form (MinV is type-agnostic).
instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24733 
// Masked element-wise minimum with the second operand folded from memory.
instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24746 
// Masked vector rearrange (shuffle by the index vector in src2). Note this
// passes 'false' where the other masked rules pass 'true' — presumably
// zero-masking rather than merge-masking; confirm against evmasked_op.
instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24759 
// Masked absolute value for integral element types (byte..long). Unary op:
// dst is passed for both source slots of the generic dispatcher.
instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24775 
// Masked fused multiply-add (float/double), all-register form. dst serves
// as the accumulator operand; requires the UseFMA flag (asserted).
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24790 
// Masked fused multiply-add with the third operand folded from memory.
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24805 
// Masked vector compare producing a mask register. Dispatches on the first
// source's element type: integral types use evpcmp{b,w,d,q}, translating the
// ideal bool-test condition into an AVX-512 comparison predicate and a
// signed/unsigned flag; floating-point types use evcmpp{s,d} with an FP
// comparison predicate. The result type must be a TypeVectMask (asserted).
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison dispatch on the element type. For integral types the
    // signed-compare flag is the inverse of is_unsigned_booltest_pred.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
24855 
// MaskAll: broadcast an int condition into an opmask register
// (vector length up to 32 lanes, so the int source suffices).
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
24866 
// Mask NOT for lengths below 8: XorVMask with an all-ones mask (MaskAll -1)
// is a bitwise complement. Sub-byte mask lengths need the temporaries to
// avoid flipping bits beyond the logical mask width.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24878 
// Mask NOT for exact-width cases (8 with DQ, 16 always, >16 with BW):
// the knot of the full mask register is correct without temporaries.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24891 
// Convert a long bitmask to a boolean-vector mask (non-opmask path),
// for mask lengths up to 8.
// NOTE(review): xnoreg is passed where the GT8 variant passes its xtmp;
// presumably the helper needs no XMM temp for <= 8 lanes, yet 'xtmp' is
// still declared as TEMP — confirm whether the effect can be dropped.
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc  = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24905 
24906 
// Convert a long bitmask to a boolean-vector mask (non-opmask path),
// for mask lengths above 8 (capped at 32); needs an XMM temp and kills flags.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc  = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24921 
// Opmask path: a long bitmask maps directly onto a kReg via kmov.
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24931 
// Bitwise AND/OR/XOR of two opmask registers.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    // Widen sub-16-bit masks to 16 when AVX512DQ (byte-granular k-ops) is
    // unavailable, so a 16-bit-granularity k-instruction can be used.
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24948 
// Masked three-input ternary logic (VPTERNLOG) with register operands;
// 'func' is the 8-bit truth-table immediate.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24960 
// Masked ternary logic (VPTERNLOG) with the third operand loaded from memory.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24972 
// CastVV on an opmask register: a type-system-only cast, emits no code.
instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
24983 
// CastVV on a vector register: a type-system-only cast, emits no code.
instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
24994 
// CastVV on a legacy-encodable vector register: type-only cast, no code.
instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
25005 
// IsInfiniteF via VFPCLASSSS: imm8 0x18 selects the +infinity (0x08) and
// -infinity (0x10) classes; the resulting 1-bit mask is moved to a GPR.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25017 
// IsInfiniteD via VFPCLASSSD: imm8 0x18 selects the +infinity (0x08) and
// -infinity (0x10) classes; the resulting 1-bit mask is moved to a GPR.
instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25029 
// Signed saturating add/sub for byte/short lanes, register-register;
// hardware has native signed-saturating subword instructions.
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // 'false' = signed saturation.
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25045 
// Unsigned saturating add/sub for byte/short lanes, register-register.
instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // 'true' = unsigned saturation.
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25061 
// Signed saturating add/sub for int/long lanes (no native instruction):
// EVEX path, using opmask registers for overflow detection.
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25081 
// Signed saturating add/sub for int/long lanes, AVX fallback
// (<= 32-byte vectors without AVX512VL): all temps are XMM registers.
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25100 
// Unsigned saturating add for int/long lanes, EVEX path.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25117 
// Unsigned saturating add for int/long lanes, AVX fallback.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25134 
// Unsigned saturating subtract for int/long lanes, EVEX path
// (only a k-register temp is needed; dst is not a TEMP here).
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25151 
// Unsigned saturating subtract for int/long lanes, AVX fallback.
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25168 
// Signed saturating add/sub for byte/short lanes with src2 from memory.
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // 'false' = signed saturation.
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25184 
// Unsigned saturating add/sub for byte/short lanes with src2 from memory.
instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // 'true' = unsigned saturation.
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25200 
// Mask-predicated signed saturating add/sub, byte/short lanes, reg operand.
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Flags: is_unsigned = false, merge = true (per helper parameter order —
    // unselected lanes presumably keep dst; confirm against evmasked_saturating_op).
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25215 
// Mask-predicated unsigned saturating add/sub, byte/short lanes, reg operand.
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25230 
// Mask-predicated signed saturating add/sub, byte/short lanes, memory operand.
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25245 
// Mask-predicated unsigned saturating add/sub, byte/short lanes, memory operand.
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25260 
// SelectFromTwoVector: per-lane pick from the concatenation of src1:src2
// steered by 'index'; the index register doubles as the destination.
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25272 
// Bit-preserving move of a 16-bit short from a GPR into an XMM register
// (short -> half-float reinterpretation).
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}
25282 
// Bit-preserving move of a half-float from an XMM register into a GPR
// (half-float -> short reinterpretation).
instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25292 
// Fused float->half conversion plus reinterpret: a single vcvtps2ph keeps
// the result in an XMM register, skipping the GPR round-trip.
// imm8 0x04 selects round-to-nearest-even.
instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25302 
// Fused reinterpret plus half->float conversion: a single vcvtph2ps keeps
// the value in an XMM register, skipping the GPR round-trip.
instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25312 
// Scalar FP16 square root.
instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25322 
// Scalar FP16 add/div/mul/sub; the helper dispatches on the ideal opcode.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25336 
// Scalar FP16 min/max on AVX10.2 hardware: a single VMINMAXSH with the
// sign-aware compare function selected by the ideal opcode.
instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
  %}
  ins_pipe( pipe_slow );
%}
25349 
// Scalar FP16 min/max fallback for pre-AVX10.2 hardware: emulated via a
// multi-instruction sequence that needs a k-register and two XMM temps.
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
25364 
// Scalar FP16 FMA: vfmadd132sh computes dst = dst * src1 + src2,
// matching the (FmaHF src2 (Binary dst src1)) shape.
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF  src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
25375 
25376 
// Packed FP16 square root, register operand.
instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25387 
// Packed FP16 square root with the operand folded from memory; the
// VectorReinterpret in the match covers the short->FP16 view of the load.
instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25398 
// Packed FP16 add/div/mul/sub, register operands; dispatch on ideal opcode.
instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25413 
25414 
// Packed FP16 add/div/mul/sub with src2 folded from memory.
instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25429 
// Packed FP16 FMA: evfmadd132ph computes dst = dst * src1 + src2.
instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25440 
// Packed FP16 FMA with the multiplier folded from memory:
// dst = dst * load(src1) + src2.
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25451 
// Packed FP16 min/max on AVX10.2 with src2 folded from memory;
// k0 means no masking, 'true' is the merge flag.
instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function =  this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25465 
// Packed FP16 min/max on AVX10.2, register operands; k0 = no masking.
instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function =  this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25479 
// Packed FP16 min/max fallback for pre-AVX10.2 hardware, emulated with a
// k-register and two XMM temps.
// NOTE(review): src2 is passed before src1 to the helper — presumably the
// operand order vector_max_min_fp16 expects; confirm against its signature.
instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25495 
25496 //----------PEEPHOLE RULES-----------------------------------------------------
25497 // These must follow all instruction definitions as they use the names
25498 // defined in the instructions definitions.
25499 //
25500 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
25502 //
25503 // peepmatch ( root_instr_name [preceding_instruction]* );
25504 //
25505 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure
// // should reside in the architecture dependent peephole file and has the
// // signature MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...).
// // The arguments are the basic block, the current node index inside the
// // block, the register allocator, functions that when invoked create the
// // new nodes defined in peepreplace, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch. The procedure returns true if
// // the replacement was successful, else false
25514 //
25515 // peepconstraint %{
25516 // (instruction_number.operand_name relational_op instruction_number.operand_name
25517 //  [, ...] );
25518 // // instruction numbers are zero-based using left to right order in peepmatch
25519 //
25520 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25521 // // provide an instruction_number.operand_name for each operand that appears
25522 // // in the replacement instruction's match rule
25523 //
25524 // ---------VM FLAGS---------------------------------------------------------
25525 //
25526 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25527 //
25528 // Each peephole rule is given an identifying number starting with zero and
25529 // increasing by one in the order seen by the parser.  An individual peephole
25530 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25531 // on the command-line.
25532 //
25533 // ---------CURRENT LIMITATIONS----------------------------------------------
25534 //
25535 // Only transformations inside a basic block (do we need more for peephole)
25536 //
25537 // ---------EXAMPLE----------------------------------------------------------
25538 //
25539 // // pertinent parts of existing instructions in architecture description
25540 // instruct movI(rRegI dst, rRegI src)
25541 // %{
25542 //   match(Set dst (CopyI src));
25543 // %}
25544 //
25545 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25546 // %{
25547 //   match(Set dst (AddI dst src));
25548 //   effect(KILL cr);
25549 // %}
25550 //
25551 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25552 // %{
25553 //   match(Set dst (AddI dst src));
25554 // %}
25555 //
25556 // 1. Simple replacement
25557 // - Only match adjacent instructions in same basic block
25558 // - Only equality constraints
25559 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25560 // - Only one replacement instruction
25561 //
25562 // // Change (inc mov) to lea
25563 // peephole %{
25564 //   // lea should only be emitted when beneficial
25565 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25566 //   // increment preceded by register-register move
25567 //   peepmatch ( incI_rReg movI );
25568 //   // require that the destination register of the increment
25569 //   // match the destination register of the move
25570 //   peepconstraint ( 0.dst == 1.dst );
25571 //   // construct a replacement instruction that sets
25572 //   // the destination to ( move's source register + one )
25573 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25574 // %}
25575 //
25576 // 2. Procedural replacement
// - More flexible means of finding the relevant nodes
25578 // - More flexible constraints
25579 // - More flexible transformations
25580 // - May utilise architecture-dependent API more effectively
25581 // - Currently only one replacement instruction due to adlc parsing capabilities
25582 //
25583 // // Change (inc mov) to lea
25584 // peephole %{
25585 //   // lea should only be emitted when beneficial
25586 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25587 //   // the rule numbers of these nodes inside are passed into the function below
25588 //   peepmatch ( incI_rReg movI );
25589 //   // the method that takes the responsibility of transformation
25590 //   peepprocedure ( inc_mov_to_lea );
25591 //   // the replacement is a leaI_rReg_immI, a lambda upon invoked creating this
25592 //   // node is passed into the function above
25593 //   peepreplace ( leaI_rReg_immI() );
25594 // %}
25595 
// These instructions are not matched by the matcher but are used by the peephole rules
// Three-operand int add expressed as leal; never matched directly
// (predicate(false)), only instantiated by the lea-coalescing peepholes.
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal    $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dreg = $dst$$Register;
    Register areg = $src1$$Register;
    Register breg = $src2$$Register;
    // rbp and r13 as a base register force an extra displacement byte in
    // the ModRM/SIB encoding, so move them to the index position instead.
    if (areg == rbp || areg == r13) {
      assert(breg != rbp && breg != r13, "");
      __ leal(dreg, Address(breg, areg, Address::times_1));
    } else {
      __ leal(dreg, Address(areg, breg, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25615 
// Int add of an immediate expressed as leal; never matched directly
// (predicate(false)), only instantiated by the lea_coalesce_imm peepholes.
instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal    $dst, [$src1 + $src2]" %}
  ins_encode %{
    // dst = src1 + imm in one instruction; lea does not modify flags.
    __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25626 
// Int left shift by 1..3 expressed as leal with a scaled index; never
// matched directly (predicate(false)), only used by the peephole rules.
instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal    $dst, [$src << $shift]" %}
  ins_encode %{
    Register sreg = $src$$Register;
    // The shift amount maps directly onto the SIB scale encoding.
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    if (scale != Address::times_2 || sreg == rbp || sreg == r13) {
      // General case: index-only form [src * 2^shift].
      __ leal($dst$$Register, Address(noreg, sreg, scale));
    } else {
      // src << 1 as [src + src]: the base+index form avoids the 4-byte
      // displacement that the no-base SIB encoding requires.
      __ leal($dst$$Register, Address(sreg, sreg, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25643 
// Three-operand long add expressed as leaq; never matched directly
// (predicate(false)), only instantiated by the lea-coalescing peepholes.
instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq    $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dreg = $dst$$Register;
    Register areg = $src1$$Register;
    Register breg = $src2$$Register;
    // rbp and r13 as a base register force an extra displacement byte in
    // the ModRM/SIB encoding, so move them to the index position instead.
    if (areg == rbp || areg == r13) {
      assert(breg != rbp && breg != r13, "");
      __ leaq(dreg, Address(breg, areg, Address::times_1));
    } else {
      __ leaq(dreg, Address(areg, breg, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25662 
// Long add of a 32-bit immediate expressed as leaq; never matched directly
// (predicate(false)), only instantiated by the lea_coalesce_imm peepholes.
instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq    $dst, [$src1 + $src2]" %}
  ins_encode %{
    // dst = src1 + imm in one instruction; lea does not modify flags.
    __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25673 
// Long left shift by 1..3 expressed as leaq with a scaled index; never
// matched directly (predicate(false)), only used by the peephole rules.
instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq    $dst, [$src << $shift]" %}
  ins_encode %{
    Register sreg = $src$$Register;
    // The shift amount maps directly onto the SIB scale encoding.
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    if (scale != Address::times_2 || sreg == rbp || sreg == r13) {
      // General case: index-only form [src * 2^shift].
      __ leaq($dst$$Register, Address(noreg, sreg, scale));
    } else {
      // src << 1 as [src + src]: the base+index form avoids the 4-byte
      // displacement that the no-base SIB encoding requires.
      __ leaq($dst$$Register, Address(sreg, sreg, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25690 
25691 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25692 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25693 // processors with at least partial ALU support for lea
25694 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25695 // beneficial for processors with full ALU support
25696 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25697 
// addI reg,reg preceded by a mov -> three-operand leal (dst = src1 + src2).
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}

// addI reg,imm preceded by a mov -> leal with a displacement.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

// incI preceded by a mov -> leal; gated on full 3-operand ALU lea support
// (or Intel Cascade Lake).
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

// decI preceded by a mov -> leal; same gating as the incI rule.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

// salI reg,imm2 preceded by a mov -> leal with a scaled index.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}
25739 
// addL reg,reg preceded by a mov -> three-operand leaq (dst = src1 + src2).
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}

// addL reg,imm preceded by a mov -> leaq with a displacement.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

// incL preceded by a mov -> leaq; gated on full 3-operand ALU lea support
// (or Intel Cascade Lake).
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

// decL preceded by a mov -> leaq; same gating as the incL rule.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

// salL reg,imm2 preceded by a mov -> leaq with a scaled index.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}
25781 
// Each of the narrow-oop lea forms below is handed to lea_remove_redundant,
// which deletes the instruction when it determines it to be redundant.
peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}
25799 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
25802 
// int variant: test_may_remove decides whether this testI_reg can be dropped.
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}

// long variant of the rule above.
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}
25816 
25817 
25818 //----------SMARTSPILL RULES---------------------------------------------------
25819 // These must follow all instruction definitions as they use the names
25820 // defined in the instructions definitions.