//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
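//
// For example, the definition "reg_def RAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg())"
// below reads: RAX is save-on-call under both the allocator's convention and
// the C calling convention, is spilled with LoadI/StoreI, and is emitted with
// hardware encoding 0.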

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)
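// (For example, encoding SPL/BPL/SIL/DIL requires a REX prefix; without a
// REX prefix those byte-register encodings select AH/CH/DH/BH instead.)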

// RBX, RSI, and RDI were previously save-on-entry for Java code; SOE was then
// turned off in Java code due to frequent use of uncommon traps. Now that the
// allocator is better, RSI and RDI are SOE registers again.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif
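
// (RSI and RDI are callee-saved in the Windows x64 ABI but volatile in the
// System V ABI, hence the SOE setting above only under _WIN64.)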

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

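// Per the heuristic above, chunk0 below lists the save-on-call scratch
// registers (R10, R11, R8, R9) first, and registers with fixed roles
// (e.g. RAX, RBP, R15) together with RSP last.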
alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The full registers are used by the SSE4.2 intrinsics, the array copy
// stubs, and superword operations (see the UseSSE42Intrinsics,
// UseXMMForArrayCopy, and UseSuperWord flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

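// Reading the slot names below: XMM0 is word (a) of register 0, XMM0b is
// word (b), and so on through XMM0p. A Float occupies XMM0 alone, a Double
// occupies XMM0:XMM0b, and a full 512-bit vector occupies XMM0 through XMM0p.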
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
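// K0 is deliberately not defined here: in EVEX encodings an opmask field
// of zero means "no masking", so k0 cannot be allocated as a general
// predicate register.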
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs
// (excluding RSP and the TLS register R15).
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

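// Note: the mask-valued classes below (reg_class ... %{ return ..._mask; %})
// return register masks computed at VM startup rather than fixed register
// lists, so that availability (for example, of the APX registers R16-R31
// under UseAPX) can be decided dynamically; in current sources these masks
// are initialized in reg_mask_init().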
// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);
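// (In compiled code R15 holds the current JavaThread, r15_thread.)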

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
// Class for pre-EVEX float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
// Class for EVEX float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
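// reg_class_dynamic selects between the two statically defined classes at
// runtime: the first (EVEX, XMM0-XMM31) when the trailing predicate holds,
// the second (legacy, XMM0-XMM15) otherwise.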
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
// Class for pre-EVEX double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
// Class for EVEX double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
// Class for pre-EVEX 32-bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
// Class for EVEX 32-bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre-EVEX 64-bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for EVEX 64-bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre-EVEX 128-bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for EVEX 128-bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre-EVEX 256-bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for EVEX 256-bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for EVEX 512-bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for restricted 512-bit vector registers (pre-EVEX, XMM0-XMM15 only)
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
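// Singleton class for the 128-bit view of XMM0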
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
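// Returns true when both bounds of the CastLL's long type fit in a signed
// 32-bit immediate (an unbounded end, min_jlong or max_jlong, needs no
// check), so matching code may encode the bounds as imm32 operands.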
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
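// R12 holds the heap base for compressed oops, so it must be excluded from
// the allocatable register masks whenever compressed oops are in use.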
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
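  // r16-r31 are the Intel APX extended GPRs; they are stripped from the masks
  // below unless UseAPX makes them allocatable.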
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
  _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));

 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
static bool generate_vzeroupper(Compile* C) {
  // Generate vzeroupper when compiled code uses wide vectors or must clear the upper AVX state.
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
}
 1629 
 1630 static int clear_avx_size() {
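  // vzeroupper has a fixed 3-byte encoding (C5 F8 77).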
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
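  // call rel32 encodes as 1 opcode byte (0xE8) plus a 4-byte displacement.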
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
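  // A 10-byte movq of the inline cache constant into rax (REX.W B8 imm64)
  // precedes the 5-byte call rel32, giving the 15 bytes above.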
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
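  // movq r10, imm64 is 10 bytes; the indirect callq *r10 (REX.B FF /2) is 3 more.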
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
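// Materialize the three-way result of an unordered FP compare in dst:
// -1 if less or unordered (NaN), 0 if equal, 1 if greater.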
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1702   Label done;
 1703   __ movl(dst, -1);
 1704   __ jcc(Assembler::parity, done);
 1705   __ jcc(Assembler::below, done);
 1706   __ setcc(Assembler::notEqual, dst);
 1707   __ bind(done);
 1708 }
 1709 
 1710 // Math.min()    # Math.max()
 1711 // --------------------------
 1712 // ucomis[s/d]   #
 1713 // ja   -> b     # a
 1714 // jp   -> NaN   # NaN
 1715 // jb   -> a     # b
 1716 // je            #
 1717 // |-jz -> a | b # a & b
 1718 // |    -> a     #
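// When the operands compare equal, the sign of zero still matters:
// min(-0.0, +0.0) must be -0.0 and max(-0.0, +0.0) must be +0.0, which is
// why the zero case below ORs the operands for min (keeping a set sign bit)
// and ANDs them for max (keeping it only when both are set).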
 1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1720                             XMMRegister a, XMMRegister b,
 1721                             XMMRegister xmmt, Register rt,
 1722                             bool min, bool single) {
 1723 
 1724   Label nan, zero, below, above, done;
 1725 
 1726   if (single)
 1727     __ ucomiss(a, b);
 1728   else
 1729     __ ucomisd(a, b);
 1730 
 1731   if (dst->encoding() != (min ? b : a)->encoding())
 1732     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1733   else
 1734     __ jccb(Assembler::above, done);
 1735 
 1736   __ jccb(Assembler::parity, nan);  // PF=1
 1737   __ jccb(Assembler::below, below); // CF=1
 1738 
 1739   // equal
 1740   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1741   if (single) {
 1742     __ ucomiss(a, xmmt);
 1743     __ jccb(Assembler::equal, zero);
 1744 
 1745     __ movflt(dst, a);
 1746     __ jmp(done);
 1747   }
 1748   else {
 1749     __ ucomisd(a, xmmt);
 1750     __ jccb(Assembler::equal, zero);
 1751 
 1752     __ movdbl(dst, a);
 1753     __ jmp(done);
 1754   }
 1755 
 1756   __ bind(zero);
 1757   if (min)
 1758     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1759   else
 1760     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1761 
 1762   __ jmp(done);
 1763 
 1764   __ bind(above);
 1765   if (single)
 1766     __ movflt(dst, min ? b : a);
 1767   else
 1768     __ movdbl(dst, min ? b : a);
 1769 
 1770   __ jmp(done);
 1771 
 1772   __ bind(nan);
 1773   if (single) {
 1774     __ movl(rt, 0x7fc00000); // Float.NaN
 1775     __ movdl(dst, rt);
 1776   }
 1777   else {
 1778     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1779     __ movdq(dst, rt);
 1780   }
 1781   __ jmp(done);
 1782 
 1783   __ bind(below);
 1784   if (single)
 1785     __ movflt(dst, min ? a : b);
 1786   else
 1787     __ movdbl(dst, min ? a : b);
 1788 
 1789   __ bind(done);
 1790 }
 1791 
 1792 //=============================================================================
 1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1794 
 1795 int ConstantTable::calculate_table_base_offset() const {
 1796   return 0;  // absolute addressing, no offset
 1797 }
 1798 
 1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray<Node*>* nodes, PhaseRegAlloc* ra_) {
 1801   ShouldNotReachHere();
 1802 }
 1803 
 1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1805   // Empty encoding
 1806 }
 1807 
 1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1809   return 0;
 1810 }
 1811 
 1812 #ifndef PRODUCT
 1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1814   st->print("# MachConstantBaseNode (empty encoding)");
 1815 }
 1816 #endif
 1817 
 1818 
 1819 //=============================================================================
 1820 #ifndef PRODUCT
 1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1822   Compile* C = ra_->C;
 1823 
 1824   int framesize = C->output()->frame_size_in_bytes();
 1825   int bangsize = C->output()->bang_size_in_bytes();
 1826   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1827   // Remove wordSize for return addr which is already pushed.
 1828   framesize -= wordSize;
 1829 
 1830   if (C->output()->need_stack_bang(bangsize)) {
 1831     framesize -= wordSize;
 1832     st->print("# stack bang (%d bytes)", bangsize);
 1833     st->print("\n\t");
 1834     st->print("pushq   rbp\t# Save rbp");
 1835     if (PreserveFramePointer) {
 1836         st->print("\n\t");
 1837         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1838     }
 1839     if (framesize) {
 1840       st->print("\n\t");
 1841       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1842     }
 1843   } else {
 1844     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1845     st->print("\n\t");
 1846     framesize -= wordSize;
 1847     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1848     if (PreserveFramePointer) {
 1849       st->print("\n\t");
 1850       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1851       if (framesize > 0) {
 1852         st->print("\n\t");
 1853         st->print("addq    rbp, #%d", framesize);
 1854       }
 1855     }
 1856   }
 1857 
 1858   if (VerifyStackAtCalls) {
 1859     st->print("\n\t");
 1860     framesize -= wordSize;
 1861     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1862 #ifdef ASSERT
 1863     st->print("\n\t");
 1864     st->print("# stack alignment check");
 1865 #endif
 1866   }
 1867   if (C->stub_function() != nullptr) {
 1868     st->print("\n\t");
 1869     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1870     st->print("\n\t");
 1871     st->print("je      fast_entry\t");
 1872     st->print("\n\t");
 1873     st->print("call    #nmethod_entry_barrier_stub\t");
 1874     st->print("\n\tfast_entry:");
 1875   }
 1876   st->cr();
 1877 }
 1878 #endif
 1879 
 1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1881   Compile* C = ra_->C;
 1882 
 1883   int framesize = C->output()->frame_size_in_bytes();
 1884   int bangsize = C->output()->bang_size_in_bytes();
 1885 
 1886   if (C->clinit_barrier_on_entry()) {
 1887     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1888     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1889 
 1890     Label L_skip_barrier;
 1891     Register klass = rscratch1;
 1892 
 1893     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1894     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1895 
 1896     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1897 
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
 1902 
 1903   C->output()->set_frame_complete(__ offset());
 1904 
 1905   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before MachConstantBaseNode.
 1908     ConstantTable& constant_table = C->output()->constant_table();
 1909     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1910   }
 1911 }
 1912 
 1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1914 {
 1915   return MachNode::size(ra_); // too many variables; just compute it
 1916                               // the hard way
 1917 }
 1918 
 1919 int MachPrologNode::reloc() const
 1920 {
 1921   return 0; // a large enough number
 1922 }
 1923 
 1924 //=============================================================================
 1925 #ifndef PRODUCT
 1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1927 {
 1928   Compile* C = ra_->C;
 1929   if (generate_vzeroupper(C)) {
 1930     st->print("vzeroupper");
 1931     st->cr(); st->print("\t");
 1932   }
 1933 
 1934   int framesize = C->output()->frame_size_in_bytes();
 1935   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1936   // Remove word for return adr already pushed
 1937   // and RBP
 1938   framesize -= 2*wordSize;
 1939 
 1940   if (framesize) {
 1941     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1942     st->print("\t");
 1943   }
 1944 
 1945   st->print_cr("popq    rbp");
 1946   if (do_polling() && C->is_method_compilation()) {
 1947     st->print("\t");
 1948     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1949                  "ja      #safepoint_stub\t"
 1950                  "# Safepoint: poll for GC");
 1951   }
 1952 }
 1953 #endif
 1954 
 1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1956 {
 1957   Compile* C = ra_->C;
 1958 
 1959   if (generate_vzeroupper(C)) {
 1960     // Clear upper bits of YMM registers when current compiled code uses
 1961     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1962     __ vzeroupper();
 1963   }
 1964 
 1965   int framesize = C->output()->frame_size_in_bytes();
 1966   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1967   // Remove word for return adr already pushed
 1968   // and RBP
 1969   framesize -= 2*wordSize;
 1970 
 1971   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1972 
 1973   if (framesize) {
 1974     __ addq(rsp, framesize);
 1975   }
 1976 
 1977   __ popq(rbp);
 1978 
 1979   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1980     __ reserved_stack_check();
 1981   }
 1982 
 1983   if (do_polling() && C->is_method_compilation()) {
 1984     Label dummy_label;
 1985     Label* code_stub = &dummy_label;
 1986     if (!C->output()->in_scratch_emit_size()) {
 1987       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1988       C->output()->add_stub(stub);
 1989       code_stub = &stub->entry();
 1990     }
 1991     __ relocate(relocInfo::poll_return_type);
 1992     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1993   }
 1994 }
 1995 
 1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1997 {
 1998   return MachNode::size(ra_); // too many variables; just compute it
 1999                               // the hard way
 2000 }
 2001 
 2002 int MachEpilogNode::reloc() const
 2003 {
 2004   return 2; // a large enough number
 2005 }
 2006 
 2007 const Pipeline* MachEpilogNode::pipeline() const
 2008 {
 2009   return MachNode::pipeline_class();
 2010 }
 2011 
 2012 //=============================================================================
 2013 
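// Coarse classes of OptoRegs, used below to pick a spill-copy strategy in
// MachSpillCopyNode::implementation.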
 2014 enum RC {
 2015   rc_bad,
 2016   rc_int,
 2017   rc_kreg,
 2018   rc_float,
 2019   rc_stack
 2020 };
 2021 
 2022 static enum RC rc_class(OptoReg::Name reg)
 2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2025 
 2026   if (OptoReg::is_stack(reg)) return rc_stack;
 2027 
 2028   VMReg r = OptoReg::as_VMReg(reg);
 2029 
 2030   if (r->is_Register()) return rc_int;
 2031 
 2032   if (r->is_KRegister()) return rc_kreg;
 2033 
 2034   assert(r->is_XMMRegister(), "must be");
 2035   return rc_float;
 2036 }
 2037 
// The next two methods are shared by the 32- and 64-bit VMs. They are defined in x86.ad.
 2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2040                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2041 
 2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2043                      int stack_offset, int reg, uint ireg, outputStream* st);
 2044 
 2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2046                                       int dst_offset, uint ireg, outputStream* st) {
 2047   if (masm) {
 2048     switch (ireg) {
 2049     case Op_VecS:
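      // There is no 32-bit push/pop from memory in 64-bit mode, so bounce the
      // value through rax, temporarily parking rax just below rsp.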
 2050       __ movq(Address(rsp, -8), rax);
 2051       __ movl(rax, Address(rsp, src_offset));
 2052       __ movl(Address(rsp, dst_offset), rax);
 2053       __ movq(rax, Address(rsp, -8));
 2054       break;
 2055     case Op_VecD:
 2056       __ pushq(Address(rsp, src_offset));
 2057       __ popq (Address(rsp, dst_offset));
 2058       break;
 2059     case Op_VecX:
 2060       __ pushq(Address(rsp, src_offset));
 2061       __ popq (Address(rsp, dst_offset));
 2062       __ pushq(Address(rsp, src_offset+8));
 2063       __ popq (Address(rsp, dst_offset+8));
 2064       break;
 2065     case Op_VecY:
 2066       __ vmovdqu(Address(rsp, -32), xmm0);
 2067       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2068       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2069       __ vmovdqu(xmm0, Address(rsp, -32));
 2070       break;
 2071     case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, Assembler::AVX_512bit);
      __ evmovdquq(xmm0, Address(rsp, src_offset), Assembler::AVX_512bit);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, Assembler::AVX_512bit);
      __ evmovdquq(xmm0, Address(rsp, -64), Assembler::AVX_512bit);
 2076       break;
 2077     default:
 2078       ShouldNotReachHere();
 2079     }
 2080 #ifndef PRODUCT
 2081   } else {
 2082     switch (ireg) {
 2083     case Op_VecS:
 2084       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2085                 "movl    rax, [rsp + #%d]\n\t"
 2086                 "movl    [rsp + #%d], rax\n\t"
 2087                 "movq    rax, [rsp - #8]",
 2088                 src_offset, dst_offset);
 2089       break;
 2090     case Op_VecD:
 2091       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2092                 "popq    [rsp + #%d]",
 2093                 src_offset, dst_offset);
 2094       break;
 2095      case Op_VecX:
 2096       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2097                 "popq    [rsp + #%d]\n\t"
 2098                 "pushq   [rsp + #%d]\n\t"
 2099                 "popq    [rsp + #%d]",
 2100                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2101       break;
 2102     case Op_VecY:
 2103       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2104                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2105                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2106                 "vmovdqu xmm0, [rsp - #32]",
 2107                 src_offset, dst_offset);
 2108       break;
 2109     case Op_VecZ:
 2110       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2111                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2112                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2113                 "vmovdqu xmm0, [rsp - #64]",
 2114                 src_offset, dst_offset);
 2115       break;
 2116     default:
 2117       ShouldNotReachHere();
 2118     }
 2119 #endif
 2120   }
 2121 }
 2122 
 2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2124                                        PhaseRegAlloc* ra_,
 2125                                        bool do_size,
 2126                                        outputStream* st) const {
 2127   assert(masm != nullptr || st  != nullptr, "sanity");
 2128   // Get registers to move
 2129   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2130   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2131   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2132   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2133 
 2134   enum RC src_second_rc = rc_class(src_second);
 2135   enum RC src_first_rc = rc_class(src_first);
 2136   enum RC dst_second_rc = rc_class(dst_second);
 2137   enum RC dst_first_rc = rc_class(dst_first);
 2138 
 2139   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2140          "must move at least 1 register" );
 2141 
 2142   if (src_first == dst_first && src_second == dst_second) {
 2143     // Self copy, no move
 2144     return 0;
 2145   }
 2146   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2147     uint ireg = ideal_reg();
 2148     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2149     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2151       // mem -> mem
 2152       int src_offset = ra_->reg2offset(src_first);
 2153       int dst_offset = ra_->reg2offset(dst_first);
 2154       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2156       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2158       int stack_offset = ra_->reg2offset(dst_first);
 2159       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2161       int stack_offset = ra_->reg2offset(src_first);
 2162       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2163     } else {
 2164       ShouldNotReachHere();
 2165     }
 2166     return 0;
 2167   }
 2168   if (src_first_rc == rc_stack) {
 2169     // mem ->
 2170     if (dst_first_rc == rc_stack) {
 2171       // mem -> mem
 2172       assert(src_second != dst_first, "overlap");
 2173       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2174           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2175         // 64-bit
 2176         int src_offset = ra_->reg2offset(src_first);
 2177         int dst_offset = ra_->reg2offset(dst_first);
 2178         if (masm) {
 2179           __ pushq(Address(rsp, src_offset));
 2180           __ popq (Address(rsp, dst_offset));
 2181 #ifndef PRODUCT
 2182         } else {
 2183           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2184                     "popq    [rsp + #%d]",
 2185                      src_offset, dst_offset);
 2186 #endif
 2187         }
 2188       } else {
 2189         // 32-bit
 2190         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2191         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2192         // No pushl/popl, so:
 2193         int src_offset = ra_->reg2offset(src_first);
 2194         int dst_offset = ra_->reg2offset(dst_first);
 2195         if (masm) {
 2196           __ movq(Address(rsp, -8), rax);
 2197           __ movl(rax, Address(rsp, src_offset));
 2198           __ movl(Address(rsp, dst_offset), rax);
 2199           __ movq(rax, Address(rsp, -8));
 2200 #ifndef PRODUCT
 2201         } else {
 2202           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2203                     "movl    rax, [rsp + #%d]\n\t"
 2204                     "movl    [rsp + #%d], rax\n\t"
 2205                     "movq    rax, [rsp - #8]",
 2206                      src_offset, dst_offset);
 2207 #endif
 2208         }
 2209       }
 2210       return 0;
 2211     } else if (dst_first_rc == rc_int) {
 2212       // mem -> gpr
 2213       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2214           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2215         // 64-bit
 2216         int offset = ra_->reg2offset(src_first);
 2217         if (masm) {
 2218           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2219 #ifndef PRODUCT
 2220         } else {
 2221           st->print("movq    %s, [rsp + #%d]\t# spill",
 2222                      Matcher::regName[dst_first],
 2223                      offset);
 2224 #endif
 2225         }
 2226       } else {
 2227         // 32-bit
 2228         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2229         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2230         int offset = ra_->reg2offset(src_first);
 2231         if (masm) {
 2232           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2233 #ifndef PRODUCT
 2234         } else {
 2235           st->print("movl    %s, [rsp + #%d]\t# spill",
 2236                      Matcher::regName[dst_first],
 2237                      offset);
 2238 #endif
 2239         }
 2240       }
 2241       return 0;
 2242     } else if (dst_first_rc == rc_float) {
 2243       // mem -> xmm
 2244       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2245           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2246         // 64-bit
 2247         int offset = ra_->reg2offset(src_first);
 2248         if (masm) {
 2249           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2250 #ifndef PRODUCT
 2251         } else {
 2252           st->print("%s  %s, [rsp + #%d]\t# spill",
 2253                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2254                      Matcher::regName[dst_first],
 2255                      offset);
 2256 #endif
 2257         }
 2258       } else {
 2259         // 32-bit
 2260         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2261         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2262         int offset = ra_->reg2offset(src_first);
 2263         if (masm) {
 2264           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2265 #ifndef PRODUCT
 2266         } else {
 2267           st->print("movss   %s, [rsp + #%d]\t# spill",
 2268                      Matcher::regName[dst_first],
 2269                      offset);
 2270 #endif
 2271         }
 2272       }
 2273       return 0;
 2274     } else if (dst_first_rc == rc_kreg) {
 2275       // mem -> kreg
 2276       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2277           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2278         // 64-bit
 2279         int offset = ra_->reg2offset(src_first);
 2280         if (masm) {
 2281           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2282 #ifndef PRODUCT
 2283         } else {
 2284           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2285                      Matcher::regName[dst_first],
 2286                      offset);
 2287 #endif
 2288         }
 2289       }
 2290       return 0;
 2291     }
 2292   } else if (src_first_rc == rc_int) {
 2293     // gpr ->
 2294     if (dst_first_rc == rc_stack) {
 2295       // gpr -> mem
 2296       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2297           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2298         // 64-bit
 2299         int offset = ra_->reg2offset(dst_first);
 2300         if (masm) {
 2301           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2302 #ifndef PRODUCT
 2303         } else {
 2304           st->print("movq    [rsp + #%d], %s\t# spill",
 2305                      offset,
 2306                      Matcher::regName[src_first]);
 2307 #endif
 2308         }
 2309       } else {
 2310         // 32-bit
 2311         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2312         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2313         int offset = ra_->reg2offset(dst_first);
 2314         if (masm) {
 2315           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2316 #ifndef PRODUCT
 2317         } else {
 2318           st->print("movl    [rsp + #%d], %s\t# spill",
 2319                      offset,
 2320                      Matcher::regName[src_first]);
 2321 #endif
 2322         }
 2323       }
 2324       return 0;
 2325     } else if (dst_first_rc == rc_int) {
 2326       // gpr -> gpr
 2327       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2328           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2329         // 64-bit
 2330         if (masm) {
 2331           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2332                   as_Register(Matcher::_regEncode[src_first]));
 2333 #ifndef PRODUCT
 2334         } else {
 2335           st->print("movq    %s, %s\t# spill",
 2336                      Matcher::regName[dst_first],
 2337                      Matcher::regName[src_first]);
 2338 #endif
 2339         }
 2340         return 0;
 2341       } else {
 2342         // 32-bit
 2343         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2344         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2345         if (masm) {
 2346           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2347                   as_Register(Matcher::_regEncode[src_first]));
 2348 #ifndef PRODUCT
 2349         } else {
 2350           st->print("movl    %s, %s\t# spill",
 2351                      Matcher::regName[dst_first],
 2352                      Matcher::regName[src_first]);
 2353 #endif
 2354         }
 2355         return 0;
 2356       }
 2357     } else if (dst_first_rc == rc_float) {
 2358       // gpr -> xmm
 2359       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2360           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2361         // 64-bit
 2362         if (masm) {
 2363           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2364 #ifndef PRODUCT
 2365         } else {
 2366           st->print("movdq   %s, %s\t# spill",
 2367                      Matcher::regName[dst_first],
 2368                      Matcher::regName[src_first]);
 2369 #endif
 2370         }
 2371       } else {
 2372         // 32-bit
 2373         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2374         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2375         if (masm) {
 2376           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2377 #ifndef PRODUCT
 2378         } else {
 2379           st->print("movdl   %s, %s\t# spill",
 2380                      Matcher::regName[dst_first],
 2381                      Matcher::regName[src_first]);
 2382 #endif
 2383         }
 2384       }
 2385       return 0;
 2386     } else if (dst_first_rc == rc_kreg) {
             // gpr -> kreg
 2387       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2388           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2389         // 64-bit
 2390         if (masm) {
 2391           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2392 #ifndef PRODUCT
 2393         } else {
 2394           st->print("kmovq   %s, %s\t# spill",
 2395                      Matcher::regName[dst_first],
 2396                      Matcher::regName[src_first]);
 2397 #endif
 2398         }
 2399       }
 2400       Unimplemented();
 2401       return 0;
 2402     }
 2403   } else if (src_first_rc == rc_float) {
 2404     // xmm ->
 2405     if (dst_first_rc == rc_stack) {
 2406       // xmm -> mem
 2407       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2408           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2409         // 64-bit
 2410         int offset = ra_->reg2offset(dst_first);
 2411         if (masm) {
 2412           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2413 #ifndef PRODUCT
 2414         } else {
 2415           st->print("movsd   [rsp + #%d], %s\t# spill",
 2416                      offset,
 2417                      Matcher::regName[src_first]);
 2418 #endif
 2419         }
 2420       } else {
 2421         // 32-bit
 2422         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2423         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2424         int offset = ra_->reg2offset(dst_first);
 2425         if (masm) {
 2426           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2427 #ifndef PRODUCT
 2428         } else {
 2429           st->print("movss   [rsp + #%d], %s\t# spill",
 2430                      offset,
 2431                      Matcher::regName[src_first]);
 2432 #endif
 2433         }
 2434       }
 2435       return 0;
 2436     } else if (dst_first_rc == rc_int) {
 2437       // xmm -> gpr
 2438       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2439           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2440         // 64-bit
 2441         if (masm) {
 2442           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2443 #ifndef PRODUCT
 2444         } else {
 2445           st->print("movdq   %s, %s\t# spill",
 2446                      Matcher::regName[dst_first],
 2447                      Matcher::regName[src_first]);
 2448 #endif
 2449         }
 2450       } else {
 2451         // 32-bit
 2452         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2453         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2454         if (masm) {
 2455           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2456 #ifndef PRODUCT
 2457         } else {
 2458           st->print("movdl   %s, %s\t# spill",
 2459                      Matcher::regName[dst_first],
 2460                      Matcher::regName[src_first]);
 2461 #endif
 2462         }
 2463       }
 2464       return 0;
 2465     } else if (dst_first_rc == rc_float) {
 2466       // xmm -> xmm
 2467       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2468           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2469         // 64-bit
 2470         if (masm) {
 2471           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2472 #ifndef PRODUCT
 2473         } else {
 2474           st->print("%s  %s, %s\t# spill",
 2475                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2476                      Matcher::regName[dst_first],
 2477                      Matcher::regName[src_first]);
 2478 #endif
 2479         }
 2480       } else {
 2481         // 32-bit
 2482         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2483         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2484         if (masm) {
 2485           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2486 #ifndef PRODUCT
 2487         } else {
 2488           st->print("%s  %s, %s\t# spill",
 2489                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2490                      Matcher::regName[dst_first],
 2491                      Matcher::regName[src_first]);
 2492 #endif
 2493         }
 2494       }
 2495       return 0;
 2496     } else if (dst_first_rc == rc_kreg) {
 2497       assert(false, "Illegal spilling");
 2498       return 0;
 2499     }
 2500   } else if (src_first_rc == rc_kreg) {
 2501     if (dst_first_rc == rc_stack) {
 2502       // kreg -> mem
 2503       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2504           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2505         // 64-bit
 2506         int offset = ra_->reg2offset(dst_first);
 2507         if (masm) {
 2508           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2509 #ifndef PRODUCT
 2510         } else {
 2511           st->print("kmovq   [rsp + #%d], %s\t# spill",
 2512                      offset,
 2513                      Matcher::regName[src_first]);
 2514 #endif
 2515         }
 2516       }
 2517       return 0;
 2518     } else if (dst_first_rc == rc_int) {
             // kreg -> gpr
 2519       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2520           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2521         // 64-bit
 2522         if (masm) {
 2523           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2524 #ifndef PRODUCT
 2525         } else {
 2526           st->print("kmovq   %s, %s\t# spill",
 2527                      Matcher::regName[dst_first],
 2528                      Matcher::regName[src_first]);
 2529 #endif
 2530         }
 2531       }
 2532       Unimplemented();
 2533       return 0;
 2534     } else if (dst_first_rc == rc_kreg) {
             // kreg -> kreg
 2535       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2536           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2537         // 64-bit
 2538         if (masm) {
 2539           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2540 #ifndef PRODUCT
 2541         } else {
 2542           st->print("kmovq   %s, %s\t# spill",
 2543                      Matcher::regName[dst_first],
 2544                      Matcher::regName[src_first]);
 2545 #endif
 2546         }
 2547       }
 2548       return 0;
 2549     } else if (dst_first_rc == rc_float) {
 2550       assert(false, "Illegal spill");
 2551       return 0;
 2552     }
 2553   }
 2554 
 2555   assert(false, "unhandled register class combination");
 2556   Unimplemented();
 2557   return 0;
 2558 }
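      // Illustrative output (offsets are made-up example values): a 64-bit
      // gpr -> mem spill from the code above prints as
      // "movq    [rsp + #24], rbp\t# spill", and the corresponding reload prints
      // "movq    rbp, [rsp + #24]\t# spill"; the offsets come from
      // PhaseRegAlloc::reg2offset().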
 2559 
 2560 #ifndef PRODUCT
 2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2562   implementation(nullptr, ra_, false, st);
 2563 }
 2564 #endif
 2565 
 2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2567   implementation(masm, ra_, false, nullptr);
 2568 }
 2569 
 2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2571   return MachNode::size(ra_);
 2572 }
 2573 
 2574 //=============================================================================
 2575 #ifndef PRODUCT
 2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2577 {
 2578   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2579   int reg = ra_->get_reg_first(this);
 2580   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2581             Matcher::regName[reg], offset);
 2582 }
 2583 #endif
 2584 
 2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2588   int reg = ra_->get_encode(this);
 2589 
 2590   __ lea(as_Register(reg), Address(rsp, offset));
 2591 }
 2592 
 2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2594 {
 2595   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2596   if (ra_->get_encode(this) > 15) {
 2597     return (offset < 0x80) ? 6 : 9; // REX2
 2598   } else {
 2599     return (offset < 0x80) ? 5 : 8; // REX
 2600   }
 2601 }
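      // Encoding sketch behind the sizes above: lea reg, [rsp + disp] needs a SIB
      // byte because the base is rsp, so REX (1) + opcode (1) + ModRM (1) + SIB (1)
      // + disp8 (1) = 5 bytes when the offset fits in a signed byte, or 8 bytes
      // with a disp32; the two-byte REX2 prefix needed for encodings above 15 adds
      // one byte to each case, giving 6 and 9.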
 2602 
 2603 //=============================================================================
 2604 #ifndef PRODUCT
 2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2606 {
 2607   if (UseCompressedClassPointers) {
 2608     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2609     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2610   } else {
 2611     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2612     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2613   }
 2614   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2615 }
 2616 #endif
 2617 
 2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2619 {
 2620   __ ic_check(InteriorEntryAlignment);
 2621 }
 2622 
 2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2624 {
 2625   return MachNode::size(ra_); // too many variables; just compute it
 2626                               // the hard way
 2627 }
 2628 
 2629 
 2630 //=============================================================================
 2631 
 2632 bool Matcher::supports_vector_calling_convention(void) {
 2633   return EnableVectorSupport;
 2634 }
 2635 
 2636 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2637   assert(EnableVectorSupport, "sanity");
 2638   int lo = XMM0_num;
 2639   int hi = XMM0b_num;
 2640   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2641   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2642   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2643   return OptoRegPair(hi, lo);
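      // Example: for Op_VecY this evaluates to OptoRegPair(XMM0h_num, XMM0_num),
      // i.e. the vector is returned in ymm0; the hi slot widens with the ideal
      // type (XMM0b by default, XMM0d for VecX, XMM0p for VecZ).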
 2644 }
 2645 
 2646 // Is this branch offset short enough that a short branch can be used?
 2647 //
 2648 // NOTE: If the platform does not provide any short branch variants, then
 2649 //       this method should return false for offset 0.
 2650 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2651   // The passed offset is relative to the address of the branch.
 2652   // On x86 a branch displacement is calculated relative to the address
 2653   // of the next instruction.
 2654   offset -= br_size;
 2655 
 2656   // The short version of jmpConUCF2 contains multiple branches,
 2657   // making the reach slightly shorter.
 2658   if (rule == jmpConUCF2_rule)
 2659     return (-126 <= offset && offset <= 125);
 2660   return (-128 <= offset && offset <= 127);
 2661 }
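      // Worked example (illustrative numbers): for a 2-byte branch whose target
      // lies 100 bytes before it, offset == -100 becomes -102 after the br_size
      // adjustment; that still fits the signed 8-bit range [-128, 127], so the
      // short form is usable.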
 2662 
 2663 // Return whether or not this register is ever used as an argument.
 2664 // This function is used on startup to build the trampoline stubs in
 2665 // generateOptoStub.  Registers not mentioned will be killed by the VM
 2666 // call in the trampoline, and arguments in those registers will not be
 2667 // available to the callee.
 2668 bool Matcher::can_be_java_arg(int reg)
 2669 {
 2670   return
 2671     reg ==  RDI_num || reg == RDI_H_num ||
 2672     reg ==  RSI_num || reg == RSI_H_num ||
 2673     reg ==  RDX_num || reg == RDX_H_num ||
 2674     reg ==  RCX_num || reg == RCX_H_num ||
 2675     reg ==   R8_num || reg ==  R8_H_num ||
 2676     reg ==   R9_num || reg ==  R9_H_num ||
 2677     reg ==  R12_num || reg == R12_H_num ||
 2678     reg == XMM0_num || reg == XMM0b_num ||
 2679     reg == XMM1_num || reg == XMM1b_num ||
 2680     reg == XMM2_num || reg == XMM2b_num ||
 2681     reg == XMM3_num || reg == XMM3b_num ||
 2682     reg == XMM4_num || reg == XMM4b_num ||
 2683     reg == XMM5_num || reg == XMM5b_num ||
 2684     reg == XMM6_num || reg == XMM6b_num ||
 2685     reg == XMM7_num || reg == XMM7b_num;
 2686 }
 2687 
 2688 bool Matcher::is_spillable_arg(int reg)
 2689 {
 2690   return can_be_java_arg(reg);
 2691 }
 2692 
 2693 uint Matcher::int_pressure_limit()
 2694 {
 2695   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2696 }
 2697 
 2698 uint Matcher::float_pressure_limit()
 2699 {
 2700   // After experimenting with different values, the following default threshold
 2701   // was found to work best for LCM's register pressure scheduling on x64.
 2702   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2703   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2704   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2705 }
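      // For example, on EVEX-capable hardware dec_count is 4, so leaving
      // FLOATPRESSURE at its default of -1 yields _FLOAT_REG_mask.size() - 4.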
 2706 
 2707 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 2708   // In 64-bit mode, code that uses a multiply when the
 2709   // divisor is constant is faster than the hardware
 2710   // DIV instruction (it uses MulHiL).
 2711   return false;
 2712 }
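      // Illustrative consequence: a division by a constant such as x / 7 is
      // strength-reduced by the compiler into a multiply-high (MulHiL) plus
      // shift/correction sequence rather than being matched to an assembler stub.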
 2713 
 2714 // Register for DIVI projection of divmodI
 2715 const RegMask& Matcher::divI_proj_mask() {
 2716   return INT_RAX_REG_mask();
 2717 }
 2718 
 2719 // Register for MODI projection of divmodI
 2720 const RegMask& Matcher::modI_proj_mask() {
 2721   return INT_RDX_REG_mask();
 2722 }
 2723 
 2724 // Register for DIVL projection of divmodL
 2725 const RegMask& Matcher::divL_proj_mask() {
 2726   return LONG_RAX_REG_mask();
 2727 }
 2728 
 2729 // Register for MODL projection of divmodL
 2730 const RegMask& Matcher::modL_proj_mask() {
 2731   return LONG_RDX_REG_mask();
 2732 }
 2733 
 2734 %}
 2735 
 2736 source_hpp %{
 2737 // Header information of the source block.
 2738 // Method declarations/definitions which are used outside
 2739 // the ad-scope can conveniently be defined here.
 2740 //
 2741 // To keep related declarations/definitions/uses close together,
 2742 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2743 
 2744 #include "runtime/vm_version.hpp"
 2745 
 2746 class NativeJump;
 2747 
 2748 class CallStubImpl {
 2749 
 2750   //--------------------------------------------------------------
 2751   //---<  Used for optimization in Compile::shorten_branches  >---
 2752   //--------------------------------------------------------------
 2753 
 2754  public:
 2755   // Size of call trampoline stub.
 2756   static uint size_call_trampoline() {
 2757     return 0; // no call trampolines on this platform
 2758   }
 2759 
 2760   // number of relocations needed by a call trampoline stub
 2761   static uint reloc_call_trampoline() {
 2762     return 0; // no call trampolines on this platform
 2763   }
 2764 };
 2765 
 2766 class HandlerImpl {
 2767 
 2768  public:
 2769 
 2770   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2771 
 2772   static uint size_deopt_handler() {
 2773     // One call (5 bytes) and one jmp (up to 5 bytes).
 2774     return 10;
 2775   }
 2776 };
 2777 
 2778 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2779   switch (bytes) {
 2780     case  4: // fall-through
 2781     case  8: // fall-through
 2782     case 16: return Assembler::AVX_128bit;
 2783     case 32: return Assembler::AVX_256bit;
 2784     case 64: return Assembler::AVX_512bit;
 2785 
 2786     default: {
 2787       ShouldNotReachHere();
 2788       return Assembler::AVX_NoVec;
 2789     }
 2790   }
 2791 }
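      // Usage sketch: vector_length_encoding(32) returns Assembler::AVX_256bit for
      // a ymm-sized operation, while 4- and 8-byte vectors fall through to
      // AVX_128bit since they are carried in the low lanes of an xmm register.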
 2792 
 2793 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2794   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2795 }
 2796 
 2797 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2798   uint def_idx = use->operand_index(opnd);
 2799   Node* def = use->in(def_idx);
 2800   return vector_length_encoding(def);
 2801 }
 2802 
 2803 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2804   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2805          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2806 }
 2807 
 2808 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2809   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2810            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2811 }
 2812 
 2813 class Node::PD {
 2814 public:
 2815   enum NodeFlags {
 2816     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2817     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2818     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2819     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2820     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2821     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2822     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2823     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2824     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2825     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2826     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2827     _last_flag                = Flag_clears_sign_flag
 2828   };
 2829 };
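      // Usage sketch: nodes tagged with these platform-dependent flags are tested
      // with a bitwise and, e.g. (flags() & Node::PD::Flag_intel_jcc_erratum), as
      // done by MachNode::compute_padding() in the source block below.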
 2830 
 2831 %} // end source_hpp
 2832 
 2833 source %{
 2834 
 2835 #include "opto/addnode.hpp"
 2836 #include "c2_intelJccErratum_x86.hpp"
 2837 
 2838 void PhaseOutput::pd_perform_mach_node_analysis() {
 2839   if (VM_Version::has_intel_jcc_erratum()) {
 2840     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2841     _buf_sizes._code += extra_padding;
 2842   }
 2843 }
 2844 
 2845 int MachNode::pd_alignment_required() const {
 2846   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2847     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2848     return IntelJccErratum::largest_jcc_size() + 1;
 2849   } else {
 2850     return 1;
 2851   }
 2852 }
 2853 
 2854 int MachNode::compute_padding(int current_offset) const {
 2855   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2856     Compile* C = Compile::current();
 2857     PhaseOutput* output = C->output();
 2858     Block* block = output->block();
 2859     int index = output->index();
 2860     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2861   } else {
 2862     return 0;
 2863   }
 2864 }
 2865 
 2866 // Emit deopt handler code.
 2867 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2868 
 2869   // Note that the code buffer's insts_mark is always relative to insts.
 2870   // That's why we must use the macroassembler to generate a handler.
 2871   address base = __ start_a_stub(size_deopt_handler());
 2872   if (base == nullptr) {
 2873     ciEnv::current()->record_failure("CodeCache is full");
 2874     return 0;  // CodeBuffer::expand failed
 2875   }
 2876   int offset = __ offset();
 2877 
 2878   Label start;
 2879   __ bind(start);
 2880 
 2881   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2882 
 2883   int entry_offset = __ offset();
 2884 
 2885   __ jmp(start);
 2886 
 2887   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2888   __ end_a_stub();
 2889   return entry_offset;
 2890 }
 2891 
 2892 static Assembler::Width widthForType(BasicType bt) {
 2893   if (bt == T_BYTE) {
 2894     return Assembler::B;
 2895   } else if (bt == T_SHORT) {
 2896     return Assembler::W;
 2897   } else if (bt == T_INT) {
 2898     return Assembler::D;
 2899   } else {
 2900     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2901     return Assembler::Q;
 2902   }
 2903 }
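      // Usage sketch: widthForType(T_INT) returns Assembler::D, the doubleword
      // operand width; any BasicType outside T_BYTE/T_SHORT/T_INT/T_LONG trips the
      // assert above.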
 2904 
 2905 //=============================================================================
 2906 
 2907 // Float masks come from different places depending on platform.
 2908 static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2909 static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2910 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2911 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2912 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2913 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2914 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2915 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2916 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2917 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2918 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2919 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2920 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2921 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2922 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2923 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2924 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2925 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
 2926 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
 2927 
 2928 //=============================================================================
 2929 bool Matcher::match_rule_supported(int opcode) {
 2930   if (!has_match_rule(opcode)) {
 2931     return false; // no match rule present
 2932   }
 2933   switch (opcode) {
 2934     case Op_AbsVL:
 2935     case Op_StoreVectorScatter:
 2936       if (UseAVX < 3) {
 2937         return false;
 2938       }
 2939       break;
 2940     case Op_PopCountI:
 2941     case Op_PopCountL:
 2942       if (!UsePopCountInstruction) {
 2943         return false;
 2944       }
 2945       break;
 2946     case Op_PopCountVI:
 2947       if (UseAVX < 2) {
 2948         return false;
 2949       }
 2950       break;
 2951     case Op_CompressV:
 2952     case Op_ExpandV:
 2953     case Op_PopCountVL:
 2954       if (UseAVX < 2) {
 2955         return false;
 2956       }
 2957       break;
 2958     case Op_MulVI:
 2959       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 2960         return false;
 2961       }
 2962       break;
 2963     case Op_MulVL:
 2964       if (UseSSE < 4) { // only with SSE4_1 or AVX
 2965         return false;
 2966       }
 2967       break;
 2968     case Op_MulReductionVL:
 2969       if (!VM_Version::supports_avx512dq()) {
 2970         return false;
 2971       }
 2972       break;
 2973     case Op_AbsVB:
 2974     case Op_AbsVS:
 2975     case Op_AbsVI:
 2976     case Op_AddReductionVI:
 2977     case Op_AndReductionV:
 2978     case Op_OrReductionV:
 2979     case Op_XorReductionV:
 2980       if (UseSSE < 3) { // requires at least SSSE3
 2981         return false;
 2982       }
 2983       break;
 2984     case Op_MaxHF:
 2985     case Op_MinHF:
 2986       if (!VM_Version::supports_avx512vlbw()) {
 2987         return false;
 2988       }  // fallthrough
 2989     case Op_AddHF:
 2990     case Op_DivHF:
 2991     case Op_FmaHF:
 2992     case Op_MulHF:
 2993     case Op_ReinterpretS2HF:
 2994     case Op_ReinterpretHF2S:
 2995     case Op_SubHF:
 2996     case Op_SqrtHF:
 2997       if (!VM_Version::supports_avx512_fp16()) {
 2998         return false;
 2999       }
 3000       break;
 3001     case Op_VectorLoadShuffle:
 3002     case Op_VectorRearrange:
 3003     case Op_MulReductionVI:
 3004       if (UseSSE < 4) { // requires at least SSE4
 3005         return false;
 3006       }
 3007       break;
 3008     case Op_IsInfiniteF:
 3009     case Op_IsInfiniteD:
 3010       if (!VM_Version::supports_avx512dq()) {
 3011         return false;
 3012       }
 3013       break;
 3014     case Op_SqrtVD:
 3015     case Op_SqrtVF:
 3016     case Op_VectorMaskCmp:
 3017     case Op_VectorCastB2X:
 3018     case Op_VectorCastS2X:
 3019     case Op_VectorCastI2X:
 3020     case Op_VectorCastL2X:
 3021     case Op_VectorCastF2X:
 3022     case Op_VectorCastD2X:
 3023     case Op_VectorUCastB2X:
 3024     case Op_VectorUCastS2X:
 3025     case Op_VectorUCastI2X:
 3026     case Op_VectorMaskCast:
 3027       if (UseAVX < 1) { // enabled for AVX only
 3028         return false;
 3029       }
 3030       break;
 3031     case Op_PopulateIndex:
 3032       if (UseAVX < 2) {
 3033         return false;
 3034       }
 3035       break;
 3036     case Op_RoundVF:
 3037       if (UseAVX < 2) { // enabled for AVX2 only
 3038         return false;
 3039       }
 3040       break;
 3041     case Op_RoundVD:
 3042       if (UseAVX < 3) {
 3043         return false;  // enabled for AVX3 only
 3044       }
 3045       break;
 3046     case Op_CompareAndSwapL:
 3047     case Op_CompareAndSwapP:
 3048       break;
 3049     case Op_StrIndexOf:
 3050       if (!UseSSE42Intrinsics) {
 3051         return false;
 3052       }
 3053       break;
 3054     case Op_StrIndexOfChar:
 3055       if (!UseSSE42Intrinsics) {
 3056         return false;
 3057       }
 3058       break;
 3059     case Op_OnSpinWait:
 3060       if (!VM_Version::supports_on_spin_wait()) {
 3061         return false;
 3062       }
 3063       break;
 3064     case Op_MulVB:
 3065     case Op_LShiftVB:
 3066     case Op_RShiftVB:
 3067     case Op_URShiftVB:
 3068     case Op_VectorInsert:
 3069     case Op_VectorLoadMask:
 3070     case Op_VectorStoreMask:
 3071     case Op_VectorBlend:
 3072       if (UseSSE < 4) {
 3073         return false;
 3074       }
 3075       break;
 3076     case Op_MaxD:
 3077     case Op_MaxF:
 3078     case Op_MinD:
 3079     case Op_MinF:
 3080       if (UseAVX < 1) { // enabled for AVX only
 3081         return false;
 3082       }
 3083       break;
 3084     case Op_CacheWB:
 3085     case Op_CacheWBPreSync:
 3086     case Op_CacheWBPostSync:
 3087       if (!VM_Version::supports_data_cache_line_flush()) {
 3088         return false;
 3089       }
 3090       break;
 3091     case Op_ExtractB:
 3092     case Op_ExtractL:
 3093     case Op_ExtractI:
 3094     case Op_RoundDoubleMode:
 3095       if (UseSSE < 4) {
 3096         return false;
 3097       }
 3098       break;
 3099     case Op_RoundDoubleModeV:
 3100       if (!VM_Version::supports_avx()) {
 3101         return false; // 128bit vroundpd is not available
 3102       }
 3103       break;
 3104     case Op_LoadVectorGather:
 3105     case Op_LoadVectorGatherMasked:
 3106       if (UseAVX < 2) {
 3107         return false;
 3108       }
 3109       break;
 3110     case Op_FmaF:
 3111     case Op_FmaD:
 3112     case Op_FmaVD:
 3113     case Op_FmaVF:
 3114       if (!UseFMA) {
 3115         return false;
 3116       }
 3117       break;
 3118     case Op_MacroLogicV:
 3119       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3120         return false;
 3121       }
 3122       break;
 3123 
 3124     case Op_VectorCmpMasked:
 3125     case Op_VectorMaskGen:
 3126       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3127         return false;
 3128       }
 3129       break;
 3130     case Op_VectorMaskFirstTrue:
 3131     case Op_VectorMaskLastTrue:
 3132     case Op_VectorMaskTrueCount:
 3133     case Op_VectorMaskToLong:
 3134       if (UseAVX < 1) {
 3135          return false;
 3136       }
 3137       break;
 3138     case Op_RoundF:
 3139     case Op_RoundD:
 3140       break;
 3141     case Op_CopySignD:
 3142     case Op_CopySignF:
 3143       if (UseAVX < 3)  {
 3144         return false;
 3145       }
 3146       if (!VM_Version::supports_avx512vl()) {
 3147         return false;
 3148       }
 3149       break;
 3150     case Op_CompressBits:
 3151     case Op_ExpandBits:
 3152       if (!VM_Version::supports_bmi2()) {
 3153         return false;
 3154       }
 3155       break;
 3156     case Op_CompressM:
 3157       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3158         return false;
 3159       }
 3160       break;
 3161     case Op_ConvF2HF:
 3162     case Op_ConvHF2F:
 3163       if (!VM_Version::supports_float16()) {
 3164         return false;
 3165       }
 3166       break;
 3167     case Op_VectorCastF2HF:
 3168     case Op_VectorCastHF2F:
 3169       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3170         return false;
 3171       }
 3172       break;
 3173   }
 3174   return true;  // Match rules are supported by default.
 3175 }
 3176 
 3177 //------------------------------------------------------------------------
 3178 
 3179 static inline bool is_pop_count_instr_target(BasicType bt) {
 3180   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3181          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3182 }
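      // Note: this predicate matches is_vector_popcount_predicate in the source_hpp
      // block above: subword (byte/short) lanes need AVX512_BITALG, while int/long
      // lanes need AVX512_VPOPCNTDQ.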
 3183 
 3184 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3185   return match_rule_supported_vector(opcode, vlen, bt);
 3186 }
 3187 
 3188 // Identify extra cases that we might want to provide match rules for vector nodes and
 3189 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3190 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3191   if (!match_rule_supported(opcode)) {
 3192     return false;
 3193   }
 3194   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3195   //   * SSE2 supports 128bit vectors for all types;
 3196   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3197   //   * AVX2 supports 256bit vectors for all types;
 3198   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3199   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3200   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3201   // And MaxVectorSize is taken into account as well.
 3202   if (!vector_size_supported(bt, vlen)) {
 3203     return false;
 3204   }
 3205   // Special cases that depend on the vector length follow:
 3206   //   * implementation limitations
 3207   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3208   //   * 128bit vroundpd instruction is present only in AVX1
 3209   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
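        // E.g. vlen == 8 with bt == T_FLOAT gives 8 * 4 * 8 = 256 bits (a ymm-sized vector).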
 3210   switch (opcode) {
 3211     case Op_MaxVHF:
 3212     case Op_MinVHF:
 3213       if (!VM_Version::supports_avx512bw()) {
 3214         return false;
 3215       }  // fallthrough
 3216     case Op_AddVHF:
 3217     case Op_DivVHF:
 3218     case Op_FmaVHF:
 3219     case Op_MulVHF:
 3220     case Op_SubVHF:
 3221     case Op_SqrtVHF:
 3222       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3223         return false;
 3224       }
 3225       if (!VM_Version::supports_avx512_fp16()) {
 3226         return false;
 3227       }
 3228       break;
 3229     case Op_AbsVF:
 3230     case Op_NegVF:
 3231       if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
 3232         return false; // 512bit vandps and vxorps are not available
 3233       }
 3234       break;
 3235     case Op_AbsVD:
 3236     case Op_NegVD:
 3237       if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
 3238         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3239       }
 3240       break;
 3241     case Op_RotateRightV:
 3242     case Op_RotateLeftV:
 3243       if (bt != T_INT && bt != T_LONG) {
 3244         return false;
 3245       } // fallthrough
 3246     case Op_MacroLogicV:
 3247       if (!VM_Version::supports_evex() ||
 3248           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3249         return false;
 3250       }
 3251       break;
 3252     case Op_ClearArray:
 3253     case Op_VectorMaskGen:
 3254     case Op_VectorCmpMasked:
 3255       if (!VM_Version::supports_avx512bw()) {
 3256         return false;
 3257       }
 3258       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3259         return false;
 3260       }
 3261       break;
 3262     case Op_LoadVectorMasked:
 3263     case Op_StoreVectorMasked:
 3264       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3265         return false;
 3266       }
 3267       break;
 3268     case Op_UMinV:
 3269     case Op_UMaxV:
 3270       if (UseAVX == 0) {
 3271         return false;
 3272       }
 3273       break;
 3274     case Op_MaxV:
 3275     case Op_MinV:
 3276       if (UseSSE < 4 && is_integral_type(bt)) {
 3277         return false;
 3278       }
 3279       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3280           // Float/Double intrinsics are enabled for AVX family currently.
 3281           if (UseAVX == 0) {
 3282             return false;
 3283           }
 3284           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3285             return false;
 3286           }
 3287       }
 3288       break;
 3289     case Op_CallLeafVector:
 3290       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3291         return false;
 3292       }
 3293       break;
 3294     case Op_AddReductionVI:
 3295       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3296         return false;
 3297       }
 3298       // fallthrough
 3299     case Op_AndReductionV:
 3300     case Op_OrReductionV:
 3301     case Op_XorReductionV:
 3302       if (is_subword_type(bt) && (UseSSE < 4)) {
 3303         return false;
 3304       }
 3305       break;
 3306     case Op_MinReductionV:
 3307     case Op_MaxReductionV:
 3308       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3309         return false;
 3310       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3311         return false;
 3312       }
 3313       // Float/Double intrinsics enabled for AVX family.
 3314       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3315         return false;
 3316       }
 3317       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3318         return false;
 3319       }
 3320       break;
 3321     case Op_VectorTest:
 3322       if (UseSSE < 4) {
 3323         return false; // Implementation limitation
 3324       } else if (size_in_bits < 32) {
 3325         return false; // Implementation limitation
 3326       }
 3327       break;
 3328     case Op_VectorLoadShuffle:
 3329     case Op_VectorRearrange:
 3330       if (vlen == 2) {
 3331         return false; // Implementation limitation due to how shuffle is loaded
 3332       } else if (size_in_bits == 256 && UseAVX < 2) {
 3333         return false; // Implementation limitation
 3334       }
 3335       break;
 3336     case Op_VectorLoadMask:
 3337     case Op_VectorMaskCast:
 3338       if (size_in_bits == 256 && UseAVX < 2) {
 3339         return false; // Implementation limitation
 3340       }
 3341       // fallthrough
 3342     case Op_VectorStoreMask:
 3343       if (vlen == 2) {
 3344         return false; // Implementation limitation
 3345       }
 3346       break;
 3347     case Op_PopulateIndex:
 3348       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3349         return false;
 3350       }
 3351       break;
 3352     case Op_VectorCastB2X:
 3353     case Op_VectorCastS2X:
 3354     case Op_VectorCastI2X:
 3355       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3356         return false;
 3357       }
 3358       break;
 3359     case Op_VectorCastL2X:
 3360       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3361         return false;
 3362       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3363         return false;
 3364       }
 3365       break;
 3366     case Op_VectorCastF2X: {
 3367         // As per JLS section 5.1.3, narrowing conversions to sub-word types
 3368         // happen after an intermediate conversion to integer, and the special
 3369         // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3370         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3371         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3372           return false;
 3373         }
 3374       }
 3375       // fallthrough
 3376     case Op_VectorCastD2X:
 3377       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3378         return false;
 3379       }
 3380       break;
 3381     case Op_VectorCastF2HF:
 3382     case Op_VectorCastHF2F:
 3383       if (!VM_Version::supports_f16c() &&
 3384          ((!VM_Version::supports_evex() ||
 3385          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3386         return false;
 3387       }
 3388       break;
 3389     case Op_RoundVD:
 3390       if (!VM_Version::supports_avx512dq()) {
 3391         return false;
 3392       }
 3393       break;
 3394     case Op_MulReductionVI:
 3395       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3396         return false;
 3397       }
 3398       break;
 3399     case Op_LoadVectorGatherMasked:
 3400       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3401         return false;
 3402       }
 3403       if (is_subword_type(bt) &&
 3404          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3405           (size_in_bits < 64)                                      ||
 3406           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3407         return false;
 3408       }
 3409       break;
 3410     case Op_StoreVectorScatterMasked:
 3411     case Op_StoreVectorScatter:
 3412       if (is_subword_type(bt)) {
 3413         return false;
 3414       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3415         return false;
 3416       }
 3417       // fallthrough
 3418     case Op_LoadVectorGather:
 3419       if (!is_subword_type(bt) && size_in_bits == 64) {
 3420         return false;
 3421       }
 3422       if (is_subword_type(bt) && size_in_bits < 64) {
 3423         return false;
 3424       }
 3425       break;
 3426     case Op_SaturatingAddV:
 3427     case Op_SaturatingSubV:
 3428       if (UseAVX < 1) {
 3429         return false; // Implementation limitation
 3430       }
 3431       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3432         return false;
 3433       }
 3434       break;
 3435     case Op_SelectFromTwoVector:
 3436       if (size_in_bits < 128) {
 3437         return false;
 3438       }
 3439       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3440         return false;
 3441       }
 3442       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3443         return false;
 3444       }
 3445       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3446         return false;
 3447       }
 3448       if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3449         return false;
 3450       }
 3451       break;
 3452     case Op_MaskAll:
 3453       if (!VM_Version::supports_evex()) {
 3454         return false;
 3455       }
 3456       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3457         return false;
 3458       }
 3459       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3460         return false;
 3461       }
 3462       break;
 3463     case Op_VectorMaskCmp:
 3464       if (vlen < 2 || size_in_bits < 32) {
 3465         return false;
 3466       }
 3467       break;
 3468     case Op_CompressM:
 3469       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3470         return false;
 3471       }
 3472       break;
 3473     case Op_CompressV:
 3474     case Op_ExpandV:
 3475       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3476         return false;
 3477       }
 3478       if (size_in_bits < 128) {
 3479         return false;
 3480       }  // fallthrough
 3481     case Op_VectorLongToMask:
 3482       if (UseAVX < 1) {
 3483         return false;
 3484       }
 3485       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3486         return false;
 3487       }
 3488       break;
 3489     case Op_SignumVD:
 3490     case Op_SignumVF:
 3491       if (UseAVX < 1) {
 3492         return false;
 3493       }
 3494       break;
 3495     case Op_PopCountVI:
 3496     case Op_PopCountVL: {
 3497         if (!is_pop_count_instr_target(bt) &&
 3498             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3499           return false;
 3500         }
 3501       }
 3502       break;
 3503     case Op_ReverseV:
 3504     case Op_ReverseBytesV:
 3505       if (UseAVX < 2) {
 3506         return false;
 3507       }
 3508       break;
 3509     case Op_CountTrailingZerosV:
 3510     case Op_CountLeadingZerosV:
 3511       if (UseAVX < 2) {
 3512         return false;
 3513       }
 3514       break;
 3515   }
 3516   return true;  // Match rules are supported by default.
 3517 }
 3518 
 3519 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 3520   // The ADLC-based match_rule_supported routine checks for the existence of a
 3521   // pattern based on the IR opcode. Most unary/binary/ternary masked operations
 3522   // share the IR nodes of their non-masked counterparts, with the mask edge being
 3523   // the differentiator. This routine therefore checks strictly for masked
 3524   // operation patterns, returning false for all opcodes apart from the ones whose
 3525   // masked instruction patterns are defined in this file.
 3526   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3527     return false;
 3528   }
 3529 
 3530   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3531   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3532     return false;
 3533   }
 3534   switch (opcode) {
 3535     // Unary masked operations
 3536     case Op_AbsVB:
 3537     case Op_AbsVS:
 3538       if (!VM_Version::supports_avx512bw()) {
 3539         return false;  // Implementation limitation
 3540       }  // fallthrough
 3541     case Op_AbsVI:
 3542     case Op_AbsVL:
 3543       return true;
 3544 
 3545     // Ternary masked operations
 3546     case Op_FmaVF:
 3547     case Op_FmaVD:
 3548       return true;
 3549 
 3550     case Op_MacroLogicV:
 3551       if (bt != T_INT && bt != T_LONG) {
 3552         return false;
 3553       }
 3554       return true;
 3555 
 3556     // Binary masked operations
 3557     case Op_AddVB:
 3558     case Op_AddVS:
 3559     case Op_SubVB:
 3560     case Op_SubVS:
 3561     case Op_MulVS:
 3562     case Op_LShiftVS:
 3563     case Op_RShiftVS:
 3564     case Op_URShiftVS:
 3565       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3566       if (!VM_Version::supports_avx512bw()) {
 3567         return false;  // Implementation limitation
 3568       }
 3569       return true;
 3570 
 3571     case Op_MulVL:
 3572       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3573       if (!VM_Version::supports_avx512dq()) {
 3574         return false;  // Implementation limitation
 3575       }
 3576       return true;
 3577 
 3578     case Op_AndV:
 3579     case Op_OrV:
 3580     case Op_XorV:
 3581     case Op_RotateRightV:
 3582     case Op_RotateLeftV:
 3583       if (bt != T_INT && bt != T_LONG) {
 3584         return false; // Implementation limitation
 3585       }
 3586       return true;
 3587 
 3588     case Op_VectorLoadMask:
 3589       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3590       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3591         return false;
 3592       }
 3593       return true;
 3594 
 3595     case Op_AddVI:
 3596     case Op_AddVL:
 3597     case Op_AddVF:
 3598     case Op_AddVD:
 3599     case Op_SubVI:
 3600     case Op_SubVL:
 3601     case Op_SubVF:
 3602     case Op_SubVD:
 3603     case Op_MulVI:
 3604     case Op_MulVF:
 3605     case Op_MulVD:
 3606     case Op_DivVF:
 3607     case Op_DivVD:
 3608     case Op_SqrtVF:
 3609     case Op_SqrtVD:
 3610     case Op_LShiftVI:
 3611     case Op_LShiftVL:
 3612     case Op_RShiftVI:
 3613     case Op_RShiftVL:
 3614     case Op_URShiftVI:
 3615     case Op_URShiftVL:
 3616     case Op_LoadVectorMasked:
 3617     case Op_StoreVectorMasked:
 3618     case Op_LoadVectorGatherMasked:
 3619     case Op_StoreVectorScatterMasked:
 3620       return true;
 3621 
 3622     case Op_UMinV:
 3623     case Op_UMaxV:
 3624       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3625         return false;
 3626       } // fallthrough
 3627     case Op_MaxV:
 3628     case Op_MinV:
 3629       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3630         return false; // Implementation limitation
 3631       }
 3632       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3633         return false; // Implementation limitation
 3634       }
 3635       return true;
 3636     case Op_SaturatingAddV:
 3637     case Op_SaturatingSubV:
 3638       if (!is_subword_type(bt)) {
 3639         return false;
 3640       }
 3641       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3642         return false; // Implementation limitation
 3643       }
 3644       return true;
 3645 
 3646     case Op_VectorMaskCmp:
 3647       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3648         return false; // Implementation limitation
 3649       }
 3650       return true;
 3651 
 3652     case Op_VectorRearrange:
 3653       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3654         return false; // Implementation limitation
 3655       }
 3656       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3657         return false; // Implementation limitation
 3658       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3659         return false; // Implementation limitation
 3660       }
 3661       return true;
 3662 
 3663     // Binary Logical operations
 3664     case Op_AndVMask:
 3665     case Op_OrVMask:
 3666     case Op_XorVMask:
 3667       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3668         return false; // Implementation limitation
 3669       }
 3670       return true;
 3671 
 3672     case Op_PopCountVI:
 3673     case Op_PopCountVL:
 3674       if (!is_pop_count_instr_target(bt)) {
 3675         return false;
 3676       }
 3677       return true;
 3678 
 3679     case Op_MaskAll:
 3680       return true;
 3681 
 3682     case Op_CountLeadingZerosV:
 3683       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3684         return true;
 3685       }
 3686     default:
 3687       return false;
 3688   }
 3689 }
 3690 
 3691 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3692   return false;
 3693 }
 3694 
 3695 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3696 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3697   switch (elem_bt) {
 3698     case T_BYTE:  return false;
 3699     case T_SHORT: return !VM_Version::supports_avx512bw();
 3700     case T_INT:   return !VM_Version::supports_avx();
 3701     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3702     default:
 3703       ShouldNotReachHere();
 3704       return false;
 3705   }
 3706 }
 3707 
 3708 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3709   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3710   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3711   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3712       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3713     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3714     return new legVecZOper();
 3715   }
 3716   if (legacy) {
 3717     switch (ideal_reg) {
 3718       case Op_VecS: return new legVecSOper();
 3719       case Op_VecD: return new legVecDOper();
 3720       case Op_VecX: return new legVecXOper();
 3721       case Op_VecY: return new legVecYOper();
 3722       case Op_VecZ: return new legVecZOper();
 3723     }
 3724   } else {
 3725     switch (ideal_reg) {
 3726       case Op_VecS: return new vecSOper();
 3727       case Op_VecD: return new vecDOper();
 3728       case Op_VecX: return new vecXOper();
 3729       case Op_VecY: return new vecYOper();
 3730       case Op_VecZ: return new vecZOper();
 3731     }
 3732   }
 3733   ShouldNotReachHere();
 3734   return nullptr;
 3735 }
 3736 
 3737 bool Matcher::is_reg2reg_move(MachNode* m) {
 3738   switch (m->rule()) {
 3739     case MoveVec2Leg_rule:
 3740     case MoveLeg2Vec_rule:
 3741     case MoveF2VL_rule:
 3742     case MoveF2LEG_rule:
 3743     case MoveVL2F_rule:
 3744     case MoveLEG2F_rule:
 3745     case MoveD2VL_rule:
 3746     case MoveD2LEG_rule:
 3747     case MoveVL2D_rule:
 3748     case MoveLEG2D_rule:
 3749       return true;
 3750     default:
 3751       return false;
 3752   }
 3753 }
 3754 
 3755 bool Matcher::is_generic_vector(MachOper* opnd) {
 3756   switch (opnd->opcode()) {
 3757     case VEC:
 3758     case LEGVEC:
 3759       return true;
 3760     default:
 3761       return false;
 3762   }
 3763 }
 3764 
 3765 //------------------------------------------------------------------------
 3766 
 3767 const RegMask* Matcher::predicate_reg_mask(void) {
 3768   return &_VECTMASK_REG_mask;
 3769 }
 3770 
 3771 // Max vector size in bytes. 0 if not supported.
 3772 int Matcher::vector_width_in_bytes(BasicType bt) {
 3773   assert(is_java_primitive(bt), "only primitive type vectors");
 3774   // SSE2 supports 128bit vectors for all types.
 3775   // AVX2 supports 256bit vectors for all types.
 3776   // AVX512/EVEX supports 512bit vectors for all types.
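        // Worked example of the base formula below (illustrative): UseAVX == 2
        // gives (1 << 2) * 8 = 32 bytes, UseAVX == 3 gives 64 bytes, and
        // anything below AVX2 keeps the SSE2 default of 16 bytes.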
 3777   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3778   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3779   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3780     size = (UseAVX > 2) ? 64 : 32;
 3781   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3782     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3783   // Use flag to limit vector size.
 3784   size = MIN2(size,(int)MaxVectorSize);
 3785   // Minimum 2 values in vector (or 4 for bytes).
 3786   switch (bt) {
 3787   case T_DOUBLE:
 3788   case T_LONG:
 3789     if (size < 16) return 0;
 3790     break;
 3791   case T_FLOAT:
 3792   case T_INT:
 3793     if (size < 8) return 0;
 3794     break;
 3795   case T_BOOLEAN:
 3796     if (size < 4) return 0;
 3797     break;
 3798   case T_CHAR:
 3799     if (size < 4) return 0;
 3800     break;
 3801   case T_BYTE:
 3802     if (size < 4) return 0;
 3803     break;
 3804   case T_SHORT:
 3805     if (size < 4) return 0;
 3806     break;
 3807   default:
 3808     ShouldNotReachHere();
 3809   }
 3810   return size;
 3811 }
 3812 
 3813 // Limits on vector size (number of elements) loaded into vector.
 3814 int Matcher::max_vector_size(const BasicType bt) {
 3815   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3816 }
 3817 int Matcher::min_vector_size(const BasicType bt) {
 3818   int max_size = max_vector_size(bt);
 3819   // Min load into a vector is 4 bytes: byte-sized types need 4 elements, all others 2.
 3820   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3821   // Support for calling svml double64 vectors
 3822   if (bt == T_DOUBLE) {
 3823     size = 1;
 3824   }
 3825   return MIN2(size,max_size);
 3826 }
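       // Worked example of the two limits above (illustrative, assuming
       // MaxVectorSize == 64 and AVX512BW): T_BYTE vectors span 4..64 elements,
       // T_INT vectors span 2..16 elements, and T_DOUBLE may shrink to a single
       // element to support the SVML double64 entry points.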
 3827 
 3828 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3829   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3830   // by default on Cascade Lake
 3831   if (VM_Version::is_default_intel_cascade_lake()) {
 3832     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3833   }
 3834   return Matcher::max_vector_size(bt);
 3835 }
 3836 
 3837 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3838   return -1;
 3839 }
 3840 
 3841 // Vector ideal reg corresponding to specified size in bytes
 3842 uint Matcher::vector_ideal_reg(int size) {
 3843   assert(MaxVectorSize >= size, "");
 3844   switch(size) {
 3845     case  4: return Op_VecS;
 3846     case  8: return Op_VecD;
 3847     case 16: return Op_VecX;
 3848     case 32: return Op_VecY;
 3849     case 64: return Op_VecZ;
 3850   }
 3851   ShouldNotReachHere();
 3852   return 0;
 3853 }
 3854 
 3855 // Check for shift by small constant as well
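       // For example (illustrative): on LP64 an array element address a[i]
       // appears as (AddP base (LShiftL (ConvI2L i) 2)); since x86 addressing
       // modes scale an index by at most 8, only shift counts <= 3 can be
       // folded into the operand (e.g. [base + i*4]) instead of computing the
       // shifted index into a register.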
 3856 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3857   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3858       shift->in(2)->get_int() <= 3 &&
 3859       // Are there other uses besides address expressions?
 3860       !matcher->is_visited(shift)) {
 3861     address_visited.set(shift->_idx); // Flag as address_visited
 3862     mstack.push(shift->in(2), Matcher::Visit);
 3863     Node *conv = shift->in(1);
 3864     // Allow the Matcher to match the rule which bypasses the
 3865     // ConvI2L operation for an array index on LP64
 3866     // if the index value is positive.
 3867     if (conv->Opcode() == Op_ConvI2L &&
 3868         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3869         // Are there other uses besides address expressions?
 3870         !matcher->is_visited(conv)) {
 3871       address_visited.set(conv->_idx); // Flag as address_visited
 3872       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3873     } else {
 3874       mstack.push(conv, Matcher::Pre_Visit);
 3875     }
 3876     return true;
 3877   }
 3878   return false;
 3879 }
 3880 
 3881 // This function identifies sub-graphs in which a 'load' node is
 3882 // an input to two different nodes, such that the sub-graph can be
 3883 // matched with BMI instructions like blsi, blsr, etc.
 3884 // Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
 3885 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3886 // refers to the same node.
 3887 //
 3888 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3889 // This is a temporary solution until we make DAGs expressible in ADL.
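       // For reference (illustrative), the shapes accepted by is_bmi_pattern()
       // below correspond to the BMI1 instruction semantics:
       //   blsi:   x & -x       <-> (AndI (SubI 0 LoadI*) LoadI*)
       //   blsr:   x & (x - 1)  <-> (AndI (AddI LoadI* -1) LoadI*)
       //   blsmsk: x ^ (x - 1)  <-> (XorI (AddI LoadI* -1) LoadI*)
       // where LoadI* again refers to the same load node in both positions.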
 3890 template<typename ConType>
 3891 class FusedPatternMatcher {
 3892   Node* _op1_node;
 3893   Node* _mop_node;
 3894   int _con_op;
 3895 
 3896   static int match_next(Node* n, int next_op, int next_op_idx) {
 3897     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3898       return -1;
 3899     }
 3900 
 3901     if (next_op_idx == -1) { // n is commutative, try rotations
 3902       if (n->in(1)->Opcode() == next_op) {
 3903         return 1;
 3904       } else if (n->in(2)->Opcode() == next_op) {
 3905         return 2;
 3906       }
 3907     } else {
 3908       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3909       if (n->in(next_op_idx)->Opcode() == next_op) {
 3910         return next_op_idx;
 3911       }
 3912     }
 3913     return -1;
 3914   }
 3915 
 3916  public:
 3917   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 3918     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 3919 
 3920   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 3921              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 3922              typename ConType::NativeType con_value) {
 3923     if (_op1_node->Opcode() != op1) {
 3924       return false;
 3925     }
 3926     if (_mop_node->outcnt() > 2) {
 3927       return false;
 3928     }
 3929     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 3930     if (op1_op2_idx == -1) {
 3931       return false;
 3932     }
 3933     // Memory operation must be the other edge
 3934     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 3935 
 3936     // Check that the mop node is really what we want
 3937     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 3938       Node* op2_node = _op1_node->in(op1_op2_idx);
 3939       if (op2_node->outcnt() > 1) {
 3940         return false;
 3941       }
 3942       assert(op2_node->Opcode() == op2, "Should be");
 3943       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 3944       if (op2_con_idx == -1) {
 3945         return false;
 3946       }
 3947       // Memory operation must be the other edge
 3948       int op2_mop_idx = (op2_con_idx & 1) + 1;
 3949       // Check that the memory operation is the same node
 3950       if (op2_node->in(op2_mop_idx) == _mop_node) {
 3951         // Now check the constant
 3952         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 3953         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 3954           return true;
 3955         }
 3956       }
 3957     }
 3958     return false;
 3959   }
 3960 };
 3961 
 3962 static bool is_bmi_pattern(Node* n, Node* m) {
 3963   assert(UseBMI1Instructions, "sanity");
 3964   if (n != nullptr && m != nullptr) {
 3965     if (m->Opcode() == Op_LoadI) {
 3966       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 3967       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 3968              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 3969              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 3970     } else if (m->Opcode() == Op_LoadL) {
 3971       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 3972       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 3973              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 3974              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 3975     }
 3976   }
 3977   return false;
 3978 }
 3979 
 3980 // Should the matcher clone input 'm' of node 'n'?
 3981 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 3982   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 3983   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 3984     mstack.push(m, Visit);
 3985     return true;
 3986   }
 3987   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 3988     mstack.push(m, Visit);           // m = ShiftCntV
 3989     return true;
 3990   }
 3991   if (is_encode_and_store_pattern(n, m)) {
 3992     mstack.push(m, Visit);
 3993     return true;
 3994   }
 3995   return false;
 3996 }
 3997 
 3998 // Should the Matcher clone shifts on addressing modes, expecting them
 3999 // to be subsumed into complex addressing expressions or compute them
 4000 // into registers?
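       // For example (illustrative): given (AddP (AddP base (LShiftL idx 2)) 16),
       // both the shift and the constant offset are cloned below so that matching
       // can fold the whole expression into a single [base + idx*4 + 16] operand.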
 4001 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4002   Node *off = m->in(AddPNode::Offset);
 4003   if (off->is_Con()) {
 4004     address_visited.test_set(m->_idx); // Flag as address_visited
 4005     Node *adr = m->in(AddPNode::Address);
 4006 
 4007     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4008     // AtomicAdd is not an addressing expression.
 4009     // Cheap to find it by looking for screwy base.
 4010     if (adr->is_AddP() &&
 4011         !adr->in(AddPNode::Base)->is_top() &&
 4012         !adr->in(AddPNode::Offset)->is_Con() &&
 4013         off->get_long() == (int) (off->get_long()) && // immL32
 4014         // Are there other uses besides address expressions?
 4015         !is_visited(adr)) {
 4016       address_visited.set(adr->_idx); // Flag as address_visited
 4017       Node *shift = adr->in(AddPNode::Offset);
 4018       if (!clone_shift(shift, this, mstack, address_visited)) {
 4019         mstack.push(shift, Pre_Visit);
 4020       }
 4021       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4022       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4023     } else {
 4024       mstack.push(adr, Pre_Visit);
 4025     }
 4026 
 4027     // Clone X+offset as it also folds into most addressing expressions
 4028     mstack.push(off, Visit);
 4029     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4030     return true;
 4031   } else if (clone_shift(off, this, mstack, address_visited)) {
 4032     address_visited.test_set(m->_idx); // Flag as address_visited
 4033     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4034     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4035     return true;
 4036   }
 4037   return false;
 4038 }
 4039 
 4040 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4041   switch (bt) {
 4042     case BoolTest::eq:
 4043       return Assembler::eq;
 4044     case BoolTest::ne:
 4045       return Assembler::neq;
 4046     case BoolTest::le:
 4047     case BoolTest::ule:
 4048       return Assembler::le;
 4049     case BoolTest::ge:
 4050     case BoolTest::uge:
 4051       return Assembler::nlt;
 4052     case BoolTest::lt:
 4053     case BoolTest::ult:
 4054       return Assembler::lt;
 4055     case BoolTest::gt:
 4056     case BoolTest::ugt:
 4057       return Assembler::nle;
 4058     default: ShouldNotReachHere(); return Assembler::_false;
 4059   }
 4060 }
 4061 
 4062 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4063   switch (bt) {
 4064   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4065   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4066   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4067   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4068   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4069   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4070   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4071   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4072   }
 4073 }
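       // Illustrative consequence of the ordered/unordered choice above: an
       // ordered predicate such as LT_OQ yields false when either operand is
       // NaN (matching Java's '<'), while the unordered NEQ_UQ yields true for
       // NaN operands (matching Java's '!='), as required by JLS 15.21.1.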
 4074 
 4075 // Helper methods for MachSpillCopyNode::implementation().
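       // Background note (illustrative): when UseAVX == 3 but AVX512VL is
       // unavailable (e.g. KNL), XMM registers 16-31 are only reachable through
       // 512-bit EVEX encodings, so the helpers fall back to
       // vextractf32x4/vinsertf32x4 instead of plain (v)movdqu for the
       // 128/256-bit cases.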
 4076 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4077                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4078   assert(ireg == Op_VecS || // 32bit vector
 4079          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4080           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4081          "no non-adjacent vector moves" );
 4082   if (masm) {
 4083     switch (ireg) {
 4084     case Op_VecS: // copy whole register
 4085     case Op_VecD:
 4086     case Op_VecX:
 4087       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4088         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4089       } else {
 4090         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4091       }
 4092       break;
 4093     case Op_VecY:
 4094       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4095         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4096       } else {
 4097         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4098       }
 4099       break;
 4100     case Op_VecZ:
 4101       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4102       break;
 4103     default:
 4104       ShouldNotReachHere();
 4105     }
 4106 #ifndef PRODUCT
 4107   } else {
 4108     switch (ireg) {
 4109     case Op_VecS:
 4110     case Op_VecD:
 4111     case Op_VecX:
 4112       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4113       break;
 4114     case Op_VecY:
 4115     case Op_VecZ:
 4116       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4117       break;
 4118     default:
 4119       ShouldNotReachHere();
 4120     }
 4121 #endif
 4122   }
 4123 }
 4124 
 4125 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4126                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4127   if (masm) {
 4128     if (is_load) {
 4129       switch (ireg) {
 4130       case Op_VecS:
 4131         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4132         break;
 4133       case Op_VecD:
 4134         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4135         break;
 4136       case Op_VecX:
 4137         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4138           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4139         } else {
 4140           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4141           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4142         }
 4143         break;
 4144       case Op_VecY:
 4145         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4146           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4147         } else {
 4148           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4149           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4150         }
 4151         break;
 4152       case Op_VecZ:
 4153         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4154         break;
 4155       default:
 4156         ShouldNotReachHere();
 4157       }
 4158     } else { // store
 4159       switch (ireg) {
 4160       case Op_VecS:
 4161         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4162         break;
 4163       case Op_VecD:
 4164         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4165         break;
 4166       case Op_VecX:
 4167         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4168           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4169         }
 4170         else {
 4171           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4172         }
 4173         break;
 4174       case Op_VecY:
 4175         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4176           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4177         }
 4178         else {
 4179           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4180         }
 4181         break;
 4182       case Op_VecZ:
 4183         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4184         break;
 4185       default:
 4186         ShouldNotReachHere();
 4187       }
 4188     }
 4189 #ifndef PRODUCT
 4190   } else {
 4191     if (is_load) {
 4192       switch (ireg) {
 4193       case Op_VecS:
 4194         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4195         break;
 4196       case Op_VecD:
 4197         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4198         break;
 4199        case Op_VecX:
 4200         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4201         break;
 4202       case Op_VecY:
 4203       case Op_VecZ:
 4204         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4205         break;
 4206       default:
 4207         ShouldNotReachHere();
 4208       }
 4209     } else { // store
 4210       switch (ireg) {
 4211       case Op_VecS:
 4212         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4213         break;
 4214       case Op_VecD:
 4215         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4216         break;
 4217        case Op_VecX:
 4218         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4219         break;
 4220       case Op_VecY:
 4221       case Op_VecZ:
 4222         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4223         break;
 4224       default:
 4225         ShouldNotReachHere();
 4226       }
 4227     }
 4228 #endif
 4229   }
 4230 }
 4231 
 4232 template <class T>
 4233 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4234   int size = type2aelembytes(bt) * len;
 4235   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4236   for (int i = 0; i < len; i++) {
 4237     int offset = i * type2aelembytes(bt);
 4238     switch (bt) {
 4239       case T_BYTE: val->at(i) = con; break;
 4240       case T_SHORT: {
 4241         jshort c = con;
 4242         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4243         break;
 4244       }
 4245       case T_INT: {
 4246         jint c = con;
 4247         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4248         break;
 4249       }
 4250       case T_LONG: {
 4251         jlong c = con;
 4252         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4253         break;
 4254       }
 4255       case T_FLOAT: {
 4256         jfloat c = con;
 4257         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4258         break;
 4259       }
 4260       case T_DOUBLE: {
 4261         jdouble c = con;
 4262         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4263         break;
 4264       }
 4265       default: assert(false, "%s", type2name(bt));
 4266     }
 4267   }
 4268   return val;
 4269 }
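       // For example (illustrative): vreplicate_imm(T_SHORT, (jshort)0x0102, 4)
       // yields the little-endian byte sequence 02 01 02 01 02 01 02 01, i.e.
       // the constant broadcast across all four 16-bit lanes.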
 4270 
 4271 static inline jlong high_bit_set(BasicType bt) {
 4272   switch (bt) {
 4273     case T_BYTE:  return 0x8080808080808080;
 4274     case T_SHORT: return 0x8000800080008000;
 4275     case T_INT:   return 0x8000000080000000;
 4276     case T_LONG:  return 0x8000000000000000;
 4277     default:
 4278       ShouldNotReachHere();
 4279       return 0;
 4280   }
 4281 }
 4282 
 4283 #ifndef PRODUCT
 4284   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4285     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4286   }
 4287 #endif
 4288 
 4289   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4290     __ nop(_count);
 4291   }
 4292 
 4293   uint MachNopNode::size(PhaseRegAlloc*) const {
 4294     return _count;
 4295   }
 4296 
 4297 #ifndef PRODUCT
 4298   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4299     st->print("# breakpoint");
 4300   }
 4301 #endif
 4302 
 4303   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4304     __ int3();
 4305   }
 4306 
 4307   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4308     return MachNode::size(ra_);
 4309   }
 4310 
 4311 %}
 4312 
 4313 //----------ENCODING BLOCK-----------------------------------------------------
 4314 // This block specifies the encoding classes used by the compiler to
 4315 // output byte streams.  Encoding classes are parameterized macros
 4316 // used by Machine Instruction Nodes in order to generate the bit
 4317 // encoding of the instruction.  Operands specify their base encoding
 4318 // interface with the interface keyword.  Four interfaces are
 4319 // currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
 4320 // COND_INTER.  REG_INTER causes an operand to generate a function
 4321 // which returns its register number when queried.  CONST_INTER causes
 4322 // an operand to generate a function which returns the value of the
 4323 // constant when queried.  MEMORY_INTER causes an operand to generate
 4324 // four functions which return the Base Register, the Index Register,
 4325 // the Scale Value, and the Offset Value of the operand when queried.
 4326 // COND_INTER causes an operand to generate six functions which return
 4327 // the encoding code (ie - encoding bits for the instruction)
 4328 // associated with each basic boolean condition for a conditional
 4329 // instruction.
 4330 //
 4331 // Instructions specify two basic values for encoding.  They use the
 4332 // ins_encode keyword to specify their encoding classes (which must be
 4333 // a sequence of enc_class names, and their parameters, specified in
 4334 // the encoding block), and they use the opcode keyword to specify, in
 4335 // order, their primary, secondary, and tertiary opcode.  Only the
 4336 // opcode sections which a particular instruction needs for encoding
 4337 // need to be specified.  As before, a function is available to check
 4338 // whether a constant displacement is an oop.
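       // Illustrative sketch only (my_enc is a made-up name): an encoding
       // class has the shape
       //   enc_class my_enc(rRegI dst, rRegI src) %{
       //     __ movl($dst$$Register, $src$$Register);
       //   %}
       // and an instruct rule refers to it as ins_encode(my_enc(dst, src)).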
 4339 encode %{
 4340   enc_class cdql_enc(no_rax_rdx_RegI div)
 4341   %{
 4342     // Full implementation of Java idiv and irem; checks for
 4343     // special case as described in JVM spec., p.243 & p.271.
 4344     //
 4345     //         normal case                           special case
 4346     //
 4347     // input : rax: dividend                         min_int
 4348     //         reg: divisor                          -1
 4349     //
 4350     // output: rax: quotient  (= rax idiv reg)       min_int
 4351     //         rdx: remainder (= rax irem reg)       0
 4352     //
 4353     //  Code sequence:
 4354     //
 4355     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4356     //    5:   75 07/08                jne    e <normal>
 4357     //    7:   33 d2                   xor    %edx,%edx
 4358     //  [div >= 8 -> offset + 1]
 4359     //  [REX_B]
 4360     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4361     //    c:   74 03/04                je     11 <done>
 4362     // 000000000000000e <normal>:
 4363     //    e:   99                      cltd
 4364     //  [div >= 8 -> offset + 1]
 4365     //  [REX_B]
 4366     //    f:   f7 f9                   idiv   $div
 4367     // 0000000000000011 <done>:
 4368     Label normal;
 4369     Label done;
 4370 
 4371     // cmp    $0x80000000,%eax
 4372     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4373 
 4374     // jne    e <normal>
 4375     __ jccb(Assembler::notEqual, normal);
 4376 
 4377     // xor    %edx,%edx
 4378     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4379 
 4380     // cmp    $0xffffffffffffffff,$div
 4381     __ cmpl($div$$Register, -1);
 4382 
 4383     // je     11 <done>
 4384     __ jccb(Assembler::equal, done);
 4385 
 4386     // <normal>
 4387     // cltd
 4388     __ bind(normal);
 4389     __ cdql();
 4390 
 4391     // idivl
 4392     // <done>
 4393     __ idivl($div$$Register);
 4394     __ bind(done);
 4395   %}
 4396 
 4397   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4398   %{
 4399     // Full implementation of Java ldiv and lrem; checks for
 4400     // special case as described in JVM spec., p.243 & p.271.
 4401     //
 4402     //         normal case                           special case
 4403     //
 4404     // input : rax: dividend                         min_long
 4405     //         reg: divisor                          -1
 4406     //
 4407     // output: rax: quotient  (= rax idiv reg)       min_long
 4408     //         rdx: remainder (= rax irem reg)       0
 4409     //
 4410     //  Code sequence:
 4411     //
 4412     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4413     //    7:   00 00 80
 4414     //    a:   48 39 d0                cmp    %rdx,%rax
 4415     //    d:   75 08                   jne    17 <normal>
 4416     //    f:   33 d2                   xor    %edx,%edx
 4417     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4418     //   15:   74 05                   je     1c <done>
 4419     // 0000000000000017 <normal>:
 4420     //   17:   48 99                   cqto
 4421     //   19:   48 f7 f9                idiv   $div
 4422     // 000000000000001c <done>:
 4423     Label normal;
 4424     Label done;
 4425 
 4426     // mov    $0x8000000000000000,%rdx
 4427     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4428 
 4429     // cmp    %rdx,%rax
 4430     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4431 
 4432     // jne    17 <normal>
 4433     __ jccb(Assembler::notEqual, normal);
 4434 
 4435     // xor    %edx,%edx
 4436     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4437 
 4438     // cmp    $0xffffffffffffffff,$div
 4439     __ cmpq($div$$Register, -1);
 4440 
 4441     // je     1c <done>
 4442     __ jccb(Assembler::equal, done);
 4443 
 4444     // <normal>
 4445     // cqto
 4446     __ bind(normal);
 4447     __ cdqq();
 4448 
 4449     // idivq
 4450     // <done>
 4451     __ idivq($div$$Register);
 4452     __ bind(done);
 4453   %}
 4454 
 4455   enc_class clear_avx %{
 4456     DEBUG_ONLY(int off0 = __ offset());
 4457     if (generate_vzeroupper(Compile::current())) {
 4459       // Clear upper bits of YMM registers when current compiled code uses
 4460       // wide vectors to avoid AVX <-> SSE transition penalty during call.
 4461       __ vzeroupper();
 4462     }
 4463     DEBUG_ONLY(int off1 = __ offset());
 4464     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4465   %}
 4466 
 4467   enc_class Java_To_Runtime(method meth) %{
 4468     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4469     __ call(r10);
 4470     __ post_call_nop();
 4471   %}
 4472 
 4473   enc_class Java_Static_Call(method meth)
 4474   %{
 4475     // JAVA STATIC CALL
 4476     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4477     // determine who we intended to call.
 4478     if (!_method) {
 4479       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4480     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4481       // The NOP here is purely to ensure that eliding a call to
 4482       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4483       __ addr_nop_5();
 4484       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4485     } else {
 4486       int method_index = resolved_method_index(masm);
 4487       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4488                                                   : static_call_Relocation::spec(method_index);
 4489       address mark = __ pc();
 4490       int call_offset = __ offset();
 4491       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4492       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4493         // Calls of the same statically bound method can share
 4494         // a stub to the interpreter.
 4495         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4496       } else {
 4497         // Emit stubs for static call.
 4498         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4499         __ clear_inst_mark();
 4500         if (stub == nullptr) {
 4501           ciEnv::current()->record_failure("CodeCache is full");
 4502           return;
 4503         }
 4504       }
 4505     }
 4506     __ post_call_nop();
 4507   %}
 4508 
 4509   enc_class Java_Dynamic_Call(method meth) %{
 4510     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4511     __ post_call_nop();
 4512   %}
 4513 
 4514   enc_class call_epilog %{
 4515     if (VerifyStackAtCalls) {
 4516       // Check that stack depth is unchanged: find majik cookie on stack
 4517       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4518       Label L;
 4519       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4520       __ jccb(Assembler::equal, L);
 4521       // Die if stack mismatch
 4522       __ int3();
 4523       __ bind(L);
 4524     }
 4525   %}
 4526 
 4527 %}
 4528 
 4529 //----------FRAME--------------------------------------------------------------
 4530 // Definition of frame structure and management information.
 4531 //
 4532 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4533 //                             |   (to get allocators register number
 4534 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4535 //  r   CALLER     |        |
 4536 //  o     |        +--------+      pad to even-align allocators stack-slot
 4537 //  w     V        |  pad0  |        numbers; owned by CALLER
 4538 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4539 //  h     ^        |   in   |  5
 4540 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4541 //  |     |        |        |  3
 4542 //  |     |        +--------+
 4543 //  V     |        | old out|      Empty on Intel, window on Sparc
 4544 //        |    old |preserve|      Must be even aligned.
 4545 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4546 //        |        |   in   |  3   area for Intel ret address
 4547 //     Owned by    |preserve|      Empty on Sparc.
 4548 //       SELF      +--------+
 4549 //        |        |  pad2  |  2   pad to align old SP
 4550 //        |        +--------+  1
 4551 //        |        | locks  |  0
 4552 //        |        +--------+----> OptoReg::stack0(), even aligned
 4553 //        |        |  pad1  | 11   pad to align new SP
 4554 //        |        +--------+
 4555 //        |        |        | 10
 4556 //        |        | spills |  9   spills
 4557 //        V        |        |  8   (pad0 slot for callee)
 4558 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4559 //        ^        |  out   |  7
 4560 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4561 //     Owned by    +--------+
 4562 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4563 //        |    new |preserve|      Must be even-aligned.
 4564 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4565 //        |        |        |
 4566 //
 4567 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4568 //         known from SELF's arguments and the Java calling convention.
 4569 //         Region 6-7 is determined per call site.
 4570 // Note 2: If the calling convention leaves holes in the incoming argument
 4571 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4572 //         are owned by the CALLEE.  Holes should not be necessary in the
 4573 //         incoming area, as the Java calling convention is completely under
 4574 //         the control of the AD file.  Doubles can be sorted and packed to
 4575 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4576 //         varargs C calling conventions.
 4577 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4578 //         even aligned with pad0 as needed.
 4579 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4580 //         region 6-11 is even aligned; it may be padded out more so that
 4581 //         the region from SP to FP meets the minimum stack alignment.
 4582 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4583 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4584 //         SP meets the minimum alignment.
 4585 
 4586 frame
 4587 %{
 4588   // These three registers define part of the calling convention
 4589   // between compiled code and the interpreter.
 4590   inline_cache_reg(RAX);                // Inline Cache Register
 4591 
 4592   // Optional: name the operand used by cisc-spilling to access
 4593   // [stack_pointer + offset]
 4594   cisc_spilling_operand_name(indOffset32);
 4595 
 4596   // Number of stack slots consumed by locking an object
 4597   sync_stack_slots(2);
 4598 
 4599   // Compiled code's Frame Pointer
 4600   frame_pointer(RSP);
 4601 
 4602   // Interpreter stores its frame pointer in a register which is
 4603   // stored to the stack by I2CAdaptors.
 4604   // I2CAdaptors convert from interpreted java to compiled java.
 4605   interpreter_frame_pointer(RBP);
 4606 
 4607   // Stack alignment requirement
 4608   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4609 
 4610   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4611   // for calls to C.  Supports the var-args backing area for register parms.
 4612   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4613 
 4614   // The after-PROLOG location of the return address.  Location of
 4615   // return address specifies a type (REG or STACK) and a number
 4616   // representing the register number (i.e. - use a register name) or
 4617   // stack slot.
 4618   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4619   // Otherwise, it is above the locks and verification slot and alignment word
 4620   return_addr(STACK - 2 +
 4621               align_up((Compile::current()->in_preserve_stack_slots() +
 4622                         Compile::current()->fixed_slots()),
 4623                        stack_alignment_in_slots()));
 4624 
 4625   // Location of compiled Java return values.  Same as C for now.
 4626   return_value
 4627   %{
 4628     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4629            "only return normal values");
 4630 
 4631     static const int lo[Op_RegL + 1] = {
 4632       0,
 4633       0,
 4634       RAX_num,  // Op_RegN
 4635       RAX_num,  // Op_RegI
 4636       RAX_num,  // Op_RegP
 4637       XMM0_num, // Op_RegF
 4638       XMM0_num, // Op_RegD
 4639       RAX_num   // Op_RegL
 4640     };
 4641     static const int hi[Op_RegL + 1] = {
 4642       0,
 4643       0,
 4644       OptoReg::Bad, // Op_RegN
 4645       OptoReg::Bad, // Op_RegI
 4646       RAX_H_num,    // Op_RegP
 4647       OptoReg::Bad, // Op_RegF
 4648       XMM0b_num,    // Op_RegD
 4649       RAX_H_num     // Op_RegL
 4650     };
 4651     // Excluded flags and vector registers.
 4652     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4653     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4654   %}
 4655 %}
 4656 
 4657 //----------ATTRIBUTES---------------------------------------------------------
 4658 //----------Operand Attributes-------------------------------------------------
 4659 op_attrib op_cost(0);        // Required cost attribute
 4660 
 4661 //----------Instruction Attributes---------------------------------------------
 4662 ins_attrib ins_cost(100);       // Required cost attribute
 4663 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4664 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4665                                 // a non-matching short branch variant
 4666                                 // of some long branch?
 4667 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4668                                 // be a power of 2) specifies the
 4669                                 // alignment that some part of the
 4670                                 // instruction (not necessarily the
 4671                                 // start) requires.  If > 1, a
 4672                                 // compute_padding() function must be
 4673                                 // provided for the instruction
 4674 
 4675 // Whether this node is expanded during code emission into a sequence of
 4676 // instructions and the first instruction can perform an implicit null check.
 4677 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4678 
 4679 //----------OPERANDS-----------------------------------------------------------
 4680 // Operand definitions must precede instruction definitions for correct parsing
 4681 // in the ADLC because operands constitute user defined types which are used in
 4682 // instruction definitions.
 4683 
 4684 //----------Simple Operands----------------------------------------------------
 4685 // Immediate Operands
 4686 // Integer Immediate
 4687 operand immI()
 4688 %{
 4689   match(ConI);
 4690 
 4691   op_cost(10);
 4692   format %{ %}
 4693   interface(CONST_INTER);
 4694 %}
 4695 
 4696 // Constant for test vs zero
 4697 operand immI_0()
 4698 %{
 4699   predicate(n->get_int() == 0);
 4700   match(ConI);
 4701 
 4702   op_cost(0);
 4703   format %{ %}
 4704   interface(CONST_INTER);
 4705 %}
 4706 
 4707 // Constant for increment
 4708 operand immI_1()
 4709 %{
 4710   predicate(n->get_int() == 1);
 4711   match(ConI);
 4712 
 4713   op_cost(0);
 4714   format %{ %}
 4715   interface(CONST_INTER);
 4716 %}
 4717 
 4718 // Constant for decrement
 4719 operand immI_M1()
 4720 %{
 4721   predicate(n->get_int() == -1);
 4722   match(ConI);
 4723 
 4724   op_cost(0);
 4725   format %{ %}
 4726   interface(CONST_INTER);
 4727 %}
 4728 
 4729 operand immI_2()
 4730 %{
 4731   predicate(n->get_int() == 2);
 4732   match(ConI);
 4733 
 4734   op_cost(0);
 4735   format %{ %}
 4736   interface(CONST_INTER);
 4737 %}
 4738 
 4739 operand immI_4()
 4740 %{
 4741   predicate(n->get_int() == 4);
 4742   match(ConI);
 4743 
 4744   op_cost(0);
 4745   format %{ %}
 4746   interface(CONST_INTER);
 4747 %}
 4748 
 4749 operand immI_8()
 4750 %{
 4751   predicate(n->get_int() == 8);
 4752   match(ConI);
 4753 
 4754   op_cost(0);
 4755   format %{ %}
 4756   interface(CONST_INTER);
 4757 %}
 4758 
 4759 // Valid scale values for addressing modes (shift counts 0-3, i.e. index*1, *2, *4, *8)
 4760 operand immI2()
 4761 %{
 4762   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4763   match(ConI);
 4764 
 4765   format %{ %}
 4766   interface(CONST_INTER);
 4767 %}
 4768 
 4769 operand immU7()
 4770 %{
 4771   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4772   match(ConI);
 4773 
 4774   op_cost(5);
 4775   format %{ %}
 4776   interface(CONST_INTER);
 4777 %}
 4778 
 4779 operand immI8()
 4780 %{
 4781   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4782   match(ConI);
 4783 
 4784   op_cost(5);
 4785   format %{ %}
 4786   interface(CONST_INTER);
 4787 %}
 4788 
 4789 operand immU8()
 4790 %{
 4791   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4792   match(ConI);
 4793 
 4794   op_cost(5);
 4795   format %{ %}
 4796   interface(CONST_INTER);
 4797 %}
 4798 
 4799 operand immI16()
 4800 %{
 4801   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4802   match(ConI);
 4803 
 4804   op_cost(10);
 4805   format %{ %}
 4806   interface(CONST_INTER);
 4807 %}
 4808 
 4809 // Int Immediate non-negative
 4810 operand immU31()
 4811 %{
 4812   predicate(n->get_int() >= 0);
 4813   match(ConI);
 4814 
 4815   op_cost(0);
 4816   format %{ %}
 4817   interface(CONST_INTER);
 4818 %}
 4819 
 4820 // Pointer Immediate
 4821 operand immP()
 4822 %{
 4823   match(ConP);
 4824 
 4825   op_cost(10);
 4826   format %{ %}
 4827   interface(CONST_INTER);
 4828 %}
 4829 
 4830 // Null Pointer Immediate
 4831 operand immP0()
 4832 %{
 4833   predicate(n->get_ptr() == 0);
 4834   match(ConP);
 4835 
 4836   op_cost(5);
 4837   format %{ %}
 4838   interface(CONST_INTER);
 4839 %}
 4840 
 4841 // Narrow (compressed) Pointer Immediate
 4842 operand immN() %{
 4843   match(ConN);
 4844 
 4845   op_cost(10);
 4846   format %{ %}
 4847   interface(CONST_INTER);
 4848 %}
 4849 
 4850 operand immNKlass() %{
 4851   match(ConNKlass);
 4852 
 4853   op_cost(10);
 4854   format %{ %}
 4855   interface(CONST_INTER);
 4856 %}
 4857 
 4858 // Null Pointer Immediate
 4859 operand immN0() %{
 4860   predicate(n->get_narrowcon() == 0);
 4861   match(ConN);
 4862 
 4863   op_cost(5);
 4864   format %{ %}
 4865   interface(CONST_INTER);
 4866 %}
 4867 
 4868 operand immP31()
 4869 %{
 4870   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4871             && (n->get_ptr() >> 31) == 0);
 4872   match(ConP);
 4873 
 4874   op_cost(5);
 4875   format %{ %}
 4876   interface(CONST_INTER);
 4877 %}
 4878 
 4879 
 4880 // Long Immediate
 4881 operand immL()
 4882 %{
 4883   match(ConL);
 4884 
 4885   op_cost(20);
 4886   format %{ %}
 4887   interface(CONST_INTER);
 4888 %}
 4889 
 4890 // Long Immediate 8-bit
 4891 operand immL8()
 4892 %{
 4893   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4894   match(ConL);
 4895 
 4896   op_cost(5);
 4897   format %{ %}
 4898   interface(CONST_INTER);
 4899 %}
 4900 
 4901 // Long Immediate 32-bit unsigned
 4902 operand immUL32()
 4903 %{
 4904   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4905   match(ConL);
 4906 
 4907   op_cost(10);
 4908   format %{ %}
 4909   interface(CONST_INTER);
 4910 %}
 4911 
 4912 // Long Immediate 32-bit signed
 4913 operand immL32()
 4914 %{
 4915   predicate(n->get_long() == (int) (n->get_long()));
 4916   match(ConL);
 4917 
 4918   op_cost(15);
 4919   format %{ %}
 4920   interface(CONST_INTER);
 4921 %}
 4922 
 4923 operand immL_Pow2()
 4924 %{
 4925   predicate(is_power_of_2((julong)n->get_long()));
 4926   match(ConL);
 4927 
 4928   op_cost(15);
 4929   format %{ %}
 4930   interface(CONST_INTER);
 4931 %}
 4932 
 4933 operand immL_NotPow2()
 4934 %{
 4935   predicate(is_power_of_2((julong)~n->get_long()));
 4936   match(ConL);
 4937 
 4938   op_cost(15);
 4939   format %{ %}
 4940   interface(CONST_INTER);
 4941 %}
 4942 
 4943 // Long Immediate zero
 4944 operand immL0()
 4945 %{
 4946   predicate(n->get_long() == 0L);
 4947   match(ConL);
 4948 
 4949   op_cost(10);
 4950   format %{ %}
 4951   interface(CONST_INTER);
 4952 %}
 4953 
 4954 // Constant for increment
 4955 operand immL1()
 4956 %{
 4957   predicate(n->get_long() == 1);
 4958   match(ConL);
 4959 
 4960   format %{ %}
 4961   interface(CONST_INTER);
 4962 %}
 4963 
 4964 // Constant for decrement
 4965 operand immL_M1()
 4966 %{
 4967   predicate(n->get_long() == -1);
 4968   match(ConL);
 4969 
 4970   format %{ %}
 4971   interface(CONST_INTER);
 4972 %}
 4973 
 4974 // Long Immediate: low 32-bit mask
 4975 operand immL_32bits()
 4976 %{
 4977   predicate(n->get_long() == 0xFFFFFFFFL);
 4978   match(ConL);
 4979   op_cost(20);
 4980 
 4981   format %{ %}
 4982   interface(CONST_INTER);
 4983 %}
 4984 
 4985 // Int Immediate: 2^n-1, positive
 4986 operand immI_Pow2M1()
 4987 %{
 4988   predicate((n->get_int() > 0)
 4989             && is_power_of_2((juint)n->get_int() + 1));
 4990   match(ConI);
 4991 
 4992   op_cost(20);
 4993   format %{ %}
 4994   interface(CONST_INTER);
 4995 %}
 4996 
 4997 // Float Immediate zero
 4998 operand immF0()
 4999 %{
 5000   predicate(jint_cast(n->getf()) == 0);
 5001   match(ConF);
 5002 
 5003   op_cost(5);
 5004   format %{ %}
 5005   interface(CONST_INTER);
 5006 %}
 5007 
 5008 // Float Immediate
 5009 operand immF()
 5010 %{
 5011   match(ConF);
 5012 
 5013   op_cost(15);
 5014   format %{ %}
 5015   interface(CONST_INTER);
 5016 %}
 5017 
 5018 // Half Float Immediate
 5019 operand immH()
 5020 %{
 5021   match(ConH);
 5022 
 5023   op_cost(15);
 5024   format %{ %}
 5025   interface(CONST_INTER);
 5026 %}
 5027 
 5028 // Double Immediate zero
 5029 operand immD0()
 5030 %{
 5031   predicate(jlong_cast(n->getd()) == 0);
 5032   match(ConD);
 5033 
 5034   op_cost(5);
 5035   format %{ %}
 5036   interface(CONST_INTER);
 5037 %}
 5038 
 5039 // Double Immediate
 5040 operand immD()
 5041 %{
 5042   match(ConD);
 5043 
 5044   op_cost(15);
 5045   format %{ %}
 5046   interface(CONST_INTER);
 5047 %}
 5048 
 5049 // Immediates for special shifts (sign extend)
 5050 
 5051 // Shift counts for sign extension (16 for short, 24 for byte)
 5052 operand immI_16()
 5053 %{
 5054   predicate(n->get_int() == 16);
 5055   match(ConI);
 5056 
 5057   format %{ %}
 5058   interface(CONST_INTER);
 5059 %}
 5060 
 5061 operand immI_24()
 5062 %{
 5063   predicate(n->get_int() == 24);
 5064   match(ConI);
 5065 
 5066   format %{ %}
 5067   interface(CONST_INTER);
 5068 %}
 5069 
 5070 // Constant for byte-wide masking
 5071 operand immI_255()
 5072 %{
 5073   predicate(n->get_int() == 255);
 5074   match(ConI);
 5075 
 5076   format %{ %}
 5077   interface(CONST_INTER);
 5078 %}
 5079 
 5080 // Constant for short-wide masking
 5081 operand immI_65535()
 5082 %{
 5083   predicate(n->get_int() == 65535);
 5084   match(ConI);
 5085 
 5086   format %{ %}
 5087   interface(CONST_INTER);
 5088 %}
 5089 
 5090 // Constant for byte-wide masking
 5091 operand immL_255()
 5092 %{
 5093   predicate(n->get_long() == 255);
 5094   match(ConL);
 5095 
 5096   format %{ %}
 5097   interface(CONST_INTER);
 5098 %}
 5099 
 5100 // Constant for short-wide masking
 5101 operand immL_65535()
 5102 %{
 5103   predicate(n->get_long() == 65535);
 5104   match(ConL);
 5105 
 5106   format %{ %}
 5107   interface(CONST_INTER);
 5108 %}
 5109 
 5110 operand kReg()
 5111 %{
 5112   constraint(ALLOC_IN_RC(vectmask_reg));
 5113   match(RegVectMask);
 5114   format %{%}
 5115   interface(REG_INTER);
 5116 %}
 5117 
 5118 // Register Operands
 5119 // Integer Register
 5120 operand rRegI()
 5121 %{
 5122   constraint(ALLOC_IN_RC(int_reg));
 5123   match(RegI);
 5124 
 5125   match(rax_RegI);
 5126   match(rbx_RegI);
 5127   match(rcx_RegI);
 5128   match(rdx_RegI);
 5129   match(rdi_RegI);
 5130 
 5131   format %{ %}
 5132   interface(REG_INTER);
 5133 %}
 5134 
 5135 // Special Registers
 5136 operand rax_RegI()
 5137 %{
 5138   constraint(ALLOC_IN_RC(int_rax_reg));
 5139   match(RegI);
 5140   match(rRegI);
 5141 
 5142   format %{ "RAX" %}
 5143   interface(REG_INTER);
 5144 %}
 5145 
 5146 // Special Registers
 5147 operand rbx_RegI()
 5148 %{
 5149   constraint(ALLOC_IN_RC(int_rbx_reg));
 5150   match(RegI);
 5151   match(rRegI);
 5152 
 5153   format %{ "RBX" %}
 5154   interface(REG_INTER);
 5155 %}
 5156 
 5157 operand rcx_RegI()
 5158 %{
 5159   constraint(ALLOC_IN_RC(int_rcx_reg));
 5160   match(RegI);
 5161   match(rRegI);
 5162 
 5163   format %{ "RCX" %}
 5164   interface(REG_INTER);
 5165 %}
 5166 
 5167 operand rdx_RegI()
 5168 %{
 5169   constraint(ALLOC_IN_RC(int_rdx_reg));
 5170   match(RegI);
 5171   match(rRegI);
 5172 
 5173   format %{ "RDX" %}
 5174   interface(REG_INTER);
 5175 %}
 5176 
 5177 operand rdi_RegI()
 5178 %{
 5179   constraint(ALLOC_IN_RC(int_rdi_reg));
 5180   match(RegI);
 5181   match(rRegI);
 5182 
 5183   format %{ "RDI" %}
 5184   interface(REG_INTER);
 5185 %}
 5186 
 5187 operand no_rax_rdx_RegI()
 5188 %{
 5189   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5190   match(RegI);
 5191   match(rbx_RegI);
 5192   match(rcx_RegI);
 5193   match(rdi_RegI);
 5194 
 5195   format %{ %}
 5196   interface(REG_INTER);
 5197 %}
 5198 
 5199 operand no_rbp_r13_RegI()
 5200 %{
 5201   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5202   match(RegI);
 5203   match(rRegI);
 5204   match(rax_RegI);
 5205   match(rbx_RegI);
 5206   match(rcx_RegI);
 5207   match(rdx_RegI);
 5208   match(rdi_RegI);
 5209 
 5210   format %{ %}
 5211   interface(REG_INTER);
 5212 %}
 5213 
 5214 // Pointer Register
 5215 operand any_RegP()
 5216 %{
 5217   constraint(ALLOC_IN_RC(any_reg));
 5218   match(RegP);
 5219   match(rax_RegP);
 5220   match(rbx_RegP);
 5221   match(rdi_RegP);
 5222   match(rsi_RegP);
 5223   match(rbp_RegP);
 5224   match(r15_RegP);
 5225   match(rRegP);
 5226 
 5227   format %{ %}
 5228   interface(REG_INTER);
 5229 %}
 5230 
 5231 operand rRegP()
 5232 %{
 5233   constraint(ALLOC_IN_RC(ptr_reg));
 5234   match(RegP);
 5235   match(rax_RegP);
 5236   match(rbx_RegP);
 5237   match(rdi_RegP);
 5238   match(rsi_RegP);
 5239   match(rbp_RegP);  // See Q&A below about
 5240   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5241 
 5242   format %{ %}
 5243   interface(REG_INTER);
 5244 %}
 5245 
 5246 operand rRegN() %{
 5247   constraint(ALLOC_IN_RC(int_reg));
 5248   match(RegN);
 5249 
 5250   format %{ %}
 5251   interface(REG_INTER);
 5252 %}
 5253 
 5254 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5255 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5256 // It's fine for an instruction input that expects rRegP to match an r15_RegP.
 5257 // The output of an instruction is controlled by the allocator, which respects
 5258 // register class masks, not match rules.  Unless an instruction mentions
 5259 // r15_RegP or any_RegP explicitly as its output, r15 will not be chosen
 5260 // by the allocator for an output.
 5261 // The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer
 5262 // is true, RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 5263 // result, RBP is not included in the output of the instruction either.
 5264 
 5265 // This operand is not allowed to use RBP even if
 5266 // RBP is not used to hold the frame pointer.
 5267 operand no_rbp_RegP()
 5268 %{
 5269   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5270   match(RegP);
 5271   match(rbx_RegP);
 5272   match(rsi_RegP);
 5273   match(rdi_RegP);
 5274 
 5275   format %{ %}
 5276   interface(REG_INTER);
 5277 %}
 5278 
 5279 // Special Registers
 5280 // Return a pointer value
 5281 operand rax_RegP()
 5282 %{
 5283   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5284   match(RegP);
 5285   match(rRegP);
 5286 
 5287   format %{ %}
 5288   interface(REG_INTER);
 5289 %}
 5290 
 5291 // Special Registers
 5292 // Return a compressed pointer value
 5293 operand rax_RegN()
 5294 %{
 5295   constraint(ALLOC_IN_RC(int_rax_reg));
 5296   match(RegN);
 5297   match(rRegN);
 5298 
 5299   format %{ %}
 5300   interface(REG_INTER);
 5301 %}
 5302 
 5303 // Used in AtomicAdd
 5304 operand rbx_RegP()
 5305 %{
 5306   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5307   match(RegP);
 5308   match(rRegP);
 5309 
 5310   format %{ %}
 5311   interface(REG_INTER);
 5312 %}
 5313 
 5314 operand rsi_RegP()
 5315 %{
 5316   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5317   match(RegP);
 5318   match(rRegP);
 5319 
 5320   format %{ %}
 5321   interface(REG_INTER);
 5322 %}
 5323 
 5324 operand rbp_RegP()
 5325 %{
 5326   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5327   match(RegP);
 5328   match(rRegP);
 5329 
 5330   format %{ %}
 5331   interface(REG_INTER);
 5332 %}
 5333 
 5334 // Used in rep stosq
 5335 operand rdi_RegP()
 5336 %{
 5337   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5338   match(RegP);
 5339   match(rRegP);
 5340 
 5341   format %{ %}
 5342   interface(REG_INTER);
 5343 %}
 5344 
 5345 operand r15_RegP()
 5346 %{
 5347   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5348   match(RegP);
 5349   match(rRegP);
 5350 
 5351   format %{ %}
 5352   interface(REG_INTER);
 5353 %}
 5354 
 5355 operand rRegL()
 5356 %{
 5357   constraint(ALLOC_IN_RC(long_reg));
 5358   match(RegL);
 5359   match(rax_RegL);
 5360   match(rdx_RegL);
 5361 
 5362   format %{ %}
 5363   interface(REG_INTER);
 5364 %}
 5365 
 5366 // Special Registers
 5367 operand no_rax_rdx_RegL()
 5368 %{
 5369   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5370   match(RegL);
 5371   match(rRegL);
 5372 
 5373   format %{ %}
 5374   interface(REG_INTER);
 5375 %}
 5376 
 5377 operand rax_RegL()
 5378 %{
 5379   constraint(ALLOC_IN_RC(long_rax_reg));
 5380   match(RegL);
 5381   match(rRegL);
 5382 
 5383   format %{ "RAX" %}
 5384   interface(REG_INTER);
 5385 %}
 5386 
 5387 operand rcx_RegL()
 5388 %{
 5389   constraint(ALLOC_IN_RC(long_rcx_reg));
 5390   match(RegL);
 5391   match(rRegL);
 5392 
 5393   format %{ %}
 5394   interface(REG_INTER);
 5395 %}
 5396 
 5397 operand rdx_RegL()
 5398 %{
 5399   constraint(ALLOC_IN_RC(long_rdx_reg));
 5400   match(RegL);
 5401   match(rRegL);
 5402 
 5403   format %{ %}
 5404   interface(REG_INTER);
 5405 %}
 5406 
 5407 operand r11_RegL()
 5408 %{
 5409   constraint(ALLOC_IN_RC(long_r11_reg));
 5410   match(RegL);
 5411   match(rRegL);
 5412 
 5413   format %{ %}
 5414   interface(REG_INTER);
 5415 %}
 5416 
 5417 operand no_rbp_r13_RegL()
 5418 %{
 5419   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5420   match(RegL);
 5421   match(rRegL);
 5422   match(rax_RegL);
 5423   match(rcx_RegL);
 5424   match(rdx_RegL);
 5425 
 5426   format %{ %}
 5427   interface(REG_INTER);
 5428 %}
 5429 
 5430 // Flags register, used as output of compare instructions
 5431 operand rFlagsReg()
 5432 %{
 5433   constraint(ALLOC_IN_RC(int_flags));
 5434   match(RegFlags);
 5435 
 5436   format %{ "RFLAGS" %}
 5437   interface(REG_INTER);
 5438 %}
 5439 
 5440 // Flags register, used as output of FLOATING POINT compare instructions
 5441 operand rFlagsRegU()
 5442 %{
 5443   constraint(ALLOC_IN_RC(int_flags));
 5444   match(RegFlags);
 5445 
 5446   format %{ "RFLAGS_U" %}
 5447   interface(REG_INTER);
 5448 %}
 5449 
 5450 operand rFlagsRegUCF() %{
 5451   constraint(ALLOC_IN_RC(int_flags));
 5452   match(RegFlags);
 5453   predicate(false);
 5454 
 5455   format %{ "RFLAGS_U_CF" %}
 5456   interface(REG_INTER);
 5457 %}
 5458 
 5459 // Float register operands
 5460 operand regF() %{
 5461    constraint(ALLOC_IN_RC(float_reg));
 5462    match(RegF);
 5463 
 5464    format %{ %}
 5465    interface(REG_INTER);
 5466 %}
 5467 
 5468 // Float register operands
 5469 operand legRegF() %{
 5470    constraint(ALLOC_IN_RC(float_reg_legacy));
 5471    match(RegF);
 5472 
 5473    format %{ %}
 5474    interface(REG_INTER);
 5475 %}
 5476 
 5477 // Float register operands
 5478 operand vlRegF() %{
 5479    constraint(ALLOC_IN_RC(float_reg_vl));
 5480    match(RegF);
 5481 
 5482    format %{ %}
 5483    interface(REG_INTER);
 5484 %}
 5485 
 5486 // Double register operands
 5487 operand regD() %{
 5488    constraint(ALLOC_IN_RC(double_reg));
 5489    match(RegD);
 5490 
 5491    format %{ %}
 5492    interface(REG_INTER);
 5493 %}
 5494 
 5495 // Double register operands
 5496 operand legRegD() %{
 5497    constraint(ALLOC_IN_RC(double_reg_legacy));
 5498    match(RegD);
 5499 
 5500    format %{ %}
 5501    interface(REG_INTER);
 5502 %}
 5503 
 5504 // Double register operands
 5505 operand vlRegD() %{
 5506    constraint(ALLOC_IN_RC(double_reg_vl));
 5507    match(RegD);
 5508 
 5509    format %{ %}
 5510    interface(REG_INTER);
 5511 %}
 5512 
 5513 //----------Memory Operands----------------------------------------------------
 5514 // Direct Memory Operand
 5515 // operand direct(immP addr)
 5516 // %{
 5517 //   match(addr);
 5518 
 5519 //   format %{ "[$addr]" %}
 5520 //   interface(MEMORY_INTER) %{
 5521 //     base(0xFFFFFFFF);
 5522 //     index(0x4);
 5523 //     scale(0x0);
 5524 //     disp($addr);
 5525 //   %}
 5526 // %}
 5527 
 5528 // Indirect Memory Operand
 5529 operand indirect(any_RegP reg)
 5530 %{
 5531   constraint(ALLOC_IN_RC(ptr_reg));
 5532   match(reg);
 5533 
 5534   format %{ "[$reg]" %}
 5535   interface(MEMORY_INTER) %{
 5536     base($reg);
 5537     index(0x4);
 5538     scale(0x0);
 5539     disp(0x0);
 5540   %}
 5541 %}
 5542 
 5543 // Indirect Memory Plus Short Offset Operand
 5544 operand indOffset8(any_RegP reg, immL8 off)
 5545 %{
 5546   constraint(ALLOC_IN_RC(ptr_reg));
 5547   match(AddP reg off);
 5548 
 5549   format %{ "[$reg + $off (8-bit)]" %}
 5550   interface(MEMORY_INTER) %{
 5551     base($reg);
 5552     index(0x4);
 5553     scale(0x0);
 5554     disp($off);
 5555   %}
 5556 %}
 5557 
 5558 // Indirect Memory Plus Long Offset Operand
 5559 operand indOffset32(any_RegP reg, immL32 off)
 5560 %{
 5561   constraint(ALLOC_IN_RC(ptr_reg));
 5562   match(AddP reg off);
 5563 
 5564   format %{ "[$reg + $off (32-bit)]" %}
 5565   interface(MEMORY_INTER) %{
 5566     base($reg);
 5567     index(0x4);
 5568     scale(0x0);
 5569     disp($off);
 5570   %}
 5571 %}
 5572 
 5573 // Indirect Memory Plus Index Register Plus Offset Operand
 5574 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5575 %{
 5576   constraint(ALLOC_IN_RC(ptr_reg));
 5577   match(AddP (AddP reg lreg) off);
 5578 
 5579   op_cost(10);
 5580   format %{"[$reg + $off + $lreg]" %}
 5581   interface(MEMORY_INTER) %{
 5582     base($reg);
 5583     index($lreg);
 5584     scale(0x0);
 5585     disp($off);
 5586   %}
 5587 %}
 5588 
// Indirect Memory Plus Index Register Operand
 5590 operand indIndex(any_RegP reg, rRegL lreg)
 5591 %{
 5592   constraint(ALLOC_IN_RC(ptr_reg));
 5593   match(AddP reg lreg);
 5594 
 5595   op_cost(10);
 5596   format %{"[$reg + $lreg]" %}
 5597   interface(MEMORY_INTER) %{
 5598     base($reg);
 5599     index($lreg);
 5600     scale(0x0);
 5601     disp(0x0);
 5602   %}
 5603 %}
 5604 
 5605 // Indirect Memory Times Scale Plus Index Register
 5606 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5607 %{
 5608   constraint(ALLOC_IN_RC(ptr_reg));
 5609   match(AddP reg (LShiftL lreg scale));
 5610 
 5611   op_cost(10);
 5612   format %{"[$reg + $lreg << $scale]" %}
 5613   interface(MEMORY_INTER) %{
 5614     base($reg);
 5615     index($lreg);
 5616     scale($scale);
 5617     disp(0x0);
 5618   %}
 5619 %}
 5620 
 5621 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5622 %{
 5623   constraint(ALLOC_IN_RC(ptr_reg));
 5624   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5625   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5626 
 5627   op_cost(10);
 5628   format %{"[$reg + pos $idx << $scale]" %}
 5629   interface(MEMORY_INTER) %{
 5630     base($reg);
 5631     index($idx);
 5632     scale($scale);
 5633     disp(0x0);
 5634   %}
 5635 %}
 5636 
 5637 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5638 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5639 %{
 5640   constraint(ALLOC_IN_RC(ptr_reg));
 5641   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5642 
 5643   op_cost(10);
 5644   format %{"[$reg + $off + $lreg << $scale]" %}
 5645   interface(MEMORY_INTER) %{
 5646     base($reg);
 5647     index($lreg);
 5648     scale($scale);
 5649     disp($off);
 5650   %}
 5651 %}
 5652 
 5653 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5654 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5655 %{
 5656   constraint(ALLOC_IN_RC(ptr_reg));
 5657   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5658   match(AddP (AddP reg (ConvI2L idx)) off);
 5659 
 5660   op_cost(10);
 5661   format %{"[$reg + $off + $idx]" %}
 5662   interface(MEMORY_INTER) %{
 5663     base($reg);
 5664     index($idx);
 5665     scale(0x0);
 5666     disp($off);
 5667   %}
 5668 %}
 5669 
 5670 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5671 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5672 %{
 5673   constraint(ALLOC_IN_RC(ptr_reg));
 5674   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5675   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5676 
 5677   op_cost(10);
 5678   format %{"[$reg + $off + $idx << $scale]" %}
 5679   interface(MEMORY_INTER) %{
 5680     base($reg);
 5681     index($idx);
 5682     scale($scale);
 5683     disp($off);
 5684   %}
 5685 %}
 5686 
 5687 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base register, so we can't free r12 even when CompressedOops::base() == nullptr.
 5690 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5691   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5692   constraint(ALLOC_IN_RC(ptr_reg));
 5693   match(AddP (DecodeN reg) off);
 5694 
 5695   op_cost(10);
 5696   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5697   interface(MEMORY_INTER) %{
 5698     base(0xc); // R12
 5699     index($reg);
 5700     scale(0x3);
 5701     disp($off);
 5702   %}
 5703 %}
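//
// Worked example of the encoding above: with CompressedOops::shift() == 3,
// the oop address is computed as R12 + (narrow_oop << 3) + off, which the
// interface expresses as base = R12 (encoding 0xc), index = $reg, scale = 3.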
 5704 
 5705 // Indirect Memory Operand
 5706 operand indirectNarrow(rRegN reg)
 5707 %{
 5708   predicate(CompressedOops::shift() == 0);
 5709   constraint(ALLOC_IN_RC(ptr_reg));
 5710   match(DecodeN reg);
 5711 
 5712   format %{ "[$reg]" %}
 5713   interface(MEMORY_INTER) %{
 5714     base($reg);
 5715     index(0x4);
 5716     scale(0x0);
 5717     disp(0x0);
 5718   %}
 5719 %}
 5720 
 5721 // Indirect Memory Plus Short Offset Operand
 5722 operand indOffset8Narrow(rRegN reg, immL8 off)
 5723 %{
 5724   predicate(CompressedOops::shift() == 0);
 5725   constraint(ALLOC_IN_RC(ptr_reg));
 5726   match(AddP (DecodeN reg) off);
 5727 
 5728   format %{ "[$reg + $off (8-bit)]" %}
 5729   interface(MEMORY_INTER) %{
 5730     base($reg);
 5731     index(0x4);
 5732     scale(0x0);
 5733     disp($off);
 5734   %}
 5735 %}
 5736 
 5737 // Indirect Memory Plus Long Offset Operand
 5738 operand indOffset32Narrow(rRegN reg, immL32 off)
 5739 %{
 5740   predicate(CompressedOops::shift() == 0);
 5741   constraint(ALLOC_IN_RC(ptr_reg));
 5742   match(AddP (DecodeN reg) off);
 5743 
 5744   format %{ "[$reg + $off (32-bit)]" %}
 5745   interface(MEMORY_INTER) %{
 5746     base($reg);
 5747     index(0x4);
 5748     scale(0x0);
 5749     disp($off);
 5750   %}
 5751 %}
 5752 
 5753 // Indirect Memory Plus Index Register Plus Offset Operand
 5754 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5755 %{
 5756   predicate(CompressedOops::shift() == 0);
 5757   constraint(ALLOC_IN_RC(ptr_reg));
 5758   match(AddP (AddP (DecodeN reg) lreg) off);
 5759 
 5760   op_cost(10);
 5761   format %{"[$reg + $off + $lreg]" %}
 5762   interface(MEMORY_INTER) %{
 5763     base($reg);
 5764     index($lreg);
 5765     scale(0x0);
 5766     disp($off);
 5767   %}
 5768 %}
 5769 
// Indirect Memory Plus Index Register Operand
 5771 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5772 %{
 5773   predicate(CompressedOops::shift() == 0);
 5774   constraint(ALLOC_IN_RC(ptr_reg));
 5775   match(AddP (DecodeN reg) lreg);
 5776 
 5777   op_cost(10);
 5778   format %{"[$reg + $lreg]" %}
 5779   interface(MEMORY_INTER) %{
 5780     base($reg);
 5781     index($lreg);
 5782     scale(0x0);
 5783     disp(0x0);
 5784   %}
 5785 %}
 5786 
 5787 // Indirect Memory Times Scale Plus Index Register
 5788 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5789 %{
 5790   predicate(CompressedOops::shift() == 0);
 5791   constraint(ALLOC_IN_RC(ptr_reg));
 5792   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5793 
 5794   op_cost(10);
 5795   format %{"[$reg + $lreg << $scale]" %}
 5796   interface(MEMORY_INTER) %{
 5797     base($reg);
 5798     index($lreg);
 5799     scale($scale);
 5800     disp(0x0);
 5801   %}
 5802 %}
 5803 
 5804 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5805 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5806 %{
 5807   predicate(CompressedOops::shift() == 0);
 5808   constraint(ALLOC_IN_RC(ptr_reg));
 5809   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5810 
 5811   op_cost(10);
 5812   format %{"[$reg + $off + $lreg << $scale]" %}
 5813   interface(MEMORY_INTER) %{
 5814     base($reg);
 5815     index($lreg);
 5816     scale($scale);
 5817     disp($off);
 5818   %}
 5819 %}
 5820 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5822 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5823 %{
 5824   constraint(ALLOC_IN_RC(ptr_reg));
 5825   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5826   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5827 
 5828   op_cost(10);
 5829   format %{"[$reg + $off + $idx]" %}
 5830   interface(MEMORY_INTER) %{
 5831     base($reg);
 5832     index($idx);
 5833     scale(0x0);
 5834     disp($off);
 5835   %}
 5836 %}
 5837 
 5838 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5839 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5840 %{
 5841   constraint(ALLOC_IN_RC(ptr_reg));
 5842   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5843   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5844 
 5845   op_cost(10);
 5846   format %{"[$reg + $off + $idx << $scale]" %}
 5847   interface(MEMORY_INTER) %{
 5848     base($reg);
 5849     index($idx);
 5850     scale($scale);
 5851     disp($off);
 5852   %}
 5853 %}
 5854 
 5855 //----------Special Memory Operands--------------------------------------------
 5856 // Stack Slot Operand - This operand is used for loading and storing temporary
 5857 //                      values on the stack where a match requires a value to
 5858 //                      flow through memory.
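// In effect, $reg names a stack slot rather than a machine register, so the
// interfaces below use RSP as the base and the slot's offset as disp($reg).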
 5859 operand stackSlotP(sRegP reg)
 5860 %{
 5861   constraint(ALLOC_IN_RC(stack_slots));
 5862   // No match rule because this operand is only generated in matching
 5863 
 5864   format %{ "[$reg]" %}
 5865   interface(MEMORY_INTER) %{
 5866     base(0x4);   // RSP
 5867     index(0x4);  // No Index
 5868     scale(0x0);  // No Scale
 5869     disp($reg);  // Stack Offset
 5870   %}
 5871 %}
 5872 
 5873 operand stackSlotI(sRegI reg)
 5874 %{
 5875   constraint(ALLOC_IN_RC(stack_slots));
 5876   // No match rule because this operand is only generated in matching
 5877 
 5878   format %{ "[$reg]" %}
 5879   interface(MEMORY_INTER) %{
 5880     base(0x4);   // RSP
 5881     index(0x4);  // No Index
 5882     scale(0x0);  // No Scale
 5883     disp($reg);  // Stack Offset
 5884   %}
 5885 %}
 5886 
 5887 operand stackSlotF(sRegF reg)
 5888 %{
 5889   constraint(ALLOC_IN_RC(stack_slots));
 5890   // No match rule because this operand is only generated in matching
 5891 
 5892   format %{ "[$reg]" %}
 5893   interface(MEMORY_INTER) %{
 5894     base(0x4);   // RSP
 5895     index(0x4);  // No Index
 5896     scale(0x0);  // No Scale
 5897     disp($reg);  // Stack Offset
 5898   %}
 5899 %}
 5900 
 5901 operand stackSlotD(sRegD reg)
 5902 %{
 5903   constraint(ALLOC_IN_RC(stack_slots));
 5904   // No match rule because this operand is only generated in matching
 5905 
 5906   format %{ "[$reg]" %}
 5907   interface(MEMORY_INTER) %{
 5908     base(0x4);   // RSP
 5909     index(0x4);  // No Index
 5910     scale(0x0);  // No Scale
 5911     disp($reg);  // Stack Offset
 5912   %}
 5913 %}
 5914 operand stackSlotL(sRegL reg)
 5915 %{
 5916   constraint(ALLOC_IN_RC(stack_slots));
 5917   // No match rule because this operand is only generated in matching
 5918 
 5919   format %{ "[$reg]" %}
 5920   interface(MEMORY_INTER) %{
 5921     base(0x4);   // RSP
 5922     index(0x4);  // No Index
 5923     scale(0x0);  // No Scale
 5924     disp($reg);  // Stack Offset
 5925   %}
 5926 %}
 5927 
 5928 //----------Conditional Branch Operands----------------------------------------
 5929 // Comparison Op  - This is the operation of the comparison, and is limited to
 5930 //                  the following set of codes:
 5931 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 5932 //
 5933 // Other attributes of the comparison, such as unsignedness, are specified
 5934 // by the comparison instruction that sets a condition code flags register.
 5935 // That result is represented by a flags operand whose subtype is appropriate
 5936 // to the unsignedness (etc.) of the comparison.
 5937 //
 5938 // Later, the instruction which matches both the Comparison Op (a Bool) and
 5939 // the flags (produced by the Cmp) specifies the coding of the comparison op
 5940 // by matching a specific subtype of Bool operand below, such as cmpOpU.
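//
// For example, a conditional-branch rule elsewhere in this file pairs the
// two pieces roughly like this (sketch only, details omitted):
//
//   instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
//   %{
//     match(If cop cr);  // the Bool supplies the condition coding (cmpOp);
//     ...                // the flags operand supplies the compare result
//   %}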
 5941 
 5942 // Comparison Code
 5943 operand cmpOp()
 5944 %{
 5945   match(Bool);
 5946 
 5947   format %{ "" %}
 5948   interface(COND_INTER) %{
 5949     equal(0x4, "e");
 5950     not_equal(0x5, "ne");
 5951     less(0xC, "l");
 5952     greater_equal(0xD, "ge");
 5953     less_equal(0xE, "le");
 5954     greater(0xF, "g");
 5955     overflow(0x0, "o");
 5956     no_overflow(0x1, "no");
 5957   %}
 5958 %}
 5959 
 5960 // Comparison Code, unsigned compare.  Used by FP also, with
 5961 // C2 (unordered) turned into GT or LT already.  The other bits
 5962 // C0 and C3 are turned into Carry & Zero flags.
 5963 operand cmpOpU()
 5964 %{
 5965   match(Bool);
 5966 
 5967   format %{ "" %}
 5968   interface(COND_INTER) %{
 5969     equal(0x4, "e");
 5970     not_equal(0x5, "ne");
 5971     less(0x2, "b");
 5972     greater_equal(0x3, "ae");
 5973     less_equal(0x6, "be");
 5974     greater(0x7, "a");
 5975     overflow(0x0, "o");
 5976     no_overflow(0x1, "no");
 5977   %}
 5978 %}
 5979 
 5980 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
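// (ucomiss/ucomisd sets ZF=PF=CF=1 on an unordered result and ZF=1, PF=0,
// CF=0 on equality, so eq/ne on identical inputs reduce to a parity test;
// that is why equal maps to "np" and not_equal to "p" below.)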
 5984 operand cmpOpUCF() %{
 5985   match(Bool);
 5986   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 5987             n->as_Bool()->_test._test == BoolTest::ge ||
 5988             n->as_Bool()->_test._test == BoolTest::le ||
 5989             n->as_Bool()->_test._test == BoolTest::gt ||
 5990             n->in(1)->in(1) == n->in(1)->in(2));
 5991   format %{ "" %}
 5992   interface(COND_INTER) %{
 5993     equal(0xb, "np");
 5994     not_equal(0xa, "p");
 5995     less(0x2, "b");
 5996     greater_equal(0x3, "ae");
 5997     less_equal(0x6, "be");
 5998     greater(0x7, "a");
 5999     overflow(0x0, "o");
 6000     no_overflow(0x1, "no");
 6001   %}
 6002 %}
 6003 
 6004 
 6005 // Floating comparisons that can be fixed up with extra conditional jumps
 6006 operand cmpOpUCF2() %{
 6007   match(Bool);
 6008   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 6009              n->as_Bool()->_test._test == BoolTest::eq) &&
 6010             n->in(1)->in(1) != n->in(1)->in(2));
 6011   format %{ "" %}
 6012   interface(COND_INTER) %{
 6013     equal(0x4, "e");
 6014     not_equal(0x5, "ne");
 6015     less(0x2, "b");
 6016     greater_equal(0x3, "ae");
 6017     less_equal(0x6, "be");
 6018     greater(0x7, "a");
 6019     overflow(0x0, "o");
 6020     no_overflow(0x1, "no");
 6021   %}
 6022 %}
 6023 
// Operands for bound floating-point register arguments
 6025 operand rxmm0() %{
 6026   constraint(ALLOC_IN_RC(xmm0_reg));
 6027   match(VecX);
  format %{ %}
 6029   interface(REG_INTER);
 6030 %}
 6031 
 6032 // Vectors
 6033 
// Dummy generic vector class. Should be used for all vector operands.
// Replaced with vec[SDXYZ] during post-selection cleanup.
 6036 operand vec() %{
 6037   constraint(ALLOC_IN_RC(dynamic));
 6038   match(VecX);
 6039   match(VecY);
 6040   match(VecZ);
 6041   match(VecS);
 6042   match(VecD);
 6043 
 6044   format %{ %}
 6045   interface(REG_INTER);
 6046 %}
 6047 
 6048 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6049 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6050 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6051 // runtime code generation via reg_class_dynamic.
 6052 operand legVec() %{
 6053   constraint(ALLOC_IN_RC(dynamic));
 6054   match(VecX);
 6055   match(VecY);
 6056   match(VecZ);
 6057   match(VecS);
 6058   match(VecD);
 6059 
 6060   format %{ %}
 6061   interface(REG_INTER);
 6062 %}
 6063 
 6064 // Replaces vec during post-selection cleanup. See above.
 6065 operand vecS() %{
 6066   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6067   match(VecS);
 6068 
 6069   format %{ %}
 6070   interface(REG_INTER);
 6071 %}
 6072 
 6073 // Replaces legVec during post-selection cleanup. See above.
 6074 operand legVecS() %{
 6075   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6076   match(VecS);
 6077 
 6078   format %{ %}
 6079   interface(REG_INTER);
 6080 %}
 6081 
 6082 // Replaces vec during post-selection cleanup. See above.
 6083 operand vecD() %{
 6084   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6085   match(VecD);
 6086 
 6087   format %{ %}
 6088   interface(REG_INTER);
 6089 %}
 6090 
 6091 // Replaces legVec during post-selection cleanup. See above.
 6092 operand legVecD() %{
 6093   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6094   match(VecD);
 6095 
 6096   format %{ %}
 6097   interface(REG_INTER);
 6098 %}
 6099 
 6100 // Replaces vec during post-selection cleanup. See above.
 6101 operand vecX() %{
 6102   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6103   match(VecX);
 6104 
 6105   format %{ %}
 6106   interface(REG_INTER);
 6107 %}
 6108 
 6109 // Replaces legVec during post-selection cleanup. See above.
 6110 operand legVecX() %{
 6111   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6112   match(VecX);
 6113 
 6114   format %{ %}
 6115   interface(REG_INTER);
 6116 %}
 6117 
 6118 // Replaces vec during post-selection cleanup. See above.
 6119 operand vecY() %{
 6120   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6121   match(VecY);
 6122 
 6123   format %{ %}
 6124   interface(REG_INTER);
 6125 %}
 6126 
 6127 // Replaces legVec during post-selection cleanup. See above.
 6128 operand legVecY() %{
 6129   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6130   match(VecY);
 6131 
 6132   format %{ %}
 6133   interface(REG_INTER);
 6134 %}
 6135 
 6136 // Replaces vec during post-selection cleanup. See above.
 6137 operand vecZ() %{
 6138   constraint(ALLOC_IN_RC(vectorz_reg));
 6139   match(VecZ);
 6140 
 6141   format %{ %}
 6142   interface(REG_INTER);
 6143 %}
 6144 
 6145 // Replaces legVec during post-selection cleanup. See above.
 6146 operand legVecZ() %{
 6147   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6148   match(VecZ);
 6149 
 6150   format %{ %}
 6151   interface(REG_INTER);
 6152 %}
 6153 
 6154 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.
 6160 
 6161 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6162                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6163                indCompressedOopOffset,
 6164                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6165                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6166                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
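// A single rule written against this class (such as loadB further below)
// thereby covers every addressing form listed above: [base], [base + disp],
// [base + index*scale + disp], the narrow-oop variants, and so on.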
 6167 
 6168 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6170 pipeline %{
 6171 
 6172 //----------ATTRIBUTES---------------------------------------------------------
 6173 attributes %{
  variable_size_instructions;        // Variable-sized instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6177   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6178   instruction_fetch_units = 1;       // of 16 bytes
 6179 %}
 6180 
 6181 //----------RESOURCES----------------------------------------------------------
 6182 // Resources are the functional units available to the machine
 6183 
 6184 // Generic P2/P3 pipeline
 6185 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6186 // 3 instructions decoded per cycle.
 6187 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops; only ALU0 handles mul instructions.
 6189 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6190            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6191            BR, FPU,
 6192            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6193 
 6194 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6195 // Pipeline Description specifies the stages in the machine's pipeline
 6196 
 6197 // Generic P2/P3 pipeline
 6198 pipe_desc(S0, S1, S2, S3, S4, S5);
 6199 
 6200 //----------PIPELINE CLASSES---------------------------------------------------
 6201 // Pipeline Classes describe the stages in which input and output are
 6202 // referenced by the hardware pipeline.
 6203 
 6204 // Naming convention: ialu or fpu
 6205 // Then: _reg
 6206 // Then: _reg if there is a 2nd register
 6207 // Then: _long if it's a pair of instructions implementing a long
 6208 // Then: _fat if it requires the big decoder
 6209 //   Or: _mem if it requires the big decoder and a memory unit.
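//
// For example, fpu_reg_mem below names an FPU operation with a register
// destination and a memory source (big decoder plus a memory unit).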
 6210 
 6211 // Integer ALU reg operation
 6212 pipe_class ialu_reg(rRegI dst)
 6213 %{
 6214     single_instruction;
 6215     dst    : S4(write);
 6216     dst    : S3(read);
 6217     DECODE : S0;        // any decoder
 6218     ALU    : S3;        // any alu
 6219 %}
 6220 
 6221 // Long ALU reg operation
 6222 pipe_class ialu_reg_long(rRegL dst)
 6223 %{
 6224     instruction_count(2);
 6225     dst    : S4(write);
 6226     dst    : S3(read);
 6227     DECODE : S0(2);     // any 2 decoders
 6228     ALU    : S3(2);     // both alus
 6229 %}
 6230 
 6231 // Integer ALU reg operation using big decoder
 6232 pipe_class ialu_reg_fat(rRegI dst)
 6233 %{
 6234     single_instruction;
 6235     dst    : S4(write);
 6236     dst    : S3(read);
 6237     D0     : S0;        // big decoder only
 6238     ALU    : S3;        // any alu
 6239 %}
 6240 
 6241 // Integer ALU reg-reg operation
 6242 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6243 %{
 6244     single_instruction;
 6245     dst    : S4(write);
 6246     src    : S3(read);
 6247     DECODE : S0;        // any decoder
 6248     ALU    : S3;        // any alu
 6249 %}
 6250 
// Integer ALU reg-reg operation using big decoder
 6252 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6253 %{
 6254     single_instruction;
 6255     dst    : S4(write);
 6256     src    : S3(read);
 6257     D0     : S0;        // big decoder only
 6258     ALU    : S3;        // any alu
 6259 %}
 6260 
 6261 // Integer ALU reg-mem operation
 6262 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6263 %{
 6264     single_instruction;
 6265     dst    : S5(write);
 6266     mem    : S3(read);
 6267     D0     : S0;        // big decoder only
 6268     ALU    : S4;        // any alu
 6269     MEM    : S3;        // any mem
 6270 %}
 6271 
 6272 // Integer mem operation (prefetch)
 6273 pipe_class ialu_mem(memory mem)
 6274 %{
 6275     single_instruction;
 6276     mem    : S3(read);
 6277     D0     : S0;        // big decoder only
 6278     MEM    : S3;        // any mem
 6279 %}
 6280 
 6281 // Integer Store to Memory
 6282 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6283 %{
 6284     single_instruction;
 6285     mem    : S3(read);
 6286     src    : S5(read);
 6287     D0     : S0;        // big decoder only
 6288     ALU    : S4;        // any alu
 6289     MEM    : S3;
 6290 %}
 6291 
 6292 // // Long Store to Memory
 6293 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6294 // %{
 6295 //     instruction_count(2);
 6296 //     mem    : S3(read);
 6297 //     src    : S5(read);
 6298 //     D0     : S0(2);          // big decoder only; twice
 6299 //     ALU    : S4(2);     // any 2 alus
 6300 //     MEM    : S3(2);  // Both mems
 6301 // %}
 6302 
// Integer Store immediate to Memory
 6304 pipe_class ialu_mem_imm(memory mem)
 6305 %{
 6306     single_instruction;
 6307     mem    : S3(read);
 6308     D0     : S0;        // big decoder only
 6309     ALU    : S4;        // any alu
 6310     MEM    : S3;
 6311 %}
 6312 
 6313 // Integer ALU0 reg-reg operation
 6314 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6315 %{
 6316     single_instruction;
 6317     dst    : S4(write);
 6318     src    : S3(read);
 6319     D0     : S0;        // Big decoder only
 6320     ALU0   : S3;        // only alu0
 6321 %}
 6322 
 6323 // Integer ALU0 reg-mem operation
 6324 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6325 %{
 6326     single_instruction;
 6327     dst    : S5(write);
 6328     mem    : S3(read);
 6329     D0     : S0;        // big decoder only
 6330     ALU0   : S4;        // ALU0 only
 6331     MEM    : S3;        // any mem
 6332 %}
 6333 
 6334 // Integer ALU reg-reg operation
 6335 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6336 %{
 6337     single_instruction;
 6338     cr     : S4(write);
 6339     src1   : S3(read);
 6340     src2   : S3(read);
 6341     DECODE : S0;        // any decoder
 6342     ALU    : S3;        // any alu
 6343 %}
 6344 
 6345 // Integer ALU reg-imm operation
 6346 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6347 %{
 6348     single_instruction;
 6349     cr     : S4(write);
 6350     src1   : S3(read);
 6351     DECODE : S0;        // any decoder
 6352     ALU    : S3;        // any alu
 6353 %}
 6354 
 6355 // Integer ALU reg-mem operation
 6356 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6357 %{
 6358     single_instruction;
 6359     cr     : S4(write);
 6360     src1   : S3(read);
 6361     src2   : S3(read);
 6362     D0     : S0;        // big decoder only
 6363     ALU    : S4;        // any alu
 6364     MEM    : S3;
 6365 %}
 6366 
 6367 // Conditional move reg-reg
 6368 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6369 %{
 6370     instruction_count(4);
 6371     y      : S4(read);
 6372     q      : S3(read);
 6373     p      : S3(read);
 6374     DECODE : S0(4);     // any decoder
 6375 %}
 6376 
 6377 // Conditional move reg-reg
 6378 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6379 %{
 6380     single_instruction;
 6381     dst    : S4(write);
 6382     src    : S3(read);
 6383     cr     : S3(read);
 6384     DECODE : S0;        // any decoder
 6385 %}
 6386 
 6387 // Conditional move reg-mem
 6388 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6389 %{
 6390     single_instruction;
 6391     dst    : S4(write);
 6392     src    : S3(read);
 6393     cr     : S3(read);
 6394     DECODE : S0;        // any decoder
 6395     MEM    : S3;
 6396 %}
 6397 
 6398 // Conditional move reg-reg long
 6399 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6400 %{
 6401     single_instruction;
 6402     dst    : S4(write);
 6403     src    : S3(read);
 6404     cr     : S3(read);
 6405     DECODE : S0(2);     // any 2 decoders
 6406 %}
 6407 
 6408 // Float reg-reg operation
 6409 pipe_class fpu_reg(regD dst)
 6410 %{
 6411     instruction_count(2);
 6412     dst    : S3(read);
 6413     DECODE : S0(2);     // any 2 decoders
 6414     FPU    : S3;
 6415 %}
 6416 
 6417 // Float reg-reg operation
 6418 pipe_class fpu_reg_reg(regD dst, regD src)
 6419 %{
 6420     instruction_count(2);
 6421     dst    : S4(write);
 6422     src    : S3(read);
 6423     DECODE : S0(2);     // any 2 decoders
 6424     FPU    : S3;
 6425 %}
 6426 
 6427 // Float reg-reg operation
 6428 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6429 %{
 6430     instruction_count(3);
 6431     dst    : S4(write);
 6432     src1   : S3(read);
 6433     src2   : S3(read);
 6434     DECODE : S0(3);     // any 3 decoders
 6435     FPU    : S3(2);
 6436 %}
 6437 
 6438 // Float reg-reg operation
 6439 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6440 %{
 6441     instruction_count(4);
 6442     dst    : S4(write);
 6443     src1   : S3(read);
 6444     src2   : S3(read);
 6445     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6447     FPU    : S3(2);
 6448 %}
 6449 
 6450 // Float reg-reg operation
 6451 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6452 %{
 6453     instruction_count(4);
 6454     dst    : S4(write);
 6455     src1   : S3(read);
 6456     src2   : S3(read);
 6457     src3   : S3(read);
 6458     DECODE : S1(3);     // any 3 decoders
 6459     D0     : S0;        // Big decoder only
 6460     FPU    : S3(2);
 6461     MEM    : S3;
 6462 %}
 6463 
 6464 // Float reg-mem operation
 6465 pipe_class fpu_reg_mem(regD dst, memory mem)
 6466 %{
 6467     instruction_count(2);
 6468     dst    : S5(write);
 6469     mem    : S3(read);
 6470     D0     : S0;        // big decoder only
 6471     DECODE : S1;        // any decoder for FPU POP
 6472     FPU    : S4;
 6473     MEM    : S3;        // any mem
 6474 %}
 6475 
 6476 // Float reg-mem operation
 6477 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6478 %{
 6479     instruction_count(3);
 6480     dst    : S5(write);
 6481     src1   : S3(read);
 6482     mem    : S3(read);
 6483     D0     : S0;        // big decoder only
 6484     DECODE : S1(2);     // any decoder for FPU POP
 6485     FPU    : S4;
 6486     MEM    : S3;        // any mem
 6487 %}
 6488 
 6489 // Float mem-reg operation
 6490 pipe_class fpu_mem_reg(memory mem, regD src)
 6491 %{
 6492     instruction_count(2);
 6493     src    : S5(read);
 6494     mem    : S3(read);
 6495     DECODE : S0;        // any decoder for FPU PUSH
 6496     D0     : S1;        // big decoder only
 6497     FPU    : S4;
 6498     MEM    : S3;        // any mem
 6499 %}
 6500 
 6501 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6502 %{
 6503     instruction_count(3);
 6504     src1   : S3(read);
 6505     src2   : S3(read);
 6506     mem    : S3(read);
 6507     DECODE : S0(2);     // any decoder for FPU PUSH
 6508     D0     : S1;        // big decoder only
 6509     FPU    : S4;
 6510     MEM    : S3;        // any mem
 6511 %}
 6512 
 6513 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6514 %{
 6515     instruction_count(3);
 6516     src1   : S3(read);
 6517     src2   : S3(read);
 6518     mem    : S4(read);
 6519     DECODE : S0;        // any decoder for FPU PUSH
 6520     D0     : S0(2);     // big decoder only
 6521     FPU    : S4;
 6522     MEM    : S3(2);     // any mem
 6523 %}
 6524 
 6525 pipe_class fpu_mem_mem(memory dst, memory src1)
 6526 %{
 6527     instruction_count(2);
 6528     src1   : S3(read);
 6529     dst    : S4(read);
 6530     D0     : S0(2);     // big decoder only
 6531     MEM    : S3(2);     // any mem
 6532 %}
 6533 
 6534 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6535 %{
 6536     instruction_count(3);
 6537     src1   : S3(read);
 6538     src2   : S3(read);
 6539     dst    : S4(read);
 6540     D0     : S0(3);     // big decoder only
 6541     FPU    : S4;
 6542     MEM    : S3(3);     // any mem
 6543 %}
 6544 
 6545 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6546 %{
 6547     instruction_count(3);
 6548     src1   : S4(read);
 6549     mem    : S4(read);
 6550     DECODE : S0;        // any decoder for FPU PUSH
 6551     D0     : S0(2);     // big decoder only
 6552     FPU    : S4;
 6553     MEM    : S3(2);     // any mem
 6554 %}
 6555 
 6556 // Float load constant
 6557 pipe_class fpu_reg_con(regD dst)
 6558 %{
 6559     instruction_count(2);
 6560     dst    : S5(write);
 6561     D0     : S0;        // big decoder only for the load
 6562     DECODE : S1;        // any decoder for FPU POP
 6563     FPU    : S4;
 6564     MEM    : S3;        // any mem
 6565 %}
 6566 
 6567 // Float load constant
 6568 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6569 %{
 6570     instruction_count(3);
 6571     dst    : S5(write);
 6572     src    : S3(read);
 6573     D0     : S0;        // big decoder only for the load
 6574     DECODE : S1(2);     // any decoder for FPU POP
 6575     FPU    : S4;
 6576     MEM    : S3;        // any mem
 6577 %}
 6578 
// Unconditional branch
 6580 pipe_class pipe_jmp(label labl)
 6581 %{
 6582     single_instruction;
 6583     BR   : S3;
 6584 %}
 6585 
 6586 // Conditional branch
 6587 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6588 %{
 6589     single_instruction;
 6590     cr    : S1(read);
 6591     BR    : S3;
 6592 %}
 6593 
 6594 // Allocation idiom
 6595 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6596 %{
 6597     instruction_count(1); force_serialization;
 6598     fixed_latency(6);
 6599     heap_ptr : S3(read);
 6600     DECODE   : S0(3);
 6601     D0       : S2;
 6602     MEM      : S3;
 6603     ALU      : S3(2);
 6604     dst      : S5(write);
 6605     BR       : S5;
 6606 %}
 6607 
 6608 // Generic big/slow expanded idiom
 6609 pipe_class pipe_slow()
 6610 %{
 6611     instruction_count(10); multiple_bundles; force_serialization;
 6612     fixed_latency(100);
 6613     D0  : S0(2);
 6614     MEM : S3(2);
 6615 %}
 6616 
 6617 // The real do-nothing guy
 6618 pipe_class empty()
 6619 %{
 6620     instruction_count(0);
 6621 %}
 6622 
 6623 // Define the class for the Nop node
 6624 define
 6625 %{
 6626    MachNop = empty;
 6627 %}
 6628 
 6629 %}
 6630 
 6631 //----------INSTRUCTIONS-------------------------------------------------------
 6632 //
 6633 // match      -- States which machine-independent subtree may be replaced
 6634 //               by this instruction.
 6635 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6636 //               selection to identify a minimum cost tree of machine
 6637 //               instructions that matches a tree of machine-independent
 6638 //               instructions.
 6639 // format     -- A string providing the disassembly for this instruction.
 6640 //               The value of an instruction's operand may be inserted
 6641 //               by referring to it with a '$' prefix.
 6642 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6643 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6645 //               indicate the type of machine instruction, while secondary
 6646 //               and tertiary are often used for prefix options or addressing
 6647 //               modes.
 6648 // ins_encode -- A list of encode classes with parameters. The encode class
 6649 //               name must have been defined in an 'enc_class' specification
 6650 //               in the encode section of the architecture description.
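//
// A minimal rule for orientation (loadB further below is the real thing):
//
//   instruct loadB(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadB mem));   // replaces the ideal LoadB subtree
//     ins_cost(125);                // selection cost estimate
//     format %{ "movsbl  $dst, $mem" %}
//     ins_encode %{ __ movsbl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);       // pipeline class defined above
//   %}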
 6651 
 6652 // ============================================================================
 6653 
 6654 instruct ShouldNotReachHere() %{
 6655   match(Halt);
 6656   format %{ "stop\t# ShouldNotReachHere" %}
 6657   ins_encode %{
 6658     if (is_reachable()) {
 6659       const char* str = __ code_string(_halt_reason);
 6660       __ stop(str);
 6661     }
 6662   %}
 6663   ins_pipe(pipe_slow);
 6664 %}
 6665 
 6666 // ============================================================================
 6667 
 6668 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
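// (Their encodings deliberately call ShouldNotReachHere(): these rules exist
// only to guide register-class selection and must never survive to emission.)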
 6669 // Load Float
 6670 instruct MoveF2VL(vlRegF dst, regF src) %{
 6671   match(Set dst src);
 6672   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6673   ins_encode %{
 6674     ShouldNotReachHere();
 6675   %}
 6676   ins_pipe( fpu_reg_reg );
 6677 %}
 6678 
 6679 // Load Float
 6680 instruct MoveF2LEG(legRegF dst, regF src) %{
 6681   match(Set dst src);
 6682   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6683   ins_encode %{
 6684     ShouldNotReachHere();
 6685   %}
 6686   ins_pipe( fpu_reg_reg );
 6687 %}
 6688 
 6689 // Load Float
 6690 instruct MoveVL2F(regF dst, vlRegF src) %{
 6691   match(Set dst src);
 6692   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6693   ins_encode %{
 6694     ShouldNotReachHere();
 6695   %}
 6696   ins_pipe( fpu_reg_reg );
 6697 %}
 6698 
 6699 // Load Float
 6700 instruct MoveLEG2F(regF dst, legRegF src) %{
 6701   match(Set dst src);
 6702   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6703   ins_encode %{
 6704     ShouldNotReachHere();
 6705   %}
 6706   ins_pipe( fpu_reg_reg );
 6707 %}
 6708 
 6709 // Load Double
 6710 instruct MoveD2VL(vlRegD dst, regD src) %{
 6711   match(Set dst src);
 6712   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6713   ins_encode %{
 6714     ShouldNotReachHere();
 6715   %}
 6716   ins_pipe( fpu_reg_reg );
 6717 %}
 6718 
 6719 // Load Double
 6720 instruct MoveD2LEG(legRegD dst, regD src) %{
 6721   match(Set dst src);
 6722   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6723   ins_encode %{
 6724     ShouldNotReachHere();
 6725   %}
 6726   ins_pipe( fpu_reg_reg );
 6727 %}
 6728 
 6729 // Load Double
 6730 instruct MoveVL2D(regD dst, vlRegD src) %{
 6731   match(Set dst src);
 6732   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6733   ins_encode %{
 6734     ShouldNotReachHere();
 6735   %}
 6736   ins_pipe( fpu_reg_reg );
 6737 %}
 6738 
 6739 // Load Double
 6740 instruct MoveLEG2D(regD dst, legRegD src) %{
 6741   match(Set dst src);
 6742   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6743   ins_encode %{
 6744     ShouldNotReachHere();
 6745   %}
 6746   ins_pipe( fpu_reg_reg );
 6747 %}
 6748 
 6749 //----------Load/Store/Move Instructions---------------------------------------
 6750 //----------Load Instructions--------------------------------------------------
 6751 
 6752 // Load Byte (8 bit signed)
 6753 instruct loadB(rRegI dst, memory mem)
 6754 %{
 6755   match(Set dst (LoadB mem));
 6756 
 6757   ins_cost(125);
 6758   format %{ "movsbl  $dst, $mem\t# byte" %}
 6759 
 6760   ins_encode %{
 6761     __ movsbl($dst$$Register, $mem$$Address);
 6762   %}
 6763 
 6764   ins_pipe(ialu_reg_mem);
 6765 %}
 6766 
 6767 // Load Byte (8 bit signed) into Long Register
 6768 instruct loadB2L(rRegL dst, memory mem)
 6769 %{
 6770   match(Set dst (ConvI2L (LoadB mem)));
 6771 
 6772   ins_cost(125);
 6773   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6774 
 6775   ins_encode %{
 6776     __ movsbq($dst$$Register, $mem$$Address);
 6777   %}
 6778 
 6779   ins_pipe(ialu_reg_mem);
 6780 %}
 6781 
 6782 // Load Unsigned Byte (8 bit UNsigned)
 6783 instruct loadUB(rRegI dst, memory mem)
 6784 %{
 6785   match(Set dst (LoadUB mem));
 6786 
 6787   ins_cost(125);
 6788   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6789 
 6790   ins_encode %{
 6791     __ movzbl($dst$$Register, $mem$$Address);
 6792   %}
 6793 
 6794   ins_pipe(ialu_reg_mem);
 6795 %}
 6796 
 6797 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6798 instruct loadUB2L(rRegL dst, memory mem)
 6799 %{
 6800   match(Set dst (ConvI2L (LoadUB mem)));
 6801 
 6802   ins_cost(125);
 6803   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6804 
 6805   ins_encode %{
 6806     __ movzbq($dst$$Register, $mem$$Address);
 6807   %}
 6808 
 6809   ins_pipe(ialu_reg_mem);
 6810 %}
 6811 
 6812 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6813 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6814   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6815   effect(KILL cr);
 6816 
 6817   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6818             "andl    $dst, right_n_bits($mask, 8)" %}
 6819   ins_encode %{
 6820     Register Rdst = $dst$$Register;
 6821     __ movzbq(Rdst, $mem$$Address);
 6822     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6823   %}
 6824   ins_pipe(ialu_reg_mem);
 6825 %}
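// Worked example for the rule above: the loaded ubyte is already in [0, 255],
// so only the low 8 bits of $mask matter; e.g. mask == 0x103 is applied as
// 0x103 & right_n_bits(8) == 0x03, which yields the same result as the full
// 32-bit AND would.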
 6826 
 6827 // Load Short (16 bit signed)
 6828 instruct loadS(rRegI dst, memory mem)
 6829 %{
 6830   match(Set dst (LoadS mem));
 6831 
 6832   ins_cost(125);
 6833   format %{ "movswl $dst, $mem\t# short" %}
 6834 
 6835   ins_encode %{
 6836     __ movswl($dst$$Register, $mem$$Address);
 6837   %}
 6838 
 6839   ins_pipe(ialu_reg_mem);
 6840 %}
 6841 
 6842 // Load Short (16 bit signed) to Byte (8 bit signed)
 6843 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6844   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6845 
 6846   ins_cost(125);
 6847   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6848   ins_encode %{
 6849     __ movsbl($dst$$Register, $mem$$Address);
 6850   %}
 6851   ins_pipe(ialu_reg_mem);
 6852 %}
 6853 
 6854 // Load Short (16 bit signed) into Long Register
 6855 instruct loadS2L(rRegL dst, memory mem)
 6856 %{
 6857   match(Set dst (ConvI2L (LoadS mem)));
 6858 
 6859   ins_cost(125);
 6860   format %{ "movswq $dst, $mem\t# short -> long" %}
 6861 
 6862   ins_encode %{
 6863     __ movswq($dst$$Register, $mem$$Address);
 6864   %}
 6865 
 6866   ins_pipe(ialu_reg_mem);
 6867 %}
 6868 
 6869 // Load Unsigned Short/Char (16 bit UNsigned)
 6870 instruct loadUS(rRegI dst, memory mem)
 6871 %{
 6872   match(Set dst (LoadUS mem));
 6873 
 6874   ins_cost(125);
 6875   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 6876 
 6877   ins_encode %{
 6878     __ movzwl($dst$$Register, $mem$$Address);
 6879   %}
 6880 
 6881   ins_pipe(ialu_reg_mem);
 6882 %}
 6883 
 6884 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 6885 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6886   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 6887 
 6888   ins_cost(125);
 6889   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 6890   ins_encode %{
 6891     __ movsbl($dst$$Register, $mem$$Address);
 6892   %}
 6893   ins_pipe(ialu_reg_mem);
 6894 %}
 6895 
 6896 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 6897 instruct loadUS2L(rRegL dst, memory mem)
 6898 %{
 6899   match(Set dst (ConvI2L (LoadUS mem)));
 6900 
 6901   ins_cost(125);
 6902   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 6903 
 6904   ins_encode %{
 6905     __ movzwq($dst$$Register, $mem$$Address);
 6906   %}
 6907 
 6908   ins_pipe(ialu_reg_mem);
 6909 %}
 6910 
 6911 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 6912 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 6913   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 6914 
 6915   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 6916   ins_encode %{
 6917     __ movzbq($dst$$Register, $mem$$Address);
 6918   %}
 6919   ins_pipe(ialu_reg_mem);
 6920 %}
 6921 
 6922 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 6923 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6924   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 6925   effect(KILL cr);
 6926 
 6927   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 6928             "andl    $dst, right_n_bits($mask, 16)" %}
 6929   ins_encode %{
 6930     Register Rdst = $dst$$Register;
 6931     __ movzwq(Rdst, $mem$$Address);
 6932     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 6933   %}
 6934   ins_pipe(ialu_reg_mem);
 6935 %}
 6936 
 6937 // Load Integer
 6938 instruct loadI(rRegI dst, memory mem)
 6939 %{
 6940   match(Set dst (LoadI mem));
 6941 
 6942   ins_cost(125);
 6943   format %{ "movl    $dst, $mem\t# int" %}
 6944 
 6945   ins_encode %{
 6946     __ movl($dst$$Register, $mem$$Address);
 6947   %}
 6948 
 6949   ins_pipe(ialu_reg_mem);
 6950 %}
 6951 
 6952 // Load Integer (32 bit signed) to Byte (8 bit signed)
 6953 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6954   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 6955 
 6956   ins_cost(125);
 6957   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 6958   ins_encode %{
 6959     __ movsbl($dst$$Register, $mem$$Address);
 6960   %}
 6961   ins_pipe(ialu_reg_mem);
 6962 %}
 6963 
 6964 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 6965 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 6966   match(Set dst (AndI (LoadI mem) mask));
 6967 
 6968   ins_cost(125);
 6969   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 6970   ins_encode %{
 6971     __ movzbl($dst$$Register, $mem$$Address);
 6972   %}
 6973   ins_pipe(ialu_reg_mem);
 6974 %}
 6975 
 6976 // Load Integer (32 bit signed) to Short (16 bit signed)
 6977 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 6978   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 6979 
 6980   ins_cost(125);
 6981   format %{ "movswl  $dst, $mem\t# int -> short" %}
 6982   ins_encode %{
 6983     __ movswl($dst$$Register, $mem$$Address);
 6984   %}
 6985   ins_pipe(ialu_reg_mem);
 6986 %}
 6987 
 6988 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 6989 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 6990   match(Set dst (AndI (LoadI mem) mask));
 6991 
 6992   ins_cost(125);
 6993   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 6994   ins_encode %{
 6995     __ movzwl($dst$$Register, $mem$$Address);
 6996   %}
 6997   ins_pipe(ialu_reg_mem);
 6998 %}
 6999 
 7000 // Load Integer into Long Register
 7001 instruct loadI2L(rRegL dst, memory mem)
 7002 %{
 7003   match(Set dst (ConvI2L (LoadI mem)));
 7004 
 7005   ins_cost(125);
 7006   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7007 
 7008   ins_encode %{
 7009     __ movslq($dst$$Register, $mem$$Address);
 7010   %}
 7011 
 7012   ins_pipe(ialu_reg_mem);
 7013 %}
 7014 
 7015 // Load Integer with mask 0xFF into Long Register
 7016 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7017   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7018 
 7019   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7020   ins_encode %{
 7021     __ movzbq($dst$$Register, $mem$$Address);
 7022   %}
 7023   ins_pipe(ialu_reg_mem);
 7024 %}
 7025 
 7026 // Load Integer with mask 0xFFFF into Long Register
 7027 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7028   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7029 
 7030   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7031   ins_encode %{
 7032     __ movzwq($dst$$Register, $mem$$Address);
 7033   %}
 7034   ins_pipe(ialu_reg_mem);
 7035 %}
 7036 
 7037 // Load Integer with a 31-bit mask into Long Register
 7038 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7039   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7040   effect(KILL cr);
 7041 
 7042   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7043             "andl    $dst, $mask" %}
 7044   ins_encode %{
 7045     Register Rdst = $dst$$Register;
 7046     __ movl(Rdst, $mem$$Address);
 7047     __ andl(Rdst, $mask$$constant);
 7048   %}
 7049   ins_pipe(ialu_reg_mem);
 7050 %}
 7051 
 7052 // Load Unsigned Integer into Long Register
 7053 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7054 %{
 7055   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7056 
 7057   ins_cost(125);
 7058   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7059 
 7060   ins_encode %{
 7061     __ movl($dst$$Register, $mem$$Address);
 7062   %}
 7063 
 7064   ins_pipe(ialu_reg_mem);
 7065 %}
 7066 
 7067 // Load Long
 7068 instruct loadL(rRegL dst, memory mem)
 7069 %{
 7070   match(Set dst (LoadL mem));
 7071 
 7072   ins_cost(125);
 7073   format %{ "movq    $dst, $mem\t# long" %}
 7074 
 7075   ins_encode %{
 7076     __ movq($dst$$Register, $mem$$Address);
 7077   %}
 7078 
 7079   ins_pipe(ialu_reg_mem); // XXX
 7080 %}
 7081 
 7082 // Load Range
 7083 instruct loadRange(rRegI dst, memory mem)
 7084 %{
 7085   match(Set dst (LoadRange mem));
 7086 
 7087   ins_cost(125); // XXX
 7088   format %{ "movl    $dst, $mem\t# range" %}
 7089   ins_encode %{
 7090     __ movl($dst$$Register, $mem$$Address);
 7091   %}
 7092   ins_pipe(ialu_reg_mem);
 7093 %}
 7094 
 7095 // Load Pointer
 7096 instruct loadP(rRegP dst, memory mem)
 7097 %{
 7098   match(Set dst (LoadP mem));
 7099   predicate(n->as_Load()->barrier_data() == 0);
 7100 
 7101   ins_cost(125); // XXX
 7102   format %{ "movq    $dst, $mem\t# ptr" %}
 7103   ins_encode %{
 7104     __ movq($dst$$Register, $mem$$Address);
 7105   %}
 7106   ins_pipe(ialu_reg_mem); // XXX
 7107 %}
 7108 
 7109 // Load Compressed Pointer
 7110 instruct loadN(rRegN dst, memory mem)
 7111 %{
 7112    predicate(n->as_Load()->barrier_data() == 0);
 7113    match(Set dst (LoadN mem));
 7114 
 7115    ins_cost(125); // XXX
 7116    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7117    ins_encode %{
 7118      __ movl($dst$$Register, $mem$$Address);
 7119    %}
 7120    ins_pipe(ialu_reg_mem); // XXX
 7121 %}
 7122 
 7123 
 7124 // Load Klass Pointer
 7125 instruct loadKlass(rRegP dst, memory mem)
 7126 %{
 7127   match(Set dst (LoadKlass mem));
 7128 
 7129   ins_cost(125); // XXX
 7130   format %{ "movq    $dst, $mem\t# class" %}
 7131   ins_encode %{
 7132     __ movq($dst$$Register, $mem$$Address);
 7133   %}
 7134   ins_pipe(ialu_reg_mem); // XXX
 7135 %}
 7136 
 7137 // Load narrow Klass Pointer
 7138 instruct loadNKlass(rRegN dst, memory mem)
 7139 %{
 7140   predicate(!UseCompactObjectHeaders);
 7141   match(Set dst (LoadNKlass mem));
 7142 
 7143   ins_cost(125); // XXX
 7144   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7145   ins_encode %{
 7146     __ movl($dst$$Register, $mem$$Address);
 7147   %}
 7148   ins_pipe(ialu_reg_mem); // XXX
 7149 %}
 7150 
 7151 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7152 %{
 7153   predicate(UseCompactObjectHeaders);
 7154   match(Set dst (LoadNKlass mem));
 7155   effect(KILL cr);
 7156   ins_cost(125);
 7157   format %{
 7158     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7159     "shrl    $dst, markWord::klass_shift_at_offset"
 7160   %}
 7161   ins_encode %{
    if (UseAPX) {
      // APX: fused load+shift via the new-data-destination (NDD) form
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
 7169   %}
 7170   ins_pipe(ialu_reg_mem);
 7171 %}
 7172 
 7173 // Load Float
 7174 instruct loadF(regF dst, memory mem)
 7175 %{
 7176   match(Set dst (LoadF mem));
 7177 
 7178   ins_cost(145); // XXX
 7179   format %{ "movss   $dst, $mem\t# float" %}
 7180   ins_encode %{
 7181     __ movflt($dst$$XMMRegister, $mem$$Address);
 7182   %}
 7183   ins_pipe(pipe_slow); // XXX
 7184 %}
 7185 
 7186 // Load Double
 7187 instruct loadD_partial(regD dst, memory mem)
 7188 %{
 7189   predicate(!UseXmmLoadAndClearUpper);
 7190   match(Set dst (LoadD mem));
 7191 
 7192   ins_cost(145); // XXX
 7193   format %{ "movlpd  $dst, $mem\t# double" %}
 7194   ins_encode %{
 7195     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7196   %}
 7197   ins_pipe(pipe_slow); // XXX
 7198 %}
 7199 
 7200 instruct loadD(regD dst, memory mem)
 7201 %{
 7202   predicate(UseXmmLoadAndClearUpper);
 7203   match(Set dst (LoadD mem));
 7204 
 7205   ins_cost(145); // XXX
 7206   format %{ "movsd   $dst, $mem\t# double" %}
 7207   ins_encode %{
 7208     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7209   %}
 7210   ins_pipe(pipe_slow); // XXX
 7211 %}
 7212 
 7213 // max = java.lang.Math.max(float a, float b)
 7214 instruct maxF_avx10_reg(regF dst, regF a, regF b) %{
 7215   predicate(VM_Version::supports_avx10_2());
 7216   match(Set dst (MaxF a b));
 7217   format %{ "maxF $dst, $a, $b" %}
 7218   ins_encode %{
 7219     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
 7220   %}
 7221   ins_pipe( pipe_slow );
 7222 %}
 7223 
 7224 // max = java.lang.Math.max(float a, float b)
 7225 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7226   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7227   match(Set dst (MaxF a b));
 7228   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7229   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7230   ins_encode %{
 7231     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7232   %}
 7233   ins_pipe( pipe_slow );
 7234 %}
 7235 
 7236 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7237   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7238   match(Set dst (MaxF a b));
 7239   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7240 
 7241   format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7242   ins_encode %{
 7243     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7244                     false /*min*/, true /*single*/);
 7245   %}
 7246   ins_pipe( pipe_slow );
 7247 %}
 7248 
 7249 // max = java.lang.Math.max(double a, double b)
 7250 instruct maxD_avx10_reg(regD dst, regD a, regD b) %{
 7251   predicate(VM_Version::supports_avx10_2());
 7252   match(Set dst (MaxD a b));
 7253   format %{ "maxD $dst, $a, $b" %}
 7254   ins_encode %{
 7255     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN);
 7256   %}
 7257   ins_pipe( pipe_slow );
 7258 %}
 7259 
 7260 // max = java.lang.Math.max(double a, double b)
 7261 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7262   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7263   match(Set dst (MaxD a b));
 7264   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7265   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7266   ins_encode %{
 7267     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7268   %}
 7269   ins_pipe( pipe_slow );
 7270 %}
 7271 
 7272 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7273   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7274   match(Set dst (MaxD a b));
 7275   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7276 
 7277   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7278   ins_encode %{
 7279     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7280                     false /*min*/, false /*single*/);
 7281   %}
 7282   ins_pipe( pipe_slow );
 7283 %}
 7284 
 7285 // min = java.lang.Math.min(float a, float b)
 7286 instruct minF_avx10_reg(regF dst, regF a, regF b) %{
 7287   predicate(VM_Version::supports_avx10_2());
 7288   match(Set dst (MinF a b));
 7289   format %{ "minF $dst, $a, $b" %}
 7290   ins_encode %{
 7291     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
 7292   %}
 7293   ins_pipe( pipe_slow );
 7294 %}
 7295 
 7296 // min = java.lang.Math.min(float a, float b)
 7297 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7298   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7299   match(Set dst (MinF a b));
 7300   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7301   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7302   ins_encode %{
 7303     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7304   %}
 7305   ins_pipe( pipe_slow );
 7306 %}
 7307 
 7308 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7309   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7310   match(Set dst (MinF a b));
 7311   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7312 
 7313   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7314   ins_encode %{
 7315     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7316                     true /*min*/, true /*single*/);
 7317   %}
 7318   ins_pipe( pipe_slow );
 7319 %}
 7320 
 7321 // min = java.lang.Math.min(double a, double b)
 7322 instruct minD_avx10_reg(regD dst, regD a, regD b) %{
 7323   predicate(VM_Version::supports_avx10_2());
 7324   match(Set dst (MinD a b));
 7325   format %{ "minD $dst, $a, $b" %}
 7326   ins_encode %{
 7327     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN);
 7328   %}
 7329   ins_pipe( pipe_slow );
 7330 %}
 7331 
 7332 // min = java.lang.Math.min(double a, double b)
 7333 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7334   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7335   match(Set dst (MinD a b));
 7336   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7337   format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7338   ins_encode %{
 7339     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7340   %}
 7341   ins_pipe( pipe_slow );
 7342 %}
 7343 
 7344 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7345   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7346   match(Set dst (MinD a b));
 7347   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7348 
 7349   format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7350   ins_encode %{
 7351     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7352                     true /*min*/, false /*single*/);
 7353   %}
 7354   ins_pipe( pipe_slow );
 7355 %}
 7356 
 7357 // Load Effective Address
 7358 instruct leaP8(rRegP dst, indOffset8 mem)
 7359 %{
 7360   match(Set dst mem);
 7361 
 7362   ins_cost(110); // XXX
 7363   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7364   ins_encode %{
 7365     __ leaq($dst$$Register, $mem$$Address);
 7366   %}
 7367   ins_pipe(ialu_reg_reg_fat);
 7368 %}
 7369 
 7370 instruct leaP32(rRegP dst, indOffset32 mem)
 7371 %{
 7372   match(Set dst mem);
 7373 
 7374   ins_cost(110);
 7375   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7376   ins_encode %{
 7377     __ leaq($dst$$Register, $mem$$Address);
 7378   %}
 7379   ins_pipe(ialu_reg_reg_fat);
 7380 %}
 7381 
 7382 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7383 %{
 7384   match(Set dst mem);
 7385 
 7386   ins_cost(110);
 7387   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7388   ins_encode %{
 7389     __ leaq($dst$$Register, $mem$$Address);
 7390   %}
 7391   ins_pipe(ialu_reg_reg_fat);
 7392 %}
 7393 
 7394 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7395 %{
 7396   match(Set dst mem);
 7397 
 7398   ins_cost(110);
 7399   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7400   ins_encode %{
 7401     __ leaq($dst$$Register, $mem$$Address);
 7402   %}
 7403   ins_pipe(ialu_reg_reg_fat);
 7404 %}
 7405 
 7406 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7407 %{
 7408   match(Set dst mem);
 7409 
 7410   ins_cost(110);
 7411   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7412   ins_encode %{
 7413     __ leaq($dst$$Register, $mem$$Address);
 7414   %}
 7415   ins_pipe(ialu_reg_reg_fat);
 7416 %}
 7417 
 7418 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7419 %{
 7420   match(Set dst mem);
 7421 
 7422   ins_cost(110);
 7423   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7424   ins_encode %{
 7425     __ leaq($dst$$Register, $mem$$Address);
 7426   %}
 7427   ins_pipe(ialu_reg_reg_fat);
 7428 %}
 7429 
 7430 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7431 %{
 7432   match(Set dst mem);
 7433 
 7434   ins_cost(110);
 7435   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7436   ins_encode %{
 7437     __ leaq($dst$$Register, $mem$$Address);
 7438   %}
 7439   ins_pipe(ialu_reg_reg_fat);
 7440 %}
 7441 
 7442 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7443 %{
 7444   match(Set dst mem);
 7445 
 7446   ins_cost(110);
 7447   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7448   ins_encode %{
 7449     __ leaq($dst$$Register, $mem$$Address);
 7450   %}
 7451   ins_pipe(ialu_reg_reg_fat);
 7452 %}
 7453 
 7454 // Load Effective Address forms that use a narrow (32-bit) oop
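// The first form matches when CompressedOops::shift() != 0 and decodes the
// narrow oop as part of the address computation, presumably by scaling it
// against the heap-base register. The remaining *Narrow forms all require
// shift() == 0: with no shift (and a null heap base, which the narrow
// operand forms imply), the 32-bit compressed value can feed leaq directly.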
 7455 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7456 %{
 7457   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7458   match(Set dst mem);
 7459 
 7460   ins_cost(110);
 7461   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7462   ins_encode %{
 7463     __ leaq($dst$$Register, $mem$$Address);
 7464   %}
 7465   ins_pipe(ialu_reg_reg_fat);
 7466 %}
 7467 
 7468 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7469 %{
 7470   predicate(CompressedOops::shift() == 0);
 7471   match(Set dst mem);
 7472 
 7473   ins_cost(110); // XXX
 7474   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7475   ins_encode %{
 7476     __ leaq($dst$$Register, $mem$$Address);
 7477   %}
 7478   ins_pipe(ialu_reg_reg_fat);
 7479 %}
 7480 
 7481 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7482 %{
 7483   predicate(CompressedOops::shift() == 0);
 7484   match(Set dst mem);
 7485 
 7486   ins_cost(110);
 7487   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7488   ins_encode %{
 7489     __ leaq($dst$$Register, $mem$$Address);
 7490   %}
 7491   ins_pipe(ialu_reg_reg_fat);
 7492 %}
 7493 
 7494 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7495 %{
 7496   predicate(CompressedOops::shift() == 0);
 7497   match(Set dst mem);
 7498 
 7499   ins_cost(110);
 7500   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7501   ins_encode %{
 7502     __ leaq($dst$$Register, $mem$$Address);
 7503   %}
 7504   ins_pipe(ialu_reg_reg_fat);
 7505 %}
 7506 
 7507 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7508 %{
 7509   predicate(CompressedOops::shift() == 0);
 7510   match(Set dst mem);
 7511 
 7512   ins_cost(110);
 7513   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7514   ins_encode %{
 7515     __ leaq($dst$$Register, $mem$$Address);
 7516   %}
 7517   ins_pipe(ialu_reg_reg_fat);
 7518 %}
 7519 
 7520 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7521 %{
 7522   predicate(CompressedOops::shift() == 0);
 7523   match(Set dst mem);
 7524 
 7525   ins_cost(110);
 7526   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7527   ins_encode %{
 7528     __ leaq($dst$$Register, $mem$$Address);
 7529   %}
 7530   ins_pipe(ialu_reg_reg_fat);
 7531 %}
 7532 
 7533 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7534 %{
 7535   predicate(CompressedOops::shift() == 0);
 7536   match(Set dst mem);
 7537 
 7538   ins_cost(110);
 7539   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7540   ins_encode %{
 7541     __ leaq($dst$$Register, $mem$$Address);
 7542   %}
 7543   ins_pipe(ialu_reg_reg_fat);
 7544 %}
 7545 
 7546 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7547 %{
 7548   predicate(CompressedOops::shift() == 0);
 7549   match(Set dst mem);
 7550 
 7551   ins_cost(110);
 7552   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7553   ins_encode %{
 7554     __ leaq($dst$$Register, $mem$$Address);
 7555   %}
 7556   ins_pipe(ialu_reg_reg_fat);
 7557 %}
 7558 
 7559 instruct loadConI(rRegI dst, immI src)
 7560 %{
 7561   match(Set dst src);
 7562 
 7563   format %{ "movl    $dst, $src\t# int" %}
 7564   ins_encode %{
 7565     __ movl($dst$$Register, $src$$constant);
 7566   %}
 7567   ins_pipe(ialu_reg_fat); // XXX
 7568 %}
 7569 
 7570 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7571 %{
 7572   match(Set dst src);
 7573   effect(KILL cr);
 7574 
 7575   ins_cost(50);
 7576   format %{ "xorl    $dst, $dst\t# int" %}
 7577   ins_encode %{
 7578     __ xorl($dst$$Register, $dst$$Register);
 7579   %}
 7580   ins_pipe(ialu_reg);
 7581 %}
 7582 
 7583 instruct loadConL(rRegL dst, immL src)
 7584 %{
 7585   match(Set dst src);
 7586 
 7587   ins_cost(150);
 7588   format %{ "movq    $dst, $src\t# long" %}
 7589   ins_encode %{
 7590     __ mov64($dst$$Register, $src$$constant);
 7591   %}
 7592   ins_pipe(ialu_reg);
 7593 %}
 7594 
 7595 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7596 %{
 7597   match(Set dst src);
 7598   effect(KILL cr);
 7599 
 7600   ins_cost(50);
 7601   format %{ "xorl    $dst, $dst\t# long" %}
 7602   ins_encode %{
 7603     __ xorl($dst$$Register, $dst$$Register);
 7604   %}
 7605   ins_pipe(ialu_reg); // XXX
 7606 %}
 7607 
 7608 instruct loadConUL32(rRegL dst, immUL32 src)
 7609 %{
 7610   match(Set dst src);
 7611 
 7612   ins_cost(60);
 7613   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7614   ins_encode %{
 7615     __ movl($dst$$Register, $src$$constant);
 7616   %}
 7617   ins_pipe(ialu_reg);
 7618 %}
 7619 
 7620 instruct loadConL32(rRegL dst, immL32 src)
 7621 %{
 7622   match(Set dst src);
 7623 
 7624   ins_cost(70);
 7625   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7626   ins_encode %{
 7627     __ movq($dst$$Register, $src$$constant);
 7628   %}
 7629   ins_pipe(ialu_reg);
 7630 %}
 7631 
 7632 instruct loadConP(rRegP dst, immP con) %{
 7633   match(Set dst con);
 7634 
 7635   format %{ "movq    $dst, $con\t# ptr" %}
 7636   ins_encode %{
 7637     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7638   %}
 7639   ins_pipe(ialu_reg_fat); // XXX
 7640 %}
 7641 
 7642 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7643 %{
 7644   match(Set dst src);
 7645   effect(KILL cr);
 7646 
 7647   ins_cost(50);
 7648   format %{ "xorl    $dst, $dst\t# ptr" %}
 7649   ins_encode %{
 7650     __ xorl($dst$$Register, $dst$$Register);
 7651   %}
 7652   ins_pipe(ialu_reg);
 7653 %}
 7654 
 7655 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7656 %{
 7657   match(Set dst src);
 7658   effect(KILL cr);
 7659 
 7660   ins_cost(60);
 7661   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7662   ins_encode %{
 7663     __ movl($dst$$Register, $src$$constant);
 7664   %}
 7665   ins_pipe(ialu_reg);
 7666 %}
 7667 
 7668 instruct loadConF(regF dst, immF con) %{
 7669   match(Set dst con);
 7670   ins_cost(125);
 7671   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7672   ins_encode %{
 7673     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7674   %}
 7675   ins_pipe(pipe_slow);
 7676 %}
 7677 
 7678 instruct loadConH(regF dst, immH con) %{
 7679   match(Set dst con);
 7680   ins_cost(125);
 7681   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7682   ins_encode %{
 7683     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7684   %}
 7685   ins_pipe(pipe_slow);
 7686 %}
 7687 
 7688 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7689   match(Set dst src);
 7690   effect(KILL cr);
 7691   format %{ "xorq    $dst, $dst\t# compressed null pointer" %}
 7692   ins_encode %{
 7693     __ xorq($dst$$Register, $dst$$Register);
 7694   %}
 7695   ins_pipe(ialu_reg);
 7696 %}
 7697 
 7698 instruct loadConN(rRegN dst, immN src) %{
 7699   match(Set dst src);
 7700 
 7701   ins_cost(125);
 7702   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7703   ins_encode %{
 7704     address con = (address)$src$$constant;
 7705     if (con == nullptr) {
 7706       ShouldNotReachHere();
 7707     } else {
 7708       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7709     }
 7710   %}
 7711   ins_pipe(ialu_reg_fat); // XXX
 7712 %}
 7713 
 7714 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7715   match(Set dst src);
 7716 
 7717   ins_cost(125);
 7718   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7719   ins_encode %{
 7720     address con = (address)$src$$constant;
 7721     if (con == nullptr) {
 7722       ShouldNotReachHere();
 7723     } else {
 7724       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7725     }
 7726   %}
 7727   ins_pipe(ialu_reg_fat); // XXX
 7728 %}
 7729 
 7730 instruct loadConF0(regF dst, immF0 src)
 7731 %{
 7732   match(Set dst src);
 7733   ins_cost(100);
 7734 
 7735   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7736   ins_encode %{
 7737     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7738   %}
 7739   ins_pipe(pipe_slow);
 7740 %}
 7741 
 7742 // Use the same format since predicate() cannot be used here.
 7743 instruct loadConD(regD dst, immD con) %{
 7744   match(Set dst con);
 7745   ins_cost(125);
 7746   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7747   ins_encode %{
 7748     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7749   %}
 7750   ins_pipe(pipe_slow);
 7751 %}
 7752 
 7753 instruct loadConD0(regD dst, immD0 src)
 7754 %{
 7755   match(Set dst src);
 7756   ins_cost(100);
 7757 
 7758   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7759   ins_encode %{
 7760     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7761   %}
 7762   ins_pipe(pipe_slow);
 7763 %}
 7764 
 7765 instruct loadSSI(rRegI dst, stackSlotI src)
 7766 %{
 7767   match(Set dst src);
 7768 
 7769   ins_cost(125);
 7770   format %{ "movl    $dst, $src\t# int stk" %}
 7771   ins_encode %{
 7772     __ movl($dst$$Register, $src$$Address);
 7773   %}
 7774   ins_pipe(ialu_reg_mem);
 7775 %}
 7776 
 7777 instruct loadSSL(rRegL dst, stackSlotL src)
 7778 %{
 7779   match(Set dst src);
 7780 
 7781   ins_cost(125);
 7782   format %{ "movq    $dst, $src\t# long stk" %}
 7783   ins_encode %{
 7784     __ movq($dst$$Register, $src$$Address);
 7785   %}
 7786   ins_pipe(ialu_reg_mem);
 7787 %}
 7788 
 7789 instruct loadSSP(rRegP dst, stackSlotP src)
 7790 %{
 7791   match(Set dst src);
 7792 
 7793   ins_cost(125);
 7794   format %{ "movq    $dst, $src\t# ptr stk" %}
 7795   ins_encode %{
 7796     __ movq($dst$$Register, $src$$Address);
 7797   %}
 7798   ins_pipe(ialu_reg_mem);
 7799 %}
 7800 
 7801 instruct loadSSF(regF dst, stackSlotF src)
 7802 %{
 7803   match(Set dst src);
 7804 
 7805   ins_cost(125);
 7806   format %{ "movss   $dst, $src\t# float stk" %}
 7807   ins_encode %{
 7808     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7809   %}
 7810   ins_pipe(pipe_slow); // XXX
 7811 %}
 7812 
 7813 // Use the same format since predicate() cannot be used here.
 7814 instruct loadSSD(regD dst, stackSlotD src)
 7815 %{
 7816   match(Set dst src);
 7817 
 7818   ins_cost(125);
 7819   format %{ "movsd   $dst, $src\t# double stk" %}
 7820   ins_encode  %{
 7821     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7822   %}
 7823   ins_pipe(pipe_slow); // XXX
 7824 %}
 7825 
 7826 // Prefetch instructions for allocation.
 7827 // Must be safe to execute with invalid address (cannot fault).
 7828 
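// AllocatePrefetchInstr selects the flavor via the predicates below:
// 0 -> prefetchnta, 1 -> prefetcht0, 2 -> prefetcht2, 3 -> prefetchw.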
 7829 instruct prefetchAlloc( memory mem ) %{
 7830   predicate(AllocatePrefetchInstr==3);
 7831   match(PrefetchAllocation mem);
 7832   ins_cost(125);
 7833 
 7834   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7835   ins_encode %{
 7836     __ prefetchw($mem$$Address);
 7837   %}
 7838   ins_pipe(ialu_mem);
 7839 %}
 7840 
 7841 instruct prefetchAllocNTA( memory mem ) %{
 7842   predicate(AllocatePrefetchInstr==0);
 7843   match(PrefetchAllocation mem);
 7844   ins_cost(125);
 7845 
 7846   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7847   ins_encode %{
 7848     __ prefetchnta($mem$$Address);
 7849   %}
 7850   ins_pipe(ialu_mem);
 7851 %}
 7852 
 7853 instruct prefetchAllocT0( memory mem ) %{
 7854   predicate(AllocatePrefetchInstr==1);
 7855   match(PrefetchAllocation mem);
 7856   ins_cost(125);
 7857 
 7858   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7859   ins_encode %{
 7860     __ prefetcht0($mem$$Address);
 7861   %}
 7862   ins_pipe(ialu_mem);
 7863 %}
 7864 
 7865 instruct prefetchAllocT2( memory mem ) %{
 7866   predicate(AllocatePrefetchInstr==2);
 7867   match(PrefetchAllocation mem);
 7868   ins_cost(125);
 7869 
 7870   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 7871   ins_encode %{
 7872     __ prefetcht2($mem$$Address);
 7873   %}
 7874   ins_pipe(ialu_mem);
 7875 %}
 7876 
 7877 //----------Store Instructions-------------------------------------------------
 7878 
 7879 // Store Byte
 7880 instruct storeB(memory mem, rRegI src)
 7881 %{
 7882   match(Set mem (StoreB mem src));
 7883 
 7884   ins_cost(125); // XXX
 7885   format %{ "movb    $mem, $src\t# byte" %}
 7886   ins_encode %{
 7887     __ movb($mem$$Address, $src$$Register);
 7888   %}
 7889   ins_pipe(ialu_mem_reg);
 7890 %}
 7891 
 7892 // Store Char/Short
 7893 instruct storeC(memory mem, rRegI src)
 7894 %{
 7895   match(Set mem (StoreC mem src));
 7896 
 7897   ins_cost(125); // XXX
 7898   format %{ "movw    $mem, $src\t# char/short" %}
 7899   ins_encode %{
 7900     __ movw($mem$$Address, $src$$Register);
 7901   %}
 7902   ins_pipe(ialu_mem_reg);
 7903 %}
 7904 
 7905 // Store Integer
 7906 instruct storeI(memory mem, rRegI src)
 7907 %{
 7908   match(Set mem (StoreI mem src));
 7909 
 7910   ins_cost(125); // XXX
 7911   format %{ "movl    $mem, $src\t# int" %}
 7912   ins_encode %{
 7913     __ movl($mem$$Address, $src$$Register);
 7914   %}
 7915   ins_pipe(ialu_mem_reg);
 7916 %}
 7917 
 7918 // Store Long
 7919 instruct storeL(memory mem, rRegL src)
 7920 %{
 7921   match(Set mem (StoreL mem src));
 7922 
 7923   ins_cost(125); // XXX
 7924   format %{ "movq    $mem, $src\t# long" %}
 7925   ins_encode %{
 7926     __ movq($mem$$Address, $src$$Register);
 7927   %}
 7928   ins_pipe(ialu_mem_reg); // XXX
 7929 %}
 7930 
 7931 // Store Pointer
 7932 instruct storeP(memory mem, any_RegP src)
 7933 %{
 7934   predicate(n->as_Store()->barrier_data() == 0);
 7935   match(Set mem (StoreP mem src));
 7936 
 7937   ins_cost(125); // XXX
 7938   format %{ "movq    $mem, $src\t# ptr" %}
 7939   ins_encode %{
 7940     __ movq($mem$$Address, $src$$Register);
 7941   %}
 7942   ins_pipe(ialu_mem_reg);
 7943 %}
 7944 
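// With compressed oops and a null heap base, r12 (reserved as R12_heapbase)
// permanently holds zero, so it doubles as a zero register: storing r12 is
// typically a shorter encoding than storing an immediate zero. The *0 store
// forms below rely on this.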
 7945 instruct storeImmP0(memory mem, immP0 zero)
 7946 %{
 7947   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 7948   match(Set mem (StoreP mem zero));
 7949 
 7950   ins_cost(125); // XXX
 7951   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 7952   ins_encode %{
 7953     __ movq($mem$$Address, r12);
 7954   %}
 7955   ins_pipe(ialu_mem_reg);
 7956 %}
 7957 
 7958 // Store null pointer, mark word, or other simple pointer constant.
 7959 instruct storeImmP(memory mem, immP31 src)
 7960 %{
 7961   predicate(n->as_Store()->barrier_data() == 0);
 7962   match(Set mem (StoreP mem src));
 7963 
 7964   ins_cost(150); // XXX
 7965   format %{ "movq    $mem, $src\t# ptr" %}
 7966   ins_encode %{
 7967     __ movq($mem$$Address, $src$$constant);
 7968   %}
 7969   ins_pipe(ialu_mem_imm);
 7970 %}
 7971 
 7972 // Store Compressed Pointer
 7973 instruct storeN(memory mem, rRegN src)
 7974 %{
 7975   predicate(n->as_Store()->barrier_data() == 0);
 7976   match(Set mem (StoreN mem src));
 7977 
 7978   ins_cost(125); // XXX
 7979   format %{ "movl    $mem, $src\t# compressed ptr" %}
 7980   ins_encode %{
 7981     __ movl($mem$$Address, $src$$Register);
 7982   %}
 7983   ins_pipe(ialu_mem_reg);
 7984 %}
 7985 
 7986 instruct storeNKlass(memory mem, rRegN src)
 7987 %{
 7988   match(Set mem (StoreNKlass mem src));
 7989 
 7990   ins_cost(125); // XXX
 7991   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 7992   ins_encode %{
 7993     __ movl($mem$$Address, $src$$Register);
 7994   %}
 7995   ins_pipe(ialu_mem_reg);
 7996 %}
 7997 
 7998 instruct storeImmN0(memory mem, immN0 zero)
 7999 %{
 8000   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8001   match(Set mem (StoreN mem zero));
 8002 
 8003   ins_cost(125); // XXX
 8004   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8005   ins_encode %{
 8006     __ movl($mem$$Address, r12);
 8007   %}
 8008   ins_pipe(ialu_mem_reg);
 8009 %}
 8010 
 8011 instruct storeImmN(memory mem, immN src)
 8012 %{
 8013   predicate(n->as_Store()->barrier_data() == 0);
 8014   match(Set mem (StoreN mem src));
 8015 
 8016   ins_cost(150); // XXX
 8017   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8018   ins_encode %{
 8019     address con = (address)$src$$constant;
 8020     if (con == nullptr) {
 8021       __ movl($mem$$Address, 0);
 8022     } else {
 8023       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8024     }
 8025   %}
 8026   ins_pipe(ialu_mem_imm);
 8027 %}
 8028 
 8029 instruct storeImmNKlass(memory mem, immNKlass src)
 8030 %{
 8031   match(Set mem (StoreNKlass mem src));
 8032 
 8033   ins_cost(150); // XXX
 8034   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8035   ins_encode %{
 8036     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8037   %}
 8038   ins_pipe(ialu_mem_imm);
 8039 %}
 8040 
 8041 // Store Integer Immediate
 8042 instruct storeImmI0(memory mem, immI_0 zero)
 8043 %{
 8044   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8045   match(Set mem (StoreI mem zero));
 8046 
 8047   ins_cost(125); // XXX
 8048   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8049   ins_encode %{
 8050     __ movl($mem$$Address, r12);
 8051   %}
 8052   ins_pipe(ialu_mem_reg);
 8053 %}
 8054 
 8055 instruct storeImmI(memory mem, immI src)
 8056 %{
 8057   match(Set mem (StoreI mem src));
 8058 
 8059   ins_cost(150);
 8060   format %{ "movl    $mem, $src\t# int" %}
 8061   ins_encode %{
 8062     __ movl($mem$$Address, $src$$constant);
 8063   %}
 8064   ins_pipe(ialu_mem_imm);
 8065 %}
 8066 
 8067 // Store Long Immediate
 8068 instruct storeImmL0(memory mem, immL0 zero)
 8069 %{
 8070   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8071   match(Set mem (StoreL mem zero));
 8072 
 8073   ins_cost(125); // XXX
 8074   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8075   ins_encode %{
 8076     __ movq($mem$$Address, r12);
 8077   %}
 8078   ins_pipe(ialu_mem_reg);
 8079 %}
 8080 
 8081 instruct storeImmL(memory mem, immL32 src)
 8082 %{
 8083   match(Set mem (StoreL mem src));
 8084 
 8085   ins_cost(150);
 8086   format %{ "movq    $mem, $src\t# long" %}
 8087   ins_encode %{
 8088     __ movq($mem$$Address, $src$$constant);
 8089   %}
 8090   ins_pipe(ialu_mem_imm);
 8091 %}
 8092 
 8093 // Store Short/Char Immediate
 8094 instruct storeImmC0(memory mem, immI_0 zero)
 8095 %{
 8096   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8097   match(Set mem (StoreC mem zero));
 8098 
 8099   ins_cost(125); // XXX
 8100   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8101   ins_encode %{
 8102     __ movw($mem$$Address, r12);
 8103   %}
 8104   ins_pipe(ialu_mem_reg);
 8105 %}
 8106 
 8107 instruct storeImmI16(memory mem, immI16 src)
 8108 %{
 8109   predicate(UseStoreImmI16);
 8110   match(Set mem (StoreC mem src));
 8111 
 8112   ins_cost(150);
 8113   format %{ "movw    $mem, $src\t# short/char" %}
 8114   ins_encode %{
 8115     __ movw($mem$$Address, $src$$constant);
 8116   %}
 8117   ins_pipe(ialu_mem_imm);
 8118 %}
 8119 
 8120 // Store Byte Immediate
 8121 instruct storeImmB0(memory mem, immI_0 zero)
 8122 %{
 8123   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8124   match(Set mem (StoreB mem zero));
 8125 
 8126   ins_cost(125); // XXX
 8127   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8128   ins_encode %{
 8129     __ movb($mem$$Address, r12);
 8130   %}
 8131   ins_pipe(ialu_mem_reg);
 8132 %}
 8133 
 8134 instruct storeImmB(memory mem, immI8 src)
 8135 %{
 8136   match(Set mem (StoreB mem src));
 8137 
 8138   ins_cost(150); // XXX
 8139   format %{ "movb    $mem, $src\t# byte" %}
 8140   ins_encode %{
 8141     __ movb($mem$$Address, $src$$constant);
 8142   %}
 8143   ins_pipe(ialu_mem_imm);
 8144 %}
 8145 
 8146 // Store Float
 8147 instruct storeF(memory mem, regF src)
 8148 %{
 8149   match(Set mem (StoreF mem src));
 8150 
 8151   ins_cost(95); // XXX
 8152   format %{ "movss   $mem, $src\t# float" %}
 8153   ins_encode %{
 8154     __ movflt($mem$$Address, $src$$XMMRegister);
 8155   %}
 8156   ins_pipe(pipe_slow); // XXX
 8157 %}
 8158 
 8159 // Store immediate float 0.0 (faster than a store from an XMM register)
 8160 instruct storeF0(memory mem, immF0 zero)
 8161 %{
 8162   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8163   match(Set mem (StoreF mem zero));
 8164 
 8165   ins_cost(25); // XXX
 8166   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8167   ins_encode %{
 8168     __ movl($mem$$Address, r12);
 8169   %}
 8170   ins_pipe(ialu_mem_reg);
 8171 %}
 8172 
 8173 instruct storeF_imm(memory mem, immF src)
 8174 %{
 8175   match(Set mem (StoreF mem src));
 8176 
 8177   ins_cost(50);
 8178   format %{ "movl    $mem, $src\t# float" %}
 8179   ins_encode %{
 8180     __ movl($mem$$Address, jint_cast($src$$constant));
 8181   %}
 8182   ins_pipe(ialu_mem_imm);
 8183 %}
 8184 
 8185 // Store Double
 8186 instruct storeD(memory mem, regD src)
 8187 %{
 8188   match(Set mem (StoreD mem src));
 8189 
 8190   ins_cost(95); // XXX
 8191   format %{ "movsd   $mem, $src\t# double" %}
 8192   ins_encode %{
 8193     __ movdbl($mem$$Address, $src$$XMMRegister);
 8194   %}
 8195   ins_pipe(pipe_slow); // XXX
 8196 %}
 8197 
 8198 // Store immediate double 0.0 (faster than a store from an XMM register)
 8199 instruct storeD0_imm(memory mem, immD0 src)
 8200 %{
 8201   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8202   match(Set mem (StoreD mem src));
 8203 
 8204   ins_cost(50);
 8205   format %{ "movq    $mem, $src\t# double 0." %}
 8206   ins_encode %{
 8207     __ movq($mem$$Address, $src$$constant);
 8208   %}
 8209   ins_pipe(ialu_mem_imm);
 8210 %}
 8211 
 8212 instruct storeD0(memory mem, immD0 zero)
 8213 %{
 8214   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8215   match(Set mem (StoreD mem zero));
 8216 
 8217   ins_cost(25); // XXX
 8218   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8219   ins_encode %{
 8220     __ movq($mem$$Address, r12);
 8221   %}
 8222   ins_pipe(ialu_mem_reg);
 8223 %}
 8224 
 8225 instruct storeSSI(stackSlotI dst, rRegI src)
 8226 %{
 8227   match(Set dst src);
 8228 
 8229   ins_cost(100);
 8230   format %{ "movl    $dst, $src\t# int stk" %}
 8231   ins_encode %{
 8232     __ movl($dst$$Address, $src$$Register);
 8233   %}
 8234   ins_pipe( ialu_mem_reg );
 8235 %}
 8236 
 8237 instruct storeSSL(stackSlotL dst, rRegL src)
 8238 %{
 8239   match(Set dst src);
 8240 
 8241   ins_cost(100);
 8242   format %{ "movq    $dst, $src\t# long stk" %}
 8243   ins_encode %{
 8244     __ movq($dst$$Address, $src$$Register);
 8245   %}
 8246   ins_pipe(ialu_mem_reg);
 8247 %}
 8248 
 8249 instruct storeSSP(stackSlotP dst, rRegP src)
 8250 %{
 8251   match(Set dst src);
 8252 
 8253   ins_cost(100);
 8254   format %{ "movq    $dst, $src\t# ptr stk" %}
 8255   ins_encode %{
 8256     __ movq($dst$$Address, $src$$Register);
 8257   %}
 8258   ins_pipe(ialu_mem_reg);
 8259 %}
 8260 
 8261 instruct storeSSF(stackSlotF dst, regF src)
 8262 %{
 8263   match(Set dst src);
 8264 
 8265   ins_cost(95); // XXX
 8266   format %{ "movss   $dst, $src\t# float stk" %}
 8267   ins_encode %{
 8268     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8269   %}
 8270   ins_pipe(pipe_slow); // XXX
 8271 %}
 8272 
 8273 instruct storeSSD(stackSlotD dst, regD src)
 8274 %{
 8275   match(Set dst src);
 8276 
 8277   ins_cost(95); // XXX
 8278   format %{ "movsd   $dst, $src\t# double stk" %}
 8279   ins_encode %{
 8280     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8281   %}
 8282   ins_pipe(pipe_slow); // XXX
 8283 %}
 8284 
 8285 instruct cacheWB(indirect addr)
 8286 %{
 8287   predicate(VM_Version::supports_data_cache_line_flush());
 8288   match(CacheWB addr);
 8289 
 8290   ins_cost(100);
 8291   format %{ "cache wb $addr" %}
 8292   ins_encode %{
 8293     assert($addr->index_position() < 0, "should be");
 8294     assert($addr$$disp == 0, "should be");
 8295     __ cache_wb(Address($addr$$base$$Register, 0));
 8296   %}
 8297   ins_pipe(pipe_slow); // XXX
 8298 %}
 8299 
 8300 instruct cacheWBPreSync()
 8301 %{
 8302   predicate(VM_Version::supports_data_cache_line_flush());
 8303   match(CacheWBPreSync);
 8304 
 8305   ins_cost(100);
 8306   format %{ "cache wb presync" %}
 8307   ins_encode %{
 8308     __ cache_wbsync(true);
 8309   %}
 8310   ins_pipe(pipe_slow); // XXX
 8311 %}
 8312 
 8313 instruct cacheWBPostSync()
 8314 %{
 8315   predicate(VM_Version::supports_data_cache_line_flush());
 8316   match(CacheWBPostSync);
 8317 
 8318   ins_cost(100);
 8319   format %{ "cache wb postsync" %}
 8320   ins_encode %{
 8321     __ cache_wbsync(false);
 8322   %}
 8323   ins_pipe(pipe_slow); // XXX
 8324 %}
 8325 
 8326 //----------BSWAP Instructions-------------------------------------------------
 8327 instruct bytes_reverse_int(rRegI dst) %{
 8328   match(Set dst (ReverseBytesI dst));
 8329 
 8330   format %{ "bswapl  $dst" %}
 8331   ins_encode %{
 8332     __ bswapl($dst$$Register);
 8333   %}
 8334   ins_pipe( ialu_reg );
 8335 %}
 8336 
 8337 instruct bytes_reverse_long(rRegL dst) %{
 8338   match(Set dst (ReverseBytesL dst));
 8339 
 8340   format %{ "bswapq  $dst" %}
 8341   ins_encode %{
 8342     __ bswapq($dst$$Register);
 8343   %}
 8344   ins_pipe( ialu_reg);
 8345 %}
 8346 
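// For the 16-bit reversals below, bswapl reverses all four bytes of the
// 32-bit register, leaving the two bytes of interest in the high half; the
// shift then moves them down, zero-extending in the unsigned form (shrl)
// and sign-extending in the signed form (sarl). For example:
//   0x00001234 -> bswapl -> 0x34120000 -> shrl 16 -> 0x00003412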
 8347 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8348   match(Set dst (ReverseBytesUS dst));
 8349   effect(KILL cr);
 8350 
 8351   format %{ "bswapl  $dst\n\t"
 8352             "shrl    $dst, 16" %}
 8353   ins_encode %{
 8354     __ bswapl($dst$$Register);
 8355     __ shrl($dst$$Register, 16);
 8356   %}
 8357   ins_pipe( ialu_reg );
 8358 %}
 8359 
 8360 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8361   match(Set dst (ReverseBytesS dst));
 8362   effect(KILL cr);
 8363 
 8364   format %{ "bswapl  $dst\n\t"
 8365             "sarl    $dst, 16" %}
 8366   ins_encode %{
 8367     __ bswapl($dst$$Register);
 8368     __ sarl($dst$$Register, 16);
 8369   %}
 8370   ins_pipe( ialu_reg );
 8371 %}
 8372 
 8373 //---------- Zeros Count Instructions ------------------------------------------
 8374 
 8375 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8376   predicate(UseCountLeadingZerosInstruction);
 8377   match(Set dst (CountLeadingZerosI src));
 8378   effect(KILL cr);
 8379 
 8380   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8381   ins_encode %{
 8382     __ lzcntl($dst$$Register, $src$$Register);
 8383   %}
 8384   ins_pipe(ialu_reg);
 8385 %}
 8386 
 8387 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8388   predicate(UseCountLeadingZerosInstruction);
 8389   match(Set dst (CountLeadingZerosI (LoadI src)));
 8390   effect(KILL cr);
 8391   ins_cost(175);
 8392   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8393   ins_encode %{
 8394     __ lzcntl($dst$$Register, $src$$Address);
 8395   %}
 8396   ins_pipe(ialu_reg_mem);
 8397 %}
 8398 
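// Fallback when lzcnt is unavailable: bsrl yields the index of the highest
// set bit, i.e. 31 - clz(src), and leaves the destination undefined when the
// source is zero, hence the branch. clz is recovered as -bsr + 31; the zero
// case loads -1 so that -(-1) + 31 = 32, the defined result for zero input.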
 8399 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8400   predicate(!UseCountLeadingZerosInstruction);
 8401   match(Set dst (CountLeadingZerosI src));
 8402   effect(KILL cr);
 8403 
 8404   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8405             "jnz     skip\n\t"
 8406             "movl    $dst, -1\n"
 8407       "skip:\n\t"
 8408             "negl    $dst\n\t"
 8409             "addl    $dst, 31" %}
 8410   ins_encode %{
 8411     Register Rdst = $dst$$Register;
 8412     Register Rsrc = $src$$Register;
 8413     Label skip;
 8414     __ bsrl(Rdst, Rsrc);
 8415     __ jccb(Assembler::notZero, skip);
 8416     __ movl(Rdst, -1);
 8417     __ bind(skip);
 8418     __ negl(Rdst);
 8419     __ addl(Rdst, BitsPerInt - 1);
 8420   %}
 8421   ins_pipe(ialu_reg);
 8422 %}
 8423 
 8424 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8425   predicate(UseCountLeadingZerosInstruction);
 8426   match(Set dst (CountLeadingZerosL src));
 8427   effect(KILL cr);
 8428 
 8429   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8430   ins_encode %{
 8431     __ lzcntq($dst$$Register, $src$$Register);
 8432   %}
 8433   ins_pipe(ialu_reg);
 8434 %}
 8435 
 8436 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8437   predicate(UseCountLeadingZerosInstruction);
 8438   match(Set dst (CountLeadingZerosL (LoadL src)));
 8439   effect(KILL cr);
 8440   ins_cost(175);
 8441   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8442   ins_encode %{
 8443     __ lzcntq($dst$$Register, $src$$Address);
 8444   %}
 8445   ins_pipe(ialu_reg_mem);
 8446 %}
 8447 
 8448 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8449   predicate(!UseCountLeadingZerosInstruction);
 8450   match(Set dst (CountLeadingZerosL src));
 8451   effect(KILL cr);
 8452 
 8453   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8454             "jnz     skip\n\t"
 8455             "movl    $dst, -1\n"
 8456       "skip:\n\t"
 8457             "negl    $dst\n\t"
 8458             "addl    $dst, 63" %}
 8459   ins_encode %{
 8460     Register Rdst = $dst$$Register;
 8461     Register Rsrc = $src$$Register;
 8462     Label skip;
 8463     __ bsrq(Rdst, Rsrc);
 8464     __ jccb(Assembler::notZero, skip);
 8465     __ movl(Rdst, -1);
 8466     __ bind(skip);
 8467     __ negl(Rdst);
 8468     __ addl(Rdst, BitsPerLong - 1);
 8469   %}
 8470   ins_pipe(ialu_reg);
 8471 %}
 8472 
 8473 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8474   predicate(UseCountTrailingZerosInstruction);
 8475   match(Set dst (CountTrailingZerosI src));
 8476   effect(KILL cr);
 8477 
 8478   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8479   ins_encode %{
 8480     __ tzcntl($dst$$Register, $src$$Register);
 8481   %}
 8482   ins_pipe(ialu_reg);
 8483 %}
 8484 
 8485 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8486   predicate(UseCountTrailingZerosInstruction);
 8487   match(Set dst (CountTrailingZerosI (LoadI src)));
 8488   effect(KILL cr);
 8489   ins_cost(175);
 8490   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8491   ins_encode %{
 8492     __ tzcntl($dst$$Register, $src$$Address);
 8493   %}
 8494   ins_pipe(ialu_reg_mem);
 8495 %}
 8496 
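// Fallback when tzcnt is unavailable: bsfl yields the index of the lowest
// set bit, which for nonzero input already equals the trailing-zero count.
// Only the zero case needs patching up, since bsf leaves the destination
// undefined there; the branch loads 32 (or 64 in the long form) directly.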
 8497 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8498   predicate(!UseCountTrailingZerosInstruction);
 8499   match(Set dst (CountTrailingZerosI src));
 8500   effect(KILL cr);
 8501 
 8502   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8503             "jnz     done\n\t"
 8504             "movl    $dst, 32\n"
 8505       "done:" %}
 8506   ins_encode %{
 8507     Register Rdst = $dst$$Register;
 8508     Label done;
 8509     __ bsfl(Rdst, $src$$Register);
 8510     __ jccb(Assembler::notZero, done);
 8511     __ movl(Rdst, BitsPerInt);
 8512     __ bind(done);
 8513   %}
 8514   ins_pipe(ialu_reg);
 8515 %}
 8516 
 8517 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8518   predicate(UseCountTrailingZerosInstruction);
 8519   match(Set dst (CountTrailingZerosL src));
 8520   effect(KILL cr);
 8521 
 8522   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8523   ins_encode %{
 8524     __ tzcntq($dst$$Register, $src$$Register);
 8525   %}
 8526   ins_pipe(ialu_reg);
 8527 %}
 8528 
 8529 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8530   predicate(UseCountTrailingZerosInstruction);
 8531   match(Set dst (CountTrailingZerosL (LoadL src)));
 8532   effect(KILL cr);
 8533   ins_cost(175);
 8534   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8535   ins_encode %{
 8536     __ tzcntq($dst$$Register, $src$$Address);
 8537   %}
 8538   ins_pipe(ialu_reg_mem);
 8539 %}
 8540 
 8541 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8542   predicate(!UseCountTrailingZerosInstruction);
 8543   match(Set dst (CountTrailingZerosL src));
 8544   effect(KILL cr);
 8545 
 8546   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8547             "jnz     done\n\t"
 8548             "movl    $dst, 64\n"
 8549       "done:" %}
 8550   ins_encode %{
 8551     Register Rdst = $dst$$Register;
 8552     Label done;
 8553     __ bsfq(Rdst, $src$$Register);
 8554     __ jccb(Assembler::notZero, done);
 8555     __ movl(Rdst, BitsPerLong);
 8556     __ bind(done);
 8557   %}
 8558   ins_pipe(ialu_reg);
 8559 %}
 8560 
 8561 //--------------- Reverse Operation Instructions ----------------
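// Reversing the bits of a word decomposes into reversing the byte order and
// reversing the bits within each byte. On GFNI-capable CPUs the per-byte
// step can presumably be done with a single gf2p8affineqb against a constant
// bit-reversal matrix (hence the XMM temporaries in the _gfni forms); the
// non-GFNI forms fall back to shift-and-mask swaps in general registers.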
 8562 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8563   predicate(!VM_Version::supports_gfni());
 8564   match(Set dst (ReverseI src));
 8565   effect(TEMP dst, TEMP rtmp, KILL cr);
 8566   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8567   ins_encode %{
 8568     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8569   %}
 8570   ins_pipe( ialu_reg );
 8571 %}
 8572 
 8573 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8574   predicate(VM_Version::supports_gfni());
 8575   match(Set dst (ReverseI src));
 8576   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8577   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8578   ins_encode %{
 8579     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8580   %}
 8581   ins_pipe( ialu_reg );
 8582 %}
 8583 
 8584 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8585   predicate(!VM_Version::supports_gfni());
 8586   match(Set dst (ReverseL src));
 8587   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8588   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8589   ins_encode %{
 8590     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8591   %}
 8592   ins_pipe( ialu_reg );
 8593 %}
 8594 
 8595 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8596   predicate(VM_Version::supports_gfni());
 8597   match(Set dst (ReverseL src));
 8598   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8599   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8600   ins_encode %{
 8601     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8602   %}
 8603   ins_pipe( ialu_reg );
 8604 %}
 8605 
 8606 //---------- Population Count Instructions -------------------------------------
 8607 
 8608 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8609   predicate(UsePopCountInstruction);
 8610   match(Set dst (PopCountI src));
 8611   effect(KILL cr);
 8612 
 8613   format %{ "popcnt  $dst, $src" %}
 8614   ins_encode %{
 8615     __ popcntl($dst$$Register, $src$$Register);
 8616   %}
 8617   ins_pipe(ialu_reg);
 8618 %}
 8619 
 8620 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8621   predicate(UsePopCountInstruction);
 8622   match(Set dst (PopCountI (LoadI mem)));
 8623   effect(KILL cr);
 8624 
 8625   format %{ "popcnt  $dst, $mem" %}
 8626   ins_encode %{
 8627     __ popcntl($dst$$Register, $mem$$Address);
 8628   %}
 8629   ins_pipe(ialu_reg);
 8630 %}
 8631 
 8632 // Note: Long.bitCount(long) returns an int.
 8633 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8634   predicate(UsePopCountInstruction);
 8635   match(Set dst (PopCountL src));
 8636   effect(KILL cr);
 8637 
 8638   format %{ "popcnt  $dst, $src" %}
 8639   ins_encode %{
 8640     __ popcntq($dst$$Register, $src$$Register);
 8641   %}
 8642   ins_pipe(ialu_reg);
 8643 %}
 8644 
 8645 // Note: Long.bitCount(long) returns an int.
 8646 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8647   predicate(UsePopCountInstruction);
 8648   match(Set dst (PopCountL (LoadL mem)));
 8649   effect(KILL cr);
 8650 
 8651   format %{ "popcnt  $dst, $mem" %}
 8652   ins_encode %{
 8653     __ popcntq($dst$$Register, $mem$$Address);
 8654   %}
 8655   ins_pipe(ialu_reg);
 8656 %}
 8657 
 8658 
 8659 //----------MemBar Instructions-----------------------------------------------
 8660 // Memory barrier flavors
 8661 
 8662 instruct membar_acquire()
 8663 %{
 8664   match(MemBarAcquire);
 8665   match(LoadFence);
 8666   ins_cost(0);
 8667 
 8668   size(0);
 8669   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8670   ins_encode();
 8671   ins_pipe(empty);
 8672 %}
 8673 
 8674 instruct membar_acquire_lock()
 8675 %{
 8676   match(MemBarAcquireLock);
 8677   ins_cost(0);
 8678 
 8679   size(0);
 8680   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8681   ins_encode();
 8682   ins_pipe(empty);
 8683 %}
 8684 
 8685 instruct membar_release()
 8686 %{
 8687   match(MemBarRelease);
 8688   match(StoreFence);
 8689   ins_cost(0);
 8690 
 8691   size(0);
 8692   format %{ "MEMBAR-release ! (empty encoding)" %}
 8693   ins_encode();
 8694   ins_pipe(empty);
 8695 %}
 8696 
 8697 instruct membar_release_lock()
 8698 %{
 8699   match(MemBarReleaseLock);
 8700   ins_cost(0);
 8701 
 8702   size(0);
 8703   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8704   ins_encode();
 8705   ins_pipe(empty);
 8706 %}
 8707 
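// The full StoreLoad barrier is implemented as a locked read-modify-write of
// a dummy stack slot (lock addl [rsp + #0], 0) rather than mfence: the
// locked instruction provides the required ordering and is cheaper than
// mfence on most x86 implementations.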
 8708 instruct membar_volatile(rFlagsReg cr) %{
 8709   match(MemBarVolatile);
 8710   effect(KILL cr);
 8711   ins_cost(400);
 8712 
 8713   format %{
 8714     $$template
 8715     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8716   %}
 8717   ins_encode %{
 8718     __ membar(Assembler::StoreLoad);
 8719   %}
 8720   ins_pipe(pipe_slow);
 8721 %}
 8722 
 8723 instruct unnecessary_membar_volatile()
 8724 %{
 8725   match(MemBarVolatile);
 8726   predicate(Matcher::post_store_load_barrier(n));
 8727   ins_cost(0);
 8728 
 8729   size(0);
 8730   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8731   ins_encode();
 8732   ins_pipe(empty);
 8733 %}
 8734 
 8735 instruct membar_storestore() %{
 8736   match(MemBarStoreStore);
 8737   match(StoreStoreFence);
 8738   ins_cost(0);
 8739 
 8740   size(0);
 8741   format %{ "MEMBAR-storestore (empty encoding)" %}
 8742   ins_encode( );
 8743   ins_pipe(empty);
 8744 %}
 8745 
 8746 //----------Move Instructions--------------------------------------------------
 8747 
 8748 instruct castX2P(rRegP dst, rRegL src)
 8749 %{
 8750   match(Set dst (CastX2P src));
 8751 
 8752   format %{ "movq    $dst, $src\t# long->ptr" %}
 8753   ins_encode %{
 8754     if ($dst$$reg != $src$$reg) {
 8755       __ movptr($dst$$Register, $src$$Register);
 8756     }
 8757   %}
 8758   ins_pipe(ialu_reg_reg); // XXX
 8759 %}
 8760 
 8761 instruct castP2X(rRegL dst, rRegP src)
 8762 %{
 8763   match(Set dst (CastP2X src));
 8764 
 8765   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8766   ins_encode %{
 8767     if ($dst$$reg != $src$$reg) {
 8768       __ movptr($dst$$Register, $src$$Register);
 8769     }
 8770   %}
 8771   ins_pipe(ialu_reg_reg); // XXX
 8772 %}
 8773 
 8774 // Convert an oop into an int for vector alignment masking
 8775 instruct convP2I(rRegI dst, rRegP src)
 8776 %{
 8777   match(Set dst (ConvL2I (CastP2X src)));
 8778 
 8779   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8780   ins_encode %{
 8781     __ movl($dst$$Register, $src$$Register);
 8782   %}
 8783   ins_pipe(ialu_reg_reg); // XXX
 8784 %}
 8785 
 8786 // Convert a compressed oop into an int for vector alignment masking
 8787 // in the case of 32-bit oops (heap < 4GB).
 8788 instruct convN2I(rRegI dst, rRegN src)
 8789 %{
 8790   predicate(CompressedOops::shift() == 0);
 8791   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8792 
 8793   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8794   ins_encode %{
 8795     __ movl($dst$$Register, $src$$Register);
 8796   %}
 8797   ins_pipe(ialu_reg_reg); // XXX
 8798 %}
 8799 
 8800 // Convert oop pointer into compressed form
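// Compressed oops map a 64-bit oop to 32 bits roughly as
//   narrow = (uint32_t)((oop - base) >> shift);
//   oop    = base + ((uint64_t)narrow << shift);
// with base and shift fixed at startup. The _not_null variants below can
// skip the null check that the general encode/decode paths must preserve
// (null must map to null in both directions).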
 8801 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8802   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8803   match(Set dst (EncodeP src));
 8804   effect(KILL cr);
 8805   format %{ "encode_heap_oop $dst,$src" %}
 8806   ins_encode %{
 8807     Register s = $src$$Register;
 8808     Register d = $dst$$Register;
 8809     if (s != d) {
 8810       __ movq(d, s);
 8811     }
 8812     __ encode_heap_oop(d);
 8813   %}
 8814   ins_pipe(ialu_reg_long);
 8815 %}
 8816 
 8817 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8818   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8819   match(Set dst (EncodeP src));
 8820   effect(KILL cr);
 8821   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8822   ins_encode %{
 8823     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8824   %}
 8825   ins_pipe(ialu_reg_long);
 8826 %}
 8827 
 8828 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8829   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8830             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8831   match(Set dst (DecodeN src));
 8832   effect(KILL cr);
 8833   format %{ "decode_heap_oop $dst,$src" %}
 8834   ins_encode %{
 8835     Register s = $src$$Register;
 8836     Register d = $dst$$Register;
 8837     if (s != d) {
 8838       __ movq(d, s);
 8839     }
 8840     __ decode_heap_oop(d);
 8841   %}
 8842   ins_pipe(ialu_reg_long);
 8843 %}
 8844 
 8845 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8846   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 8847             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 8848   match(Set dst (DecodeN src));
 8849   effect(KILL cr);
 8850   format %{ "decode_heap_oop_not_null $dst,$src" %}
 8851   ins_encode %{
 8852     Register s = $src$$Register;
 8853     Register d = $dst$$Register;
 8854     if (s != d) {
 8855       __ decode_heap_oop_not_null(d, s);
 8856     } else {
 8857       __ decode_heap_oop_not_null(d);
 8858     }
 8859   %}
 8860   ins_pipe(ialu_reg_long);
 8861 %}
 8862 
 8863 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8864   match(Set dst (EncodePKlass src));
 8865   effect(TEMP dst, KILL cr);
 8866   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 8867   ins_encode %{
 8868     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8869   %}
 8870   ins_pipe(ialu_reg_long);
 8871 %}
 8872 
 8873 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8874   match(Set dst (DecodeNKlass src));
 8875   effect(TEMP dst, KILL cr);
 8876   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 8877   ins_encode %{
 8878     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8879   %}
 8880   ins_pipe(ialu_reg_long);
 8881 %}
 8882 
 8883 //----------Conditional Move---------------------------------------------------
 8884 // Jump
 8885 // dummy instruction for generating temp registers
 8886 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 8887   match(Jump (LShiftL switch_val shift));
 8888   ins_cost(350);
 8889   predicate(false);
 8890   effect(TEMP dest);
 8891 
 8892   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8893             "jmp     [$dest + $switch_val << $shift]" %}
 8894   ins_encode %{
 8895     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 8896     // to do that and the compiler is using that register as one it can allocate.
 8897     // So we build it all by hand.
 8898     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 8899     // ArrayAddress dispatch(table, index);
 8900     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 8901     __ lea($dest$$Register, $constantaddress);
 8902     __ jmp(dispatch);
 8903   %}
 8904   ins_pipe(pipe_jmp);
 8905 %}
 8906 
 8907 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 8908   match(Jump (AddL (LShiftL switch_val shift) offset));
 8909   ins_cost(350);
 8910   effect(TEMP dest);
 8911 
 8912   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8913             "jmp     [$dest + $switch_val << $shift + $offset]" %}
 8914   ins_encode %{
 8915     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 8916     // to do that and the compiler is using that register as one it can allocate.
 8917     // So we build it all by hand.
 8918     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 8919     // ArrayAddress dispatch(table, index);
 8920     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 8921     __ lea($dest$$Register, $constantaddress);
 8922     __ jmp(dispatch);
 8923   %}
 8924   ins_pipe(pipe_jmp);
 8925 %}
 8926 
 8927 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 8928   match(Jump switch_val);
 8929   ins_cost(350);
 8930   effect(TEMP dest);
 8931 
 8932   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8933             "jmp     [$dest + $switch_val]" %}
 8934   ins_encode %{
 8935     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 8936     // to do that and the compiler is using that register as one it can allocate.
 8937     // So we build it all by hand.
 8938     // Address index(noreg, switch_reg, Address::times_1);
 8939     // ArrayAddress dispatch(table, index);
 8940     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 8941     __ lea($dest$$Register, $constantaddress);
 8942     __ jmp(dispatch);
 8943   %}
 8944   ins_pipe(pipe_jmp);
 8945 %}
 8946 
 8947 // Conditional move
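// The *_imm_01 forms below recognize a CMoveI whose two inputs are the
// constants 1 and 0 and materialize the result with a single setb on the
// negated condition, avoiding both a constant load and a cmov.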
 8948 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 8949 %{
 8950   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 8951   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 8952 
 8953   ins_cost(100); // XXX
 8954   format %{ "setbn$cop $dst\t# signed, int" %}
 8955   ins_encode %{
 8956     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 8957     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 8958   %}
 8959   ins_pipe(ialu_reg);
 8960 %}
 8961 
 8962 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 8963 %{
 8964   predicate(!UseAPX);
 8965   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 8966 
 8967   ins_cost(200); // XXX
 8968   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 8969   ins_encode %{
 8970     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 8971   %}
 8972   ins_pipe(pipe_cmov_reg);
 8973 %}
 8974 
 8975 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 8976 %{
 8977   predicate(UseAPX);
 8978   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 8979 
 8980   ins_cost(200);
 8981   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 8982   ins_encode %{
 8983     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 8984   %}
 8985   ins_pipe(pipe_cmov_reg);
 8986 %}
 8987 
 8988 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 8989 %{
 8990   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 8991   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 8992 
 8993   ins_cost(100); // XXX
 8994   format %{ "setbn$cop $dst\t# unsigned, int" %}
 8995   ins_encode %{
 8996     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 8997     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 8998   %}
 8999   ins_pipe(ialu_reg);
 9000 %}
 9001 
 9002 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9003   predicate(!UseAPX);
 9004   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9005 
 9006   ins_cost(200); // XXX
 9007   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9008   ins_encode %{
 9009     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9010   %}
 9011   ins_pipe(pipe_cmov_reg);
 9012 %}
 9013 
 9014 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9015   predicate(UseAPX);
 9016   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9017 
 9018   ins_cost(200);
 9019   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9020   ins_encode %{
 9021     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9022   %}
 9023   ins_pipe(pipe_cmov_reg);
 9024 %}
 9025 
 9026 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9027 %{
 9028   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9029   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9030 
 9031   ins_cost(100); // XXX
 9032   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9033   ins_encode %{
 9034     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9035     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9036   %}
 9037   ins_pipe(ialu_reg);
 9038 %}
 9039 
 9040 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9041   predicate(!UseAPX);
 9042   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9043   ins_cost(200);
 9044   expand %{
 9045     cmovI_regU(cop, cr, dst, src);
 9046   %}
 9047 %}
 9048 
 9049 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
 9050   predicate(UseAPX);
 9051   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9052   ins_cost(200);
 9053   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9054   ins_encode %{
 9055     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9056   %}
 9057   ins_pipe(pipe_cmov_reg);
 9058 %}
 9059 
 9060 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9061   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9062   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9063 
 9064   ins_cost(200); // XXX
 9065   format %{ "cmovpl  $dst, $src\n\t"
 9066             "cmovnel $dst, $src" %}
 9067   ins_encode %{
 9068     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9069     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9070   %}
 9071   ins_pipe(pipe_cmov_reg);
 9072 %}
 9073 
 9074 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9075   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9076   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9077   effect(TEMP dst);
 9078 
 9079   ins_cost(200);
 9080   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9081             "cmovnel  $dst, $src2" %}
 9082   ins_encode %{
 9083     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9084     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9085   %}
 9086   ins_pipe(pipe_cmov_reg);
 9087 %}
 9088 
 9089 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9090 // inputs of the CMove
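// Concretely: an unordered FP compare (either input NaN) sets ZF, PF and CF,
// so a plain cmove/cmovne pair would see NaN == NaN as equal. Emitting cmovp
// followed by cmovne moves $src whenever the result is unordered or not-equal,
// which is exactly the 'ne' answer; the 'eq' case below reuses that sequence
// with the CMove inputs swapped.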
 9091 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9092   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9093   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9095 
 9096   ins_cost(200); // XXX
 9097   format %{ "cmovpl  $dst, $src\n\t"
 9098             "cmovnel $dst, $src" %}
 9099   ins_encode %{
 9100     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9101     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9102   %}
 9103   ins_pipe(pipe_cmov_reg);
 9104 %}
 9105 
// We need this special handling only for the eq / neq comparisons, since
// NaN == NaN is false, and the parity flag is set if either operand is NaN.
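// Note the swapped (Binary src2 src1) match: the parity+notEqual cmov pair
// computes the 'ne' selection, so exchanging the CMove inputs turns it into
// the 'eq' result.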
 9108 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9109   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9110   match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
 9111   effect(TEMP dst);
 9112 
 9113   ins_cost(200);
 9114   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9115             "cmovnel  $dst, $src2" %}
 9116   ins_encode %{
 9117     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9118     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9119   %}
 9120   ins_pipe(pipe_cmov_reg);
 9121 %}
 9122 
 9123 // Conditional move
 9124 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9125   predicate(!UseAPX);
 9126   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9127 
 9128   ins_cost(250); // XXX
 9129   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9130   ins_encode %{
 9131     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9132   %}
 9133   ins_pipe(pipe_cmov_mem);
 9134 %}
 9135 
 9136 // Conditional move
 9137 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9138 %{
 9139   predicate(UseAPX);
 9140   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9141 
 9142   ins_cost(250);
 9143   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9144   ins_encode %{
 9145     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9146   %}
 9147   ins_pipe(pipe_cmov_mem);
 9148 %}
 9149 
 9150 // Conditional move
 9151 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9152 %{
 9153   predicate(!UseAPX);
 9154   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9155 
 9156   ins_cost(250); // XXX
 9157   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9158   ins_encode %{
 9159     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9160   %}
 9161   ins_pipe(pipe_cmov_mem);
 9162 %}
 9163 
 9164 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9165   predicate(!UseAPX);
 9166   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9167   ins_cost(250);
 9168   expand %{
 9169     cmovI_memU(cop, cr, dst, src);
 9170   %}
 9171 %}
 9172 
 9173 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9174 %{
 9175   predicate(UseAPX);
 9176   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9177 
 9178   ins_cost(250);
 9179   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9180   ins_encode %{
 9181     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9182   %}
 9183   ins_pipe(pipe_cmov_mem);
 9184 %}
 9185 
 9186 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
 9187 %{
 9188   predicate(UseAPX);
 9189   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9190   ins_cost(250);
 9191   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9192   ins_encode %{
 9193     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9194   %}
 9195   ins_pipe(pipe_cmov_mem);
 9196 %}
 9197 
 9198 // Conditional move
 9199 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9200 %{
 9201   predicate(!UseAPX);
 9202   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9203 
 9204   ins_cost(200); // XXX
 9205   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9206   ins_encode %{
 9207     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9208   %}
 9209   ins_pipe(pipe_cmov_reg);
 9210 %}
 9211 
 9212 // Conditional move ndd
 9213 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9214 %{
 9215   predicate(UseAPX);
 9216   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9217 
 9218   ins_cost(200);
 9219   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9220   ins_encode %{
 9221     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9222   %}
 9223   ins_pipe(pipe_cmov_reg);
 9224 %}
 9225 
 9226 // Conditional move
 9227 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9228 %{
 9229   predicate(!UseAPX);
 9230   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9231 
 9232   ins_cost(200); // XXX
 9233   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9234   ins_encode %{
 9235     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9236   %}
 9237   ins_pipe(pipe_cmov_reg);
 9238 %}
 9239 
 9240 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9241   predicate(!UseAPX);
 9242   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9243   ins_cost(200);
 9244   expand %{
 9245     cmovN_regU(cop, cr, dst, src);
 9246   %}
 9247 %}
 9248 
 9249 // Conditional move ndd
 9250 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9251 %{
 9252   predicate(UseAPX);
 9253   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9254 
 9255   ins_cost(200);
 9256   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9257   ins_encode %{
 9258     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9259   %}
 9260   ins_pipe(pipe_cmov_reg);
 9261 %}
 9262 
 9263 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
 9264   predicate(UseAPX);
 9265   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9266   ins_cost(200);
 9267   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9268   ins_encode %{
 9269     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9270   %}
 9271   ins_pipe(pipe_cmov_reg);
 9272 %}
 9273 
 9274 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9275   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9276   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9277 
 9278   ins_cost(200); // XXX
 9279   format %{ "cmovpl  $dst, $src\n\t"
 9280             "cmovnel $dst, $src" %}
 9281   ins_encode %{
 9282     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9283     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9284   %}
 9285   ins_pipe(pipe_cmov_reg);
 9286 %}
 9287 
 9288 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9289 // inputs of the CMove
 9290 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9291   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9292   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9293 
 9294   ins_cost(200); // XXX
 9295   format %{ "cmovpl  $dst, $src\n\t"
 9296             "cmovnel $dst, $src" %}
 9297   ins_encode %{
 9298     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9299     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9300   %}
 9301   ins_pipe(pipe_cmov_reg);
 9302 %}
 9303 
 9304 // Conditional move
 9305 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9306 %{
 9307   predicate(!UseAPX);
 9308   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9309 
 9310   ins_cost(200); // XXX
 9311   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9312   ins_encode %{
 9313     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9314   %}
 9315   ins_pipe(pipe_cmov_reg);  // XXX
 9316 %}
 9317 
 9318 // Conditional move ndd
 9319 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9320 %{
 9321   predicate(UseAPX);
 9322   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9323 
 9324   ins_cost(200);
 9325   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9326   ins_encode %{
 9327     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9328   %}
 9329   ins_pipe(pipe_cmov_reg);
 9330 %}
 9331 
 9332 // Conditional move
 9333 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9334 %{
 9335   predicate(!UseAPX);
 9336   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9337 
 9338   ins_cost(200); // XXX
 9339   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9340   ins_encode %{
 9341     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9342   %}
 9343   ins_pipe(pipe_cmov_reg); // XXX
 9344 %}
 9345 
 9346 // Conditional move ndd
 9347 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9348 %{
 9349   predicate(UseAPX);
 9350   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9351 
 9352   ins_cost(200);
 9353   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9354   ins_encode %{
 9355     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9356   %}
 9357   ins_pipe(pipe_cmov_reg);
 9358 %}
 9359 
 9360 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9361   predicate(!UseAPX);
 9362   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9363   ins_cost(200);
 9364   expand %{
 9365     cmovP_regU(cop, cr, dst, src);
 9366   %}
 9367 %}
 9368 
 9369 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
 9370   predicate(UseAPX);
 9371   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9372   ins_cost(200);
 9373   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9374   ins_encode %{
 9375     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9376   %}
 9377   ins_pipe(pipe_cmov_reg);
 9378 %}
 9379 
 9380 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9381   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9382   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9383 
 9384   ins_cost(200); // XXX
 9385   format %{ "cmovpq  $dst, $src\n\t"
 9386             "cmovneq $dst, $src" %}
 9387   ins_encode %{
 9388     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9389     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9390   %}
 9391   ins_pipe(pipe_cmov_reg);
 9392 %}
 9393 
 9394 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9395   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9396   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9397   effect(TEMP dst);
 9398 
 9399   ins_cost(200);
 9400   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9401             "cmovneq  $dst, $src2" %}
 9402   ins_encode %{
 9403     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9404     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9405   %}
 9406   ins_pipe(pipe_cmov_reg);
 9407 %}
 9408 
 9409 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9410 // inputs of the CMove
 9411 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9412   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9413   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9414 
 9415   ins_cost(200); // XXX
 9416   format %{ "cmovpq  $dst, $src\n\t"
 9417             "cmovneq $dst, $src" %}
 9418   ins_encode %{
 9419     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9420     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9421   %}
 9422   ins_pipe(pipe_cmov_reg);
 9423 %}
 9424 
 9425 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9426   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9427   match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
 9428   effect(TEMP dst);
 9429 
 9430   ins_cost(200);
 9431   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9432             "cmovneq  $dst, $src2" %}
 9433   ins_encode %{
 9434     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9435     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9436   %}
 9437   ins_pipe(pipe_cmov_reg);
 9438 %}
 9439 
 9440 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9441 %{
 9442   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9443   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9444 
 9445   ins_cost(100); // XXX
 9446   format %{ "setbn$cop $dst\t# signed, long" %}
 9447   ins_encode %{
 9448     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9449     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9450   %}
 9451   ins_pipe(ialu_reg);
 9452 %}
 9453 
 9454 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9455 %{
 9456   predicate(!UseAPX);
 9457   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9458 
 9459   ins_cost(200); // XXX
 9460   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9461   ins_encode %{
 9462     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9463   %}
 9464   ins_pipe(pipe_cmov_reg);  // XXX
 9465 %}
 9466 
 9467 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9468 %{
 9469   predicate(UseAPX);
 9470   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9471 
 9472   ins_cost(200);
 9473   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9474   ins_encode %{
 9475     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9476   %}
 9477   ins_pipe(pipe_cmov_reg);
 9478 %}
 9479 
 9480 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9481 %{
 9482   predicate(!UseAPX);
 9483   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9484 
 9485   ins_cost(200); // XXX
 9486   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9487   ins_encode %{
 9488     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9489   %}
 9490   ins_pipe(pipe_cmov_mem);  // XXX
 9491 %}
 9492 
 9493 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9494 %{
 9495   predicate(UseAPX);
 9496   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9497 
 9498   ins_cost(200);
 9499   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9500   ins_encode %{
 9501     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9502   %}
 9503   ins_pipe(pipe_cmov_mem);
 9504 %}
 9505 
 9506 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9507 %{
 9508   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9509   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9510 
 9511   ins_cost(100); // XXX
 9512   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9513   ins_encode %{
 9514     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9515     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9516   %}
 9517   ins_pipe(ialu_reg);
 9518 %}
 9519 
 9520 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9521 %{
 9522   predicate(!UseAPX);
 9523   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9524 
 9525   ins_cost(200); // XXX
 9526   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9527   ins_encode %{
 9528     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9529   %}
 9530   ins_pipe(pipe_cmov_reg); // XXX
 9531 %}
 9532 
 9533 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9534 %{
 9535   predicate(UseAPX);
 9536   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9537 
 9538   ins_cost(200);
 9539   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9540   ins_encode %{
 9541     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9542   %}
 9543   ins_pipe(pipe_cmov_reg);
 9544 %}
 9545 
 9546 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9547 %{
 9548   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9549   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9550 
 9551   ins_cost(100); // XXX
 9552   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9553   ins_encode %{
 9554     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9555     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9556   %}
 9557   ins_pipe(ialu_reg);
 9558 %}
 9559 
 9560 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9561   predicate(!UseAPX);
 9562   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9563   ins_cost(200);
 9564   expand %{
 9565     cmovL_regU(cop, cr, dst, src);
 9566   %}
 9567 %}
 9568 
 9569 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
 9570 %{
 9571   predicate(UseAPX);
 9572   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9573   ins_cost(200);
 9574   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9575   ins_encode %{
 9576     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9577   %}
 9578   ins_pipe(pipe_cmov_reg);
 9579 %}
 9580 
 9581 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9582   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9583   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9584 
 9585   ins_cost(200); // XXX
 9586   format %{ "cmovpq  $dst, $src\n\t"
 9587             "cmovneq $dst, $src" %}
 9588   ins_encode %{
 9589     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9590     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9591   %}
 9592   ins_pipe(pipe_cmov_reg);
 9593 %}
 9594 
 9595 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9596   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9597   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9598   effect(TEMP dst);
 9599 
 9600   ins_cost(200);
 9601   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9602             "cmovneq  $dst, $src2" %}
 9603   ins_encode %{
 9604     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9605     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9606   %}
 9607   ins_pipe(pipe_cmov_reg);
 9608 %}
 9609 
 9610 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9611 // inputs of the CMove
 9612 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9613   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9614   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9615 
 9616   ins_cost(200); // XXX
 9617   format %{ "cmovpq  $dst, $src\n\t"
 9618             "cmovneq $dst, $src" %}
 9619   ins_encode %{
 9620     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9621     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9622   %}
 9623   ins_pipe(pipe_cmov_reg);
 9624 %}
 9625 
 9626 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9627   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9628   match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
 9629   effect(TEMP dst);
 9630 
 9631   ins_cost(200);
 9632   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
            "cmovneq  $dst, $src2" %}
 9634   ins_encode %{
 9635     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9636     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9637   %}
 9638   ins_pipe(pipe_cmov_reg);
 9639 %}
 9640 
 9641 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9642 %{
 9643   predicate(!UseAPX);
 9644   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9645 
 9646   ins_cost(200); // XXX
 9647   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9648   ins_encode %{
 9649     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9650   %}
 9651   ins_pipe(pipe_cmov_mem); // XXX
 9652 %}
 9653 
 9654 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9655   predicate(!UseAPX);
 9656   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9657   ins_cost(200);
 9658   expand %{
 9659     cmovL_memU(cop, cr, dst, src);
 9660   %}
 9661 %}
 9662 
 9663 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9664 %{
 9665   predicate(UseAPX);
 9666   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9667 
 9668   ins_cost(200);
 9669   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9670   ins_encode %{
 9671     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9672   %}
 9673   ins_pipe(pipe_cmov_mem);
 9674 %}
 9675 
 9676 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
 9677 %{
 9678   predicate(UseAPX);
 9679   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9680   ins_cost(200);
 9681   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9682   ins_encode %{
 9683     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9684   %}
 9685   ins_pipe(pipe_cmov_mem);
 9686 %}
 9687 
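// There is no conditional move for XMM registers, so float/double CMoves are
// implemented as a short branch on the inverted condition around a
// register-to-register move.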
 9688 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9689 %{
 9690   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9691 
 9692   ins_cost(200); // XXX
 9693   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9694             "movss     $dst, $src\n"
 9695     "skip:" %}
 9696   ins_encode %{
 9697     Label Lskip;
 9698     // Invert sense of branch from sense of CMOV
 9699     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9700     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9701     __ bind(Lskip);
 9702   %}
 9703   ins_pipe(pipe_slow);
 9704 %}
 9705 
 9706 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9707 %{
 9708   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9709 
 9710   ins_cost(200); // XXX
 9711   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9712             "movss     $dst, $src\n"
 9713     "skip:" %}
 9714   ins_encode %{
 9715     Label Lskip;
 9716     // Invert sense of branch from sense of CMOV
 9717     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9718     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9719     __ bind(Lskip);
 9720   %}
 9721   ins_pipe(pipe_slow);
 9722 %}
 9723 
 9724 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9725   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9726   ins_cost(200);
 9727   expand %{
 9728     cmovF_regU(cop, cr, dst, src);
 9729   %}
 9730 %}
 9731 
 9732 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9733 %{
 9734   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9735 
 9736   ins_cost(200); // XXX
 9737   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9738             "movsd     $dst, $src\n"
 9739     "skip:" %}
 9740   ins_encode %{
 9741     Label Lskip;
 9742     // Invert sense of branch from sense of CMOV
 9743     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9744     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9745     __ bind(Lskip);
 9746   %}
 9747   ins_pipe(pipe_slow);
 9748 %}
 9749 
 9750 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9751 %{
 9752   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9753 
 9754   ins_cost(200); // XXX
 9755   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9756             "movsd     $dst, $src\n"
 9757     "skip:" %}
 9758   ins_encode %{
 9759     Label Lskip;
 9760     // Invert sense of branch from sense of CMOV
 9761     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9762     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9763     __ bind(Lskip);
 9764   %}
 9765   ins_pipe(pipe_slow);
 9766 %}
 9767 
 9768 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9769   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9770   ins_cost(200);
 9771   expand %{
 9772     cmovD_regU(cop, cr, dst, src);
 9773   %}
 9774 %}
 9775 
 9776 //----------Arithmetic Instructions--------------------------------------------
 9777 //----------Addition Instructions----------------------------------------------
 9778 
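// The '_ndd' variants below use the APX new-data-destination encodings
// (eaddl dst, src1, src2 etc.), so the destination no longer has to alias an
// input operand. The trailing 'false' passed to the e*-form assembler calls is
// the APX no-flags (NF) bit left clear, i.e. the flag-setting encoding is
// selected, which is why these variants carry the same flag(...) declarations
// as their two-address counterparts.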
 9779 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9780 %{
 9781   predicate(!UseAPX);
 9782   match(Set dst (AddI dst src));
 9783   effect(KILL cr);
 9784   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9785   format %{ "addl    $dst, $src\t# int" %}
 9786   ins_encode %{
 9787     __ addl($dst$$Register, $src$$Register);
 9788   %}
 9789   ins_pipe(ialu_reg_reg);
 9790 %}
 9791 
 9792 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9793 %{
 9794   predicate(UseAPX);
 9795   match(Set dst (AddI src1 src2));
 9796   effect(KILL cr);
 9797   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9798 
 9799   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9800   ins_encode %{
 9801     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9802   %}
 9803   ins_pipe(ialu_reg_reg);
 9804 %}
 9805 
 9806 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9807 %{
 9808   predicate(!UseAPX);
 9809   match(Set dst (AddI dst src));
 9810   effect(KILL cr);
 9811   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9812 
 9813   format %{ "addl    $dst, $src\t# int" %}
 9814   ins_encode %{
 9815     __ addl($dst$$Register, $src$$constant);
 9816   %}
  ins_pipe(ialu_reg);
 9818 %}
 9819 
 9820 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9821 %{
 9822   predicate(UseAPX);
 9823   match(Set dst (AddI src1 src2));
 9824   effect(KILL cr);
 9825   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9826 
 9827   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9828   ins_encode %{
 9829     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9830   %}
  ins_pipe(ialu_reg);
 9832 %}
 9833 
 9834 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9835 %{
 9836   predicate(UseAPX);
 9837   match(Set dst (AddI (LoadI src1) src2));
 9838   effect(KILL cr);
 9839   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9840 
 9841   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9842   ins_encode %{
 9843     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9844   %}
  ins_pipe(ialu_reg);
 9846 %}
 9847 
 9848 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9849 %{
 9850   predicate(!UseAPX);
 9851   match(Set dst (AddI dst (LoadI src)));
 9852   effect(KILL cr);
 9853   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9854 
 9855   ins_cost(150); // XXX
 9856   format %{ "addl    $dst, $src\t# int" %}
 9857   ins_encode %{
 9858     __ addl($dst$$Register, $src$$Address);
 9859   %}
 9860   ins_pipe(ialu_reg_mem);
 9861 %}
 9862 
 9863 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
 9864 %{
 9865   predicate(UseAPX);
 9866   match(Set dst (AddI src1 (LoadI src2)));
 9867   effect(KILL cr);
 9868   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9869 
 9870   ins_cost(150);
 9871   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9872   ins_encode %{
 9873     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
 9874   %}
 9875   ins_pipe(ialu_reg_mem);
 9876 %}
 9877 
 9878 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9879 %{
 9880   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9881   effect(KILL cr);
 9882   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9883 
 9884   ins_cost(150); // XXX
 9885   format %{ "addl    $dst, $src\t# int" %}
 9886   ins_encode %{
 9887     __ addl($dst$$Address, $src$$Register);
 9888   %}
 9889   ins_pipe(ialu_mem_reg);
 9890 %}
 9891 
 9892 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9893 %{
 9894   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9895   effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

 9899   ins_cost(125); // XXX
 9900   format %{ "addl    $dst, $src\t# int" %}
 9901   ins_encode %{
 9902     __ addl($dst$$Address, $src$$constant);
 9903   %}
 9904   ins_pipe(ialu_mem_imm);
 9905 %}
 9906 
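// inc/dec leave CF untouched, which can create a partial-flags dependency on
// some microarchitectures; UseIncDec gates these forms so that add/sub can be
// used instead where that matters.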
 9907 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 9908 %{
 9909   predicate(!UseAPX && UseIncDec);
 9910   match(Set dst (AddI dst src));
 9911   effect(KILL cr);
 9912 
 9913   format %{ "incl    $dst\t# int" %}
 9914   ins_encode %{
 9915     __ incrementl($dst$$Register);
 9916   %}
 9917   ins_pipe(ialu_reg);
 9918 %}
 9919 
 9920 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
 9921 %{
 9922   predicate(UseAPX && UseIncDec);
 9923   match(Set dst (AddI src val));
 9924   effect(KILL cr);
 9925 
 9926   format %{ "eincl    $dst, $src\t# int ndd" %}
 9927   ins_encode %{
 9928     __ eincl($dst$$Register, $src$$Register, false);
 9929   %}
 9930   ins_pipe(ialu_reg);
 9931 %}
 9932 
 9933 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
 9934 %{
 9935   predicate(UseAPX && UseIncDec);
 9936   match(Set dst (AddI (LoadI src) val));
 9937   effect(KILL cr);
 9938 
 9939   format %{ "eincl    $dst, $src\t# int ndd" %}
 9940   ins_encode %{
 9941     __ eincl($dst$$Register, $src$$Address, false);
 9942   %}
 9943   ins_pipe(ialu_reg);
 9944 %}
 9945 
 9946 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
 9947 %{
 9948   predicate(UseIncDec);
 9949   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9950   effect(KILL cr);
 9951 
 9952   ins_cost(125); // XXX
 9953   format %{ "incl    $dst\t# int" %}
 9954   ins_encode %{
 9955     __ incrementl($dst$$Address);
 9956   %}
 9957   ins_pipe(ialu_mem_imm);
 9958 %}
 9959 
 9960 // XXX why does that use AddI
 9961 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
 9962 %{
 9963   predicate(!UseAPX && UseIncDec);
 9964   match(Set dst (AddI dst src));
 9965   effect(KILL cr);
 9966 
 9967   format %{ "decl    $dst\t# int" %}
 9968   ins_encode %{
 9969     __ decrementl($dst$$Register);
 9970   %}
 9971   ins_pipe(ialu_reg);
 9972 %}
 9973 
 9974 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
 9975 %{
 9976   predicate(UseAPX && UseIncDec);
 9977   match(Set dst (AddI src val));
 9978   effect(KILL cr);
 9979 
 9980   format %{ "edecl    $dst, $src\t# int ndd" %}
 9981   ins_encode %{
 9982     __ edecl($dst$$Register, $src$$Register, false);
 9983   %}
 9984   ins_pipe(ialu_reg);
 9985 %}
 9986 
 9987 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
 9988 %{
 9989   predicate(UseAPX && UseIncDec);
 9990   match(Set dst (AddI (LoadI src) val));
 9991   effect(KILL cr);
 9992 
 9993   format %{ "edecl    $dst, $src\t# int ndd" %}
 9994   ins_encode %{
 9995     __ edecl($dst$$Register, $src$$Address, false);
 9996   %}
 9997   ins_pipe(ialu_reg);
 9998 %}
 9999 
10000 // XXX why does that use AddI
10001 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10002 %{
10003   predicate(UseIncDec);
10004   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10005   effect(KILL cr);
10006 
10007   ins_cost(125); // XXX
10008   format %{ "decl    $dst\t# int" %}
10009   ins_encode %{
10010     __ decrementl($dst$$Address);
10011   %}
10012   ins_pipe(ialu_mem_imm);
10013 %}
10014 
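// The lea forms below fold a shift-and-add (or add-and-add) into a single
// instruction that does not touch the flags. They are gated on
// VM_Version::supports_fast_2op_lea / supports_fast_3op_lea because
// multi-operand lea is slow on some processors.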
10015 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10016 %{
10017   predicate(VM_Version::supports_fast_2op_lea());
10018   match(Set dst (AddI (LShiftI index scale) disp));
10019 
10020   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10021   ins_encode %{
10022     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10023     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10024   %}
10025   ins_pipe(ialu_reg_reg);
10026 %}
10027 
10028 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10029 %{
10030   predicate(VM_Version::supports_fast_3op_lea());
10031   match(Set dst (AddI (AddI base index) disp));
10032 
10033   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10034   ins_encode %{
10035     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10036   %}
10037   ins_pipe(ialu_reg_reg);
10038 %}
10039 
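// rbp and r13 are excluded as the base here: with no displacement, their
// ModRM/SIB encodings force an implicit zero disp8, so the plain
// [base + index << scale] form would cost an extra encoding byte (and,
// presumably, the benefit of the fast-lea path).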
10040 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10041 %{
10042   predicate(VM_Version::supports_fast_2op_lea());
10043   match(Set dst (AddI base (LShiftI index scale)));
10044 
10045   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10046   ins_encode %{
10047     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10048     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10049   %}
10050   ins_pipe(ialu_reg_reg);
10051 %}
10052 
10053 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10054 %{
10055   predicate(VM_Version::supports_fast_3op_lea());
10056   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10057 
10058   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10059   ins_encode %{
10060     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10061     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10062   %}
10063   ins_pipe(ialu_reg_reg);
10064 %}
10065 
10066 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10067 %{
10068   predicate(!UseAPX);
10069   match(Set dst (AddL dst src));
10070   effect(KILL cr);
10071   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10072 
10073   format %{ "addq    $dst, $src\t# long" %}
10074   ins_encode %{
10075     __ addq($dst$$Register, $src$$Register);
10076   %}
10077   ins_pipe(ialu_reg_reg);
10078 %}
10079 
10080 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10081 %{
10082   predicate(UseAPX);
10083   match(Set dst (AddL src1 src2));
10084   effect(KILL cr);
10085   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10086 
10087   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10088   ins_encode %{
10089     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10090   %}
10091   ins_pipe(ialu_reg_reg);
10092 %}
10093 
10094 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10095 %{
10096   predicate(!UseAPX);
10097   match(Set dst (AddL dst src));
10098   effect(KILL cr);
10099   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10100 
10101   format %{ "addq    $dst, $src\t# long" %}
10102   ins_encode %{
10103     __ addq($dst$$Register, $src$$constant);
10104   %}
  ins_pipe(ialu_reg);
10106 %}
10107 
10108 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10109 %{
10110   predicate(UseAPX);
10111   match(Set dst (AddL src1 src2));
10112   effect(KILL cr);
10113   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10114 
10115   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10116   ins_encode %{
10117     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10118   %}
  ins_pipe(ialu_reg);
10120 %}
10121 
10122 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10123 %{
10124   predicate(UseAPX);
10125   match(Set dst (AddL (LoadL src1) src2));
10126   effect(KILL cr);
10127   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10128 
10129   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10130   ins_encode %{
10131     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10132   %}
  ins_pipe(ialu_reg);
10134 %}
10135 
10136 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10137 %{
10138   predicate(!UseAPX);
10139   match(Set dst (AddL dst (LoadL src)));
10140   effect(KILL cr);
10141   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10142 
10143   ins_cost(150); // XXX
10144   format %{ "addq    $dst, $src\t# long" %}
10145   ins_encode %{
10146     __ addq($dst$$Register, $src$$Address);
10147   %}
10148   ins_pipe(ialu_reg_mem);
10149 %}
10150 
10151 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10152 %{
10153   predicate(UseAPX);
10154   match(Set dst (AddL src1 (LoadL src2)));
10155   effect(KILL cr);
10156   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10157 
10158   ins_cost(150);
10159   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10160   ins_encode %{
10161     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10162   %}
10163   ins_pipe(ialu_reg_mem);
10164 %}
10165 
10166 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10167 %{
10168   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10169   effect(KILL cr);
10170   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10171 
10172   ins_cost(150); // XXX
10173   format %{ "addq    $dst, $src\t# long" %}
10174   ins_encode %{
10175     __ addq($dst$$Address, $src$$Register);
10176   %}
10177   ins_pipe(ialu_mem_reg);
10178 %}
10179 
10180 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10181 %{
10182   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10183   effect(KILL cr);
10184   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10185 
10186   ins_cost(125); // XXX
10187   format %{ "addq    $dst, $src\t# long" %}
10188   ins_encode %{
10189     __ addq($dst$$Address, $src$$constant);
10190   %}
10191   ins_pipe(ialu_mem_imm);
10192 %}
10193 
10194 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10195 %{
10196   predicate(!UseAPX && UseIncDec);
10197   match(Set dst (AddL dst src));
10198   effect(KILL cr);
10199 
10200   format %{ "incq    $dst\t# long" %}
10201   ins_encode %{
10202     __ incrementq($dst$$Register);
10203   %}
10204   ins_pipe(ialu_reg);
10205 %}
10206 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10208 %{
10209   predicate(UseAPX && UseIncDec);
10210   match(Set dst (AddL src val));
10211   effect(KILL cr);
10212 
10213   format %{ "eincq    $dst, $src\t# long ndd" %}
10214   ins_encode %{
10215     __ eincq($dst$$Register, $src$$Register, false);
10216   %}
10217   ins_pipe(ialu_reg);
10218 %}
10219 
10220 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10221 %{
10222   predicate(UseAPX && UseIncDec);
10223   match(Set dst (AddL (LoadL src) val));
10224   effect(KILL cr);
10225 
10226   format %{ "eincq    $dst, $src\t# long ndd" %}
10227   ins_encode %{
10228     __ eincq($dst$$Register, $src$$Address, false);
10229   %}
10230   ins_pipe(ialu_reg);
10231 %}
10232 
10233 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10234 %{
10235   predicate(UseIncDec);
10236   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10237   effect(KILL cr);
10238 
10239   ins_cost(125); // XXX
10240   format %{ "incq    $dst\t# long" %}
10241   ins_encode %{
10242     __ incrementq($dst$$Address);
10243   %}
10244   ins_pipe(ialu_mem_imm);
10245 %}
10246 
10247 // XXX why does that use AddL
10248 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10249 %{
10250   predicate(!UseAPX && UseIncDec);
10251   match(Set dst (AddL dst src));
10252   effect(KILL cr);
10253 
10254   format %{ "decq    $dst\t# long" %}
10255   ins_encode %{
10256     __ decrementq($dst$$Register);
10257   %}
10258   ins_pipe(ialu_reg);
10259 %}
10260 
10261 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10262 %{
10263   predicate(UseAPX && UseIncDec);
10264   match(Set dst (AddL src val));
10265   effect(KILL cr);
10266 
10267   format %{ "edecq    $dst, $src\t# long ndd" %}
10268   ins_encode %{
10269     __ edecq($dst$$Register, $src$$Register, false);
10270   %}
10271   ins_pipe(ialu_reg);
10272 %}
10273 
10274 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10275 %{
10276   predicate(UseAPX && UseIncDec);
10277   match(Set dst (AddL (LoadL src) val));
10278   effect(KILL cr);
10279 
10280   format %{ "edecq    $dst, $src\t# long ndd" %}
10281   ins_encode %{
10282     __ edecq($dst$$Register, $src$$Address, false);
10283   %}
10284   ins_pipe(ialu_reg);
10285 %}
10286 
10287 // XXX why does that use AddL
10288 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10289 %{
10290   predicate(UseIncDec);
10291   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10292   effect(KILL cr);
10293 
10294   ins_cost(125); // XXX
10295   format %{ "decq    $dst\t# long" %}
10296   ins_encode %{
10297     __ decrementq($dst$$Address);
10298   %}
10299   ins_pipe(ialu_mem_imm);
10300 %}
10301 
10302 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10303 %{
10304   predicate(VM_Version::supports_fast_2op_lea());
10305   match(Set dst (AddL (LShiftL index scale) disp));
10306 
10307   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10308   ins_encode %{
10309     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10310     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10311   %}
10312   ins_pipe(ialu_reg_reg);
10313 %}
10314 
10315 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10316 %{
10317   predicate(VM_Version::supports_fast_3op_lea());
10318   match(Set dst (AddL (AddL base index) disp));
10319 
10320   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10321   ins_encode %{
10322     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10323   %}
10324   ins_pipe(ialu_reg_reg);
10325 %}
10326 
10327 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10328 %{
10329   predicate(VM_Version::supports_fast_2op_lea());
10330   match(Set dst (AddL base (LShiftL index scale)));
10331 
10332   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10333   ins_encode %{
10334     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10335     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10336   %}
10337   ins_pipe(ialu_reg_reg);
10338 %}
10339 
10340 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10341 %{
10342   predicate(VM_Version::supports_fast_3op_lea());
10343   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10344 
10345   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10346   ins_encode %{
10347     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10348     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10349   %}
10350   ins_pipe(ialu_reg_reg);
10351 %}
10352 
10353 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10354 %{
10355   match(Set dst (AddP dst src));
10356   effect(KILL cr);
10357   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10358 
10359   format %{ "addq    $dst, $src\t# ptr" %}
10360   ins_encode %{
10361     __ addq($dst$$Register, $src$$Register);
10362   %}
10363   ins_pipe(ialu_reg_reg);
10364 %}
10365 
10366 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10367 %{
10368   match(Set dst (AddP dst src));
10369   effect(KILL cr);
10370   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10371 
10372   format %{ "addq    $dst, $src\t# ptr" %}
10373   ins_encode %{
10374     __ addq($dst$$Register, $src$$constant);
10375   %}
  ins_pipe(ialu_reg);
10377 %}
10378 
10379 // XXX addP mem ops ????
10380 
10381 instruct checkCastPP(rRegP dst)
10382 %{
10383   match(Set dst (CheckCastPP dst));
10384 
10385   size(0);
10386   format %{ "# checkcastPP of $dst" %}
10387   ins_encode(/* empty encoding */);
10388   ins_pipe(empty);
10389 %}
10390 
10391 instruct castPP(rRegP dst)
10392 %{
10393   match(Set dst (CastPP dst));
10394 
10395   size(0);
10396   format %{ "# castPP of $dst" %}
10397   ins_encode(/* empty encoding */);
10398   ins_pipe(empty);
10399 %}
10400 
10401 instruct castII(rRegI dst)
10402 %{
10403   predicate(VerifyConstraintCasts == 0);
10404   match(Set dst (CastII dst));
10405 
10406   size(0);
10407   format %{ "# castII of $dst" %}
10408   ins_encode(/* empty encoding */);
10409   ins_cost(0);
10410   ins_pipe(empty);
10411 %}
10412 
10413 instruct castII_checked(rRegI dst, rFlagsReg cr)
10414 %{
10415   predicate(VerifyConstraintCasts > 0);
10416   match(Set dst (CastII dst));
10417 
10418   effect(KILL cr);
10419   format %{ "# cast_checked_II $dst" %}
10420   ins_encode %{
10421     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10422   %}
10423   ins_pipe(pipe_slow);
10424 %}
10425 
10426 instruct castLL(rRegL dst)
10427 %{
10428   predicate(VerifyConstraintCasts == 0);
10429   match(Set dst (CastLL dst));
10430 
10431   size(0);
10432   format %{ "# castLL of $dst" %}
10433   ins_encode(/* empty encoding */);
10434   ins_cost(0);
10435   ins_pipe(empty);
10436 %}
10437 
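// When both range bounds fit in 32-bit immediates the check can compare
// against them directly (castLL_is_imm32) and needs no scratch register;
// otherwise a TEMP is required to materialize the 64-bit bounds.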
10438 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10439 %{
10440   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10441   match(Set dst (CastLL dst));
10442 
10443   effect(KILL cr);
10444   format %{ "# cast_checked_LL $dst" %}
10445   ins_encode %{
10446     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10447   %}
10448   ins_pipe(pipe_slow);
10449 %}
10450 
10451 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10452 %{
10453   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10454   match(Set dst (CastLL dst));
10455 
10456   effect(KILL cr, TEMP tmp);
10457   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10458   ins_encode %{
10459     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10460   %}
10461   ins_pipe(pipe_slow);
10462 %}
10463 
10464 instruct castFF(regF dst)
10465 %{
10466   match(Set dst (CastFF dst));
10467 
10468   size(0);
10469   format %{ "# castFF of $dst" %}
10470   ins_encode(/* empty encoding */);
10471   ins_cost(0);
10472   ins_pipe(empty);
10473 %}
10474 
10475 instruct castHH(regF dst)
10476 %{
10477   match(Set dst (CastHH dst));
10478 
10479   size(0);
10480   format %{ "# castHH of $dst" %}
10481   ins_encode(/* empty encoding */);
10482   ins_cost(0);
10483   ins_pipe(empty);
10484 %}
10485 
10486 instruct castDD(regD dst)
10487 %{
10488   match(Set dst (CastDD dst));
10489 
10490   size(0);
10491   format %{ "# castDD of $dst" %}
10492   ins_encode(/* empty encoding */);
10493   ins_cost(0);
10494   ins_pipe(empty);
10495 %}
10496 
10497 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
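// lock cmpxchg compares rax against the memory operand and, if they match,
// stores newval there and sets ZF; on failure rax receives the current memory
// value, which is why oldval is pinned to rax and KILLed. Since x86 cmpxchg
// never fails spuriously, the weak variants can share the strong
// implementation.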
10498 instruct compareAndSwapP(rRegI res,
10499                          memory mem_ptr,
10500                          rax_RegP oldval, rRegP newval,
10501                          rFlagsReg cr)
10502 %{
10503   predicate(n->as_LoadStore()->barrier_data() == 0);
10504   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10505   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10506   effect(KILL cr, KILL oldval);
10507 
10508   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10509             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10510             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10511   ins_encode %{
10512     __ lock();
10513     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10514     __ setcc(Assembler::equal, $res$$Register);
10515   %}
  ins_pipe(pipe_cmpxchg);
10517 %}
10518 
10519 instruct compareAndSwapL(rRegI res,
10520                          memory mem_ptr,
10521                          rax_RegL oldval, rRegL newval,
10522                          rFlagsReg cr)
10523 %{
10524   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10525   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10526   effect(KILL cr, KILL oldval);
10527 
10528   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10529             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10530             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10531   ins_encode %{
10532     __ lock();
10533     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10534     __ setcc(Assembler::equal, $res$$Register);
10535   %}
10536   ins_pipe( pipe_cmpxchg );
10537 %}
10538 
10539 instruct compareAndSwapI(rRegI res,
10540                          memory mem_ptr,
10541                          rax_RegI oldval, rRegI newval,
10542                          rFlagsReg cr)
10543 %{
10544   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10545   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10546   effect(KILL cr, KILL oldval);
10547 
10548   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10549             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10550             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10551   ins_encode %{
10552     __ lock();
10553     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10554     __ setcc(Assembler::equal, $res$$Register);
10555   %}
10556   ins_pipe( pipe_cmpxchg );
10557 %}
10558 
10559 instruct compareAndSwapB(rRegI res,
10560                          memory mem_ptr,
10561                          rax_RegI oldval, rRegI newval,
10562                          rFlagsReg cr)
10563 %{
10564   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10565   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10566   effect(KILL cr, KILL oldval);
10567 
10568   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10569             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10570             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10571   ins_encode %{
10572     __ lock();
10573     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10574     __ setcc(Assembler::equal, $res$$Register);
10575   %}
10576   ins_pipe( pipe_cmpxchg );
10577 %}
10578 
10579 instruct compareAndSwapS(rRegI res,
10580                          memory mem_ptr,
10581                          rax_RegI oldval, rRegI newval,
10582                          rFlagsReg cr)
10583 %{
10584   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10585   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10586   effect(KILL cr, KILL oldval);
10587 
10588   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10589             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10590             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10591   ins_encode %{
10592     __ lock();
10593     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10594     __ setcc(Assembler::equal, $res$$Register);
10595   %}
10596   ins_pipe( pipe_cmpxchg );
10597 %}
10598 
10599 instruct compareAndSwapN(rRegI res,
10600                           memory mem_ptr,
10601                           rax_RegN oldval, rRegN newval,
10602                           rFlagsReg cr) %{
10603   predicate(n->as_LoadStore()->barrier_data() == 0);
10604   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10605   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10606   effect(KILL cr, KILL oldval);
10607 
10608   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10609             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10610             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10611   ins_encode %{
10612     __ lock();
10613     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10614     __ setcc(Assembler::equal, $res$$Register);
10615   %}
10616   ins_pipe( pipe_cmpxchg );
10617 %}
10618 
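// CompareAndExchange differs from CompareAndSwap in its result: cmpxchg
// leaves the value witnessed in memory in rax, so these rules produce oldval
// directly and need no setcc.  Illustrative Java (a sketch, not part of this
// file), where VH is a VarHandle for an int field:
//
//   int seen = (int) VH.compareAndExchange(obj, 0, 1);  // witnessed value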
10619 instruct compareAndExchangeB(
10620                          memory mem_ptr,
10621                          rax_RegI oldval, rRegI newval,
10622                          rFlagsReg cr)
10623 %{
10624   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10625   effect(KILL cr);
10626 
10627   format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10629   ins_encode %{
10630     __ lock();
10631     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10632   %}
10633   ins_pipe( pipe_cmpxchg );
10634 %}
10635 
10636 instruct compareAndExchangeS(
10637                          memory mem_ptr,
10638                          rax_RegI oldval, rRegI newval,
10639                          rFlagsReg cr)
10640 %{
10641   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10642   effect(KILL cr);
10643 
10644   format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10646   ins_encode %{
10647     __ lock();
10648     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10649   %}
10650   ins_pipe( pipe_cmpxchg );
10651 %}
10652 
10653 instruct compareAndExchangeI(
10654                          memory mem_ptr,
10655                          rax_RegI oldval, rRegI newval,
10656                          rFlagsReg cr)
10657 %{
10658   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10659   effect(KILL cr);
10660 
10661   format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10663   ins_encode %{
10664     __ lock();
10665     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10666   %}
10667   ins_pipe( pipe_cmpxchg );
10668 %}
10669 
10670 instruct compareAndExchangeL(
10671                          memory mem_ptr,
10672                          rax_RegL oldval, rRegL newval,
10673                          rFlagsReg cr)
10674 %{
10675   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10676   effect(KILL cr);
10677 
10678   format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10680   ins_encode %{
10681     __ lock();
10682     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10683   %}
10684   ins_pipe( pipe_cmpxchg );
10685 %}
10686 
10687 instruct compareAndExchangeN(
10688                           memory mem_ptr,
10689                           rax_RegN oldval, rRegN newval,
10690                           rFlagsReg cr) %{
10691   predicate(n->as_LoadStore()->barrier_data() == 0);
10692   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10693   effect(KILL cr);
10694 
10695   format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10697   ins_encode %{
10698     __ lock();
10699     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10700   %}
10701   ins_pipe( pipe_cmpxchg );
10702 %}
10703 
10704 instruct compareAndExchangeP(
10705                          memory mem_ptr,
10706                          rax_RegP oldval, rRegP newval,
10707                          rFlagsReg cr)
10708 %{
10709   predicate(n->as_LoadStore()->barrier_data() == 0);
10710   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10711   effect(KILL cr);
10712 
10713   format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr" %}
10715   ins_encode %{
10716     __ lock();
10717     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10718   %}
10719   ins_pipe( pipe_cmpxchg );
10720 %}
10721 
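// Atomic add (GetAndAddB/S/I/L).  When the old value is unused
// (result_not_used()), a lock-prefixed add is cheaper than xadd; otherwise
// lock xadd returns the prior memory value in the register.  Illustrative
// Java (a sketch, not part of this file):
//
//   counter.getAndAdd(1);             // result dropped -> addl_lock
//   int old = counter.getAndAdd(1);   // result used    -> xaddl_lock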
10722 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10723   predicate(n->as_LoadStore()->result_not_used());
10724   match(Set dummy (GetAndAddB mem add));
10725   effect(KILL cr);
10726   format %{ "addb_lock   $mem, $add" %}
10727   ins_encode %{
10728     __ lock();
10729     __ addb($mem$$Address, $add$$Register);
10730   %}
10731   ins_pipe(pipe_cmpxchg);
10732 %}
10733 
10734 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10735   predicate(n->as_LoadStore()->result_not_used());
10736   match(Set dummy (GetAndAddB mem add));
10737   effect(KILL cr);
10738   format %{ "addb_lock   $mem, $add" %}
10739   ins_encode %{
10740     __ lock();
10741     __ addb($mem$$Address, $add$$constant);
10742   %}
10743   ins_pipe(pipe_cmpxchg);
10744 %}
10745 
10746 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10747   predicate(!n->as_LoadStore()->result_not_used());
10748   match(Set newval (GetAndAddB mem newval));
10749   effect(KILL cr);
10750   format %{ "xaddb_lock  $mem, $newval" %}
10751   ins_encode %{
10752     __ lock();
10753     __ xaddb($mem$$Address, $newval$$Register);
10754   %}
10755   ins_pipe(pipe_cmpxchg);
10756 %}
10757 
10758 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10759   predicate(n->as_LoadStore()->result_not_used());
10760   match(Set dummy (GetAndAddS mem add));
10761   effect(KILL cr);
10762   format %{ "addw_lock   $mem, $add" %}
10763   ins_encode %{
10764     __ lock();
10765     __ addw($mem$$Address, $add$$Register);
10766   %}
10767   ins_pipe(pipe_cmpxchg);
10768 %}
10769 
10770 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10771   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10772   match(Set dummy (GetAndAddS mem add));
10773   effect(KILL cr);
10774   format %{ "addw_lock   $mem, $add" %}
10775   ins_encode %{
10776     __ lock();
10777     __ addw($mem$$Address, $add$$constant);
10778   %}
10779   ins_pipe(pipe_cmpxchg);
10780 %}
10781 
10782 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10783   predicate(!n->as_LoadStore()->result_not_used());
10784   match(Set newval (GetAndAddS mem newval));
10785   effect(KILL cr);
10786   format %{ "xaddw_lock  $mem, $newval" %}
10787   ins_encode %{
10788     __ lock();
10789     __ xaddw($mem$$Address, $newval$$Register);
10790   %}
10791   ins_pipe(pipe_cmpxchg);
10792 %}
10793 
10794 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10795   predicate(n->as_LoadStore()->result_not_used());
10796   match(Set dummy (GetAndAddI mem add));
10797   effect(KILL cr);
10798   format %{ "addl_lock   $mem, $add" %}
10799   ins_encode %{
10800     __ lock();
10801     __ addl($mem$$Address, $add$$Register);
10802   %}
10803   ins_pipe(pipe_cmpxchg);
10804 %}
10805 
10806 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10807   predicate(n->as_LoadStore()->result_not_used());
10808   match(Set dummy (GetAndAddI mem add));
10809   effect(KILL cr);
10810   format %{ "addl_lock   $mem, $add" %}
10811   ins_encode %{
10812     __ lock();
10813     __ addl($mem$$Address, $add$$constant);
10814   %}
10815   ins_pipe(pipe_cmpxchg);
10816 %}
10817 
10818 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10819   predicate(!n->as_LoadStore()->result_not_used());
10820   match(Set newval (GetAndAddI mem newval));
10821   effect(KILL cr);
10822   format %{ "xaddl_lock  $mem, $newval" %}
10823   ins_encode %{
10824     __ lock();
10825     __ xaddl($mem$$Address, $newval$$Register);
10826   %}
10827   ins_pipe(pipe_cmpxchg);
10828 %}
10829 
10830 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10831   predicate(n->as_LoadStore()->result_not_used());
10832   match(Set dummy (GetAndAddL mem add));
10833   effect(KILL cr);
10834   format %{ "addq_lock   $mem, $add" %}
10835   ins_encode %{
10836     __ lock();
10837     __ addq($mem$$Address, $add$$Register);
10838   %}
10839   ins_pipe(pipe_cmpxchg);
10840 %}
10841 
10842 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10843   predicate(n->as_LoadStore()->result_not_used());
10844   match(Set dummy (GetAndAddL mem add));
10845   effect(KILL cr);
10846   format %{ "addq_lock   $mem, $add" %}
10847   ins_encode %{
10848     __ lock();
10849     __ addq($mem$$Address, $add$$constant);
10850   %}
10851   ins_pipe(pipe_cmpxchg);
10852 %}
10853 
10854 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10855   predicate(!n->as_LoadStore()->result_not_used());
10856   match(Set newval (GetAndAddL mem newval));
10857   effect(KILL cr);
10858   format %{ "xaddq_lock  $mem, $newval" %}
10859   ins_encode %{
10860     __ lock();
10861     __ xaddq($mem$$Address, $newval$$Register);
10862   %}
10863   ins_pipe(pipe_cmpxchg);
10864 %}
10865 
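// Atomic exchange (GetAndSetB/S/I/L/P/N).  No lock prefix is emitted because
// an xchg with a memory operand is implicitly locked on x86.  Illustrative
// Java (a sketch, not part of this file):
//
//   int prev = counter.getAndSet(42);   // xchgl, atomic without lock prefix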
10866 instruct xchgB( memory mem, rRegI newval) %{
10867   match(Set newval (GetAndSetB mem newval));
10868   format %{ "XCHGB  $newval,[$mem]" %}
10869   ins_encode %{
10870     __ xchgb($newval$$Register, $mem$$Address);
10871   %}
10872   ins_pipe( pipe_cmpxchg );
10873 %}
10874 
10875 instruct xchgS( memory mem, rRegI newval) %{
10876   match(Set newval (GetAndSetS mem newval));
10877   format %{ "XCHGW  $newval,[$mem]" %}
10878   ins_encode %{
10879     __ xchgw($newval$$Register, $mem$$Address);
10880   %}
10881   ins_pipe( pipe_cmpxchg );
10882 %}
10883 
10884 instruct xchgI( memory mem, rRegI newval) %{
10885   match(Set newval (GetAndSetI mem newval));
10886   format %{ "XCHGL  $newval,[$mem]" %}
10887   ins_encode %{
10888     __ xchgl($newval$$Register, $mem$$Address);
10889   %}
10890   ins_pipe( pipe_cmpxchg );
10891 %}
10892 
10893 instruct xchgL( memory mem, rRegL newval) %{
10894   match(Set newval (GetAndSetL mem newval));
  format %{ "XCHGQ  $newval,[$mem]" %}
10896   ins_encode %{
10897     __ xchgq($newval$$Register, $mem$$Address);
10898   %}
10899   ins_pipe( pipe_cmpxchg );
10900 %}
10901 
10902 instruct xchgP( memory mem, rRegP newval) %{
10903   match(Set newval (GetAndSetP mem newval));
10904   predicate(n->as_LoadStore()->barrier_data() == 0);
10905   format %{ "XCHGQ  $newval,[$mem]" %}
10906   ins_encode %{
10907     __ xchgq($newval$$Register, $mem$$Address);
10908   %}
10909   ins_pipe( pipe_cmpxchg );
10910 %}
10911 
10912 instruct xchgN( memory mem, rRegN newval) %{
10913   predicate(n->as_LoadStore()->barrier_data() == 0);
10914   match(Set newval (GetAndSetN mem newval));
  format %{ "XCHGL  $newval,[$mem]" %}
10916   ins_encode %{
10917     __ xchgl($newval$$Register, $mem$$Address);
10918   %}
10919   ins_pipe( pipe_cmpxchg );
10920 %}
10921 
10922 //----------Abs Instructions-------------------------------------------
10923 
10924 // Integer Absolute Instructions
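// Branch-free absolute value: zero dst, subtract src (dst = -src), then the
// cmov replaces dst with src when src was positive.  Worked example:
// src = -5 gives dst = 5 and the cmov is not taken; src = 7 gives dst = -7
// and the cmov restores 7.  Note that Integer.MIN_VALUE maps to itself,
// matching Java's Math.abs semantics.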
10925 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10926 %{
10927   match(Set dst (AbsI src));
10928   effect(TEMP dst, KILL cr);
10929   format %{ "xorl    $dst, $dst\t# abs int\n\t"
10930             "subl    $dst, $src\n\t"
10931             "cmovll  $dst, $src" %}
10932   ins_encode %{
10933     __ xorl($dst$$Register, $dst$$Register);
10934     __ subl($dst$$Register, $src$$Register);
10935     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
10936   %}
10937 
10938   ins_pipe(ialu_reg_reg);
10939 %}
10940 
10941 // Long Absolute Instructions
10942 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10943 %{
10944   match(Set dst (AbsL src));
10945   effect(TEMP dst, KILL cr);
10946   format %{ "xorl    $dst, $dst\t# abs long\n\t"
10947             "subq    $dst, $src\n\t"
10948             "cmovlq  $dst, $src" %}
10949   ins_encode %{
10950     __ xorl($dst$$Register, $dst$$Register);
10951     __ subq($dst$$Register, $src$$Register);
10952     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
10953   %}
10954 
10955   ins_pipe(ialu_reg_reg);
10956 %}
10957 
10958 //----------Subtraction Instructions-------------------------------------------
10959 
10960 // Integer Subtraction Instructions
10961 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
10962 %{
10963   predicate(!UseAPX);
10964   match(Set dst (SubI dst src));
10965   effect(KILL cr);
10966   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10967 
10968   format %{ "subl    $dst, $src\t# int" %}
10969   ins_encode %{
10970     __ subl($dst$$Register, $src$$Register);
10971   %}
10972   ins_pipe(ialu_reg_reg);
10973 %}
10974 
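// The _ndd variants below use APX NDD (new data destination) encodings:
// three-operand forms that write $dst without clobbering $src1, saving the
// mov that the two-operand forms would otherwise need.  The trailing 'false'
// argument to the e-prefixed assembler calls is presumed here to request the
// flag-setting form rather than the APX NF (no-flags) variant.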
10975 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
10976 %{
10977   predicate(UseAPX);
10978   match(Set dst (SubI src1 src2));
10979   effect(KILL cr);
10980   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10981 
10982   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
10983   ins_encode %{
10984     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
10985   %}
10986   ins_pipe(ialu_reg_reg);
10987 %}
10988 
10989 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
10990 %{
10991   predicate(UseAPX);
10992   match(Set dst (SubI src1 src2));
10993   effect(KILL cr);
10994   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10995 
10996   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
10997   ins_encode %{
10998     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
10999   %}
11000   ins_pipe(ialu_reg_reg);
11001 %}
11002 
11003 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11004 %{
11005   predicate(UseAPX);
11006   match(Set dst (SubI (LoadI src1) src2));
11007   effect(KILL cr);
11008   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11009 
11010   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11011   ins_encode %{
11012     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11013   %}
11014   ins_pipe(ialu_reg_reg);
11015 %}
11016 
11017 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11018 %{
11019   predicate(!UseAPX);
11020   match(Set dst (SubI dst (LoadI src)));
11021   effect(KILL cr);
11022   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11023 
11024   ins_cost(150);
11025   format %{ "subl    $dst, $src\t# int" %}
11026   ins_encode %{
11027     __ subl($dst$$Register, $src$$Address);
11028   %}
11029   ins_pipe(ialu_reg_mem);
11030 %}
11031 
11032 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11033 %{
11034   predicate(UseAPX);
11035   match(Set dst (SubI src1 (LoadI src2)));
11036   effect(KILL cr);
11037   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11038 
11039   ins_cost(150);
11040   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11041   ins_encode %{
11042     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11043   %}
11044   ins_pipe(ialu_reg_mem);
11045 %}
11046 
11047 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11048 %{
11049   predicate(UseAPX);
11050   match(Set dst (SubI (LoadI src1) src2));
11051   effect(KILL cr);
11052   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11053 
11054   ins_cost(150);
11055   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11056   ins_encode %{
11057     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11058   %}
11059   ins_pipe(ialu_reg_mem);
11060 %}
11061 
11062 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11063 %{
11064   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11065   effect(KILL cr);
11066   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11067 
11068   ins_cost(150);
11069   format %{ "subl    $dst, $src\t# int" %}
11070   ins_encode %{
11071     __ subl($dst$$Address, $src$$Register);
11072   %}
11073   ins_pipe(ialu_mem_reg);
11074 %}
11075 
11076 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11077 %{
11078   predicate(!UseAPX);
11079   match(Set dst (SubL dst src));
11080   effect(KILL cr);
11081   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11082 
11083   format %{ "subq    $dst, $src\t# long" %}
11084   ins_encode %{
11085     __ subq($dst$$Register, $src$$Register);
11086   %}
11087   ins_pipe(ialu_reg_reg);
11088 %}
11089 
11090 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11091 %{
11092   predicate(UseAPX);
11093   match(Set dst (SubL src1 src2));
11094   effect(KILL cr);
11095   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11096 
11097   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11098   ins_encode %{
11099     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11100   %}
11101   ins_pipe(ialu_reg_reg);
11102 %}
11103 
11104 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11105 %{
11106   predicate(UseAPX);
11107   match(Set dst (SubL src1 src2));
11108   effect(KILL cr);
11109   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11110 
11111   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11112   ins_encode %{
11113     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11114   %}
11115   ins_pipe(ialu_reg_reg);
11116 %}
11117 
11118 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11119 %{
11120   predicate(UseAPX);
11121   match(Set dst (SubL (LoadL src1) src2));
11122   effect(KILL cr);
11123   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11124 
11125   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11126   ins_encode %{
11127     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11128   %}
11129   ins_pipe(ialu_reg_reg);
11130 %}
11131 
11132 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11133 %{
11134   predicate(!UseAPX);
11135   match(Set dst (SubL dst (LoadL src)));
11136   effect(KILL cr);
11137   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11138 
11139   ins_cost(150);
11140   format %{ "subq    $dst, $src\t# long" %}
11141   ins_encode %{
11142     __ subq($dst$$Register, $src$$Address);
11143   %}
11144   ins_pipe(ialu_reg_mem);
11145 %}
11146 
11147 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11148 %{
11149   predicate(UseAPX);
11150   match(Set dst (SubL src1 (LoadL src2)));
11151   effect(KILL cr);
11152   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11153 
11154   ins_cost(150);
11155   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11156   ins_encode %{
11157     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11158   %}
11159   ins_pipe(ialu_reg_mem);
11160 %}
11161 
11162 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11163 %{
11164   predicate(UseAPX);
11165   match(Set dst (SubL (LoadL src1) src2));
11166   effect(KILL cr);
11167   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11168 
11169   ins_cost(150);
11170   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11171   ins_encode %{
11172     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11173   %}
11174   ins_pipe(ialu_reg_mem);
11175 %}
11176 
11177 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11178 %{
11179   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11180   effect(KILL cr);
11181   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11182 
11183   ins_cost(150);
11184   format %{ "subq    $dst, $src\t# long" %}
11185   ins_encode %{
11186     __ subq($dst$$Address, $src$$Register);
11187   %}
11188   ins_pipe(ialu_mem_reg);
11189 %}
11190 
// Subtract an integer from a pointer
// C2 presents (ptr - i) as AddP of the pointer and the negated index, which
// this rule matches with a single subq.
11193 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11194 %{
11195   match(Set dst (AddP dst (SubI zero src)));
11196   effect(KILL cr);
11197 
11198   format %{ "subq    $dst, $src\t# ptr - int" %}
11199   ins_encode %{
11200     __ subq($dst$$Register, $src$$Register);
11201   %}
11202   ins_pipe(ialu_reg_reg);
11203 %}
11204 
11205 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11206 %{
11207   predicate(!UseAPX);
11208   match(Set dst (SubI zero dst));
11209   effect(KILL cr);
11210   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11211 
11212   format %{ "negl    $dst\t# int" %}
11213   ins_encode %{
11214     __ negl($dst$$Register);
11215   %}
11216   ins_pipe(ialu_reg);
11217 %}
11218 
11219 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11220 %{
11221   predicate(UseAPX);
11222   match(Set dst (SubI zero src));
11223   effect(KILL cr);
11224   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11225 
11226   format %{ "enegl    $dst, $src\t# int ndd" %}
11227   ins_encode %{
11228     __ enegl($dst$$Register, $src$$Register, false);
11229   %}
11230   ins_pipe(ialu_reg);
11231 %}
11232 
11233 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11234 %{
11235   predicate(!UseAPX);
11236   match(Set dst (NegI dst));
11237   effect(KILL cr);
11238   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11239 
11240   format %{ "negl    $dst\t# int" %}
11241   ins_encode %{
11242     __ negl($dst$$Register);
11243   %}
11244   ins_pipe(ialu_reg);
11245 %}
11246 
11247 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11248 %{
11249   predicate(UseAPX);
11250   match(Set dst (NegI src));
11251   effect(KILL cr);
11252   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11253 
11254   format %{ "enegl    $dst, $src\t# int ndd" %}
11255   ins_encode %{
11256     __ enegl($dst$$Register, $src$$Register, false);
11257   %}
11258   ins_pipe(ialu_reg);
11259 %}
11260 
11261 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11262 %{
11263   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11264   effect(KILL cr);
11265   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11266 
11267   format %{ "negl    $dst\t# int" %}
11268   ins_encode %{
11269     __ negl($dst$$Address);
11270   %}
11271   ins_pipe(ialu_reg);
11272 %}
11273 
11274 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11275 %{
11276   predicate(!UseAPX);
11277   match(Set dst (SubL zero dst));
11278   effect(KILL cr);
11279   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11280 
11281   format %{ "negq    $dst\t# long" %}
11282   ins_encode %{
11283     __ negq($dst$$Register);
11284   %}
11285   ins_pipe(ialu_reg);
11286 %}
11287 
11288 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11289 %{
11290   predicate(UseAPX);
11291   match(Set dst (SubL zero src));
11292   effect(KILL cr);
11293   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11294 
11295   format %{ "enegq    $dst, $src\t# long ndd" %}
11296   ins_encode %{
11297     __ enegq($dst$$Register, $src$$Register, false);
11298   %}
11299   ins_pipe(ialu_reg);
11300 %}
11301 
11302 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11303 %{
11304   predicate(!UseAPX);
11305   match(Set dst (NegL dst));
11306   effect(KILL cr);
11307   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11308 
  format %{ "negq    $dst\t# long" %}
11310   ins_encode %{
11311     __ negq($dst$$Register);
11312   %}
11313   ins_pipe(ialu_reg);
11314 %}
11315 
11316 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11317 %{
11318   predicate(UseAPX);
11319   match(Set dst (NegL src));
11320   effect(KILL cr);
11321   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11322 
11323   format %{ "enegq    $dst, $src\t# long ndd" %}
11324   ins_encode %{
11325     __ enegq($dst$$Register, $src$$Register, false);
11326   %}
11327   ins_pipe(ialu_reg);
11328 %}
11329 
11330 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11331 %{
11332   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11333   effect(KILL cr);
11334   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11335 
11336   format %{ "negq    $dst\t# long" %}
11337   ins_encode %{
11338     __ negq($dst$$Address);
11339   %}
11340   ins_pipe(ialu_reg);
11341 %}
11342 
11343 //----------Multiplication/Division Instructions-------------------------------
11344 // Integer Multiplication Instructions
11345 // Multiply Register
11346 
11347 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11348 %{
11349   predicate(!UseAPX);
11350   match(Set dst (MulI dst src));
11351   effect(KILL cr);
11352 
11353   ins_cost(300);
11354   format %{ "imull   $dst, $src\t# int" %}
11355   ins_encode %{
11356     __ imull($dst$$Register, $src$$Register);
11357   %}
11358   ins_pipe(ialu_reg_reg_alu0);
11359 %}
11360 
11361 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11362 %{
11363   predicate(UseAPX);
11364   match(Set dst (MulI src1 src2));
11365   effect(KILL cr);
11366 
11367   ins_cost(300);
11368   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11369   ins_encode %{
11370     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11371   %}
11372   ins_pipe(ialu_reg_reg_alu0);
11373 %}
11374 
11375 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11376 %{
11377   match(Set dst (MulI src imm));
11378   effect(KILL cr);
11379 
11380   ins_cost(300);
11381   format %{ "imull   $dst, $src, $imm\t# int" %}
11382   ins_encode %{
11383     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11384   %}
11385   ins_pipe(ialu_reg_reg_alu0);
11386 %}
11387 
11388 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11389 %{
11390   predicate(!UseAPX);
11391   match(Set dst (MulI dst (LoadI src)));
11392   effect(KILL cr);
11393 
11394   ins_cost(350);
11395   format %{ "imull   $dst, $src\t# int" %}
11396   ins_encode %{
11397     __ imull($dst$$Register, $src$$Address);
11398   %}
11399   ins_pipe(ialu_reg_mem_alu0);
11400 %}
11401 
11402 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11403 %{
11404   predicate(UseAPX);
11405   match(Set dst (MulI src1 (LoadI src2)));
11406   effect(KILL cr);
11407 
11408   ins_cost(350);
11409   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11410   ins_encode %{
11411     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11412   %}
11413   ins_pipe(ialu_reg_mem_alu0);
11414 %}
11415 
11416 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11417 %{
11418   match(Set dst (MulI (LoadI src) imm));
11419   effect(KILL cr);
11420 
11421   ins_cost(300);
11422   format %{ "imull   $dst, $src, $imm\t# int" %}
11423   ins_encode %{
11424     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11425   %}
11426   ins_pipe(ialu_reg_mem_alu0);
11427 %}
11428 
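// MulAddS2I computes dst*src1 + src2*src3 in a single node; the expand below
// simply reuses the plain imull and addl rules rather than emitting bespoke
// code.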
11429 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11430 %{
11431   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11432   effect(KILL cr, KILL src2);
11433 
  expand %{ mulI_rReg(dst, src1, cr);
            mulI_rReg(src2, src3, cr);
            addI_rReg(dst, src2, cr); %}
11437 %}
11438 
11439 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11440 %{
11441   predicate(!UseAPX);
11442   match(Set dst (MulL dst src));
11443   effect(KILL cr);
11444 
11445   ins_cost(300);
11446   format %{ "imulq   $dst, $src\t# long" %}
11447   ins_encode %{
11448     __ imulq($dst$$Register, $src$$Register);
11449   %}
11450   ins_pipe(ialu_reg_reg_alu0);
11451 %}
11452 
11453 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11454 %{
11455   predicate(UseAPX);
11456   match(Set dst (MulL src1 src2));
11457   effect(KILL cr);
11458 
11459   ins_cost(300);
11460   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11461   ins_encode %{
11462     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11463   %}
11464   ins_pipe(ialu_reg_reg_alu0);
11465 %}
11466 
11467 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11468 %{
11469   match(Set dst (MulL src imm));
11470   effect(KILL cr);
11471 
11472   ins_cost(300);
11473   format %{ "imulq   $dst, $src, $imm\t# long" %}
11474   ins_encode %{
11475     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11476   %}
11477   ins_pipe(ialu_reg_reg_alu0);
11478 %}
11479 
11480 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11481 %{
11482   predicate(!UseAPX);
11483   match(Set dst (MulL dst (LoadL src)));
11484   effect(KILL cr);
11485 
11486   ins_cost(350);
11487   format %{ "imulq   $dst, $src\t# long" %}
11488   ins_encode %{
11489     __ imulq($dst$$Register, $src$$Address);
11490   %}
11491   ins_pipe(ialu_reg_mem_alu0);
11492 %}
11493 
11494 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11495 %{
11496   predicate(UseAPX);
11497   match(Set dst (MulL src1 (LoadL src2)));
11498   effect(KILL cr);
11499 
11500   ins_cost(350);
  format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11502   ins_encode %{
11503     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11504   %}
11505   ins_pipe(ialu_reg_mem_alu0);
11506 %}
11507 
11508 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11509 %{
11510   match(Set dst (MulL (LoadL src) imm));
11511   effect(KILL cr);
11512 
11513   ins_cost(300);
11514   format %{ "imulq   $dst, $src, $imm\t# long" %}
11515   ins_encode %{
11516     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11517   %}
11518   ins_pipe(ialu_reg_mem_alu0);
11519 %}
11520 
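// MulHiL/UMulHiL return the high 64 bits of a 128-bit product.  The
// one-operand imulq/mulq forms place the full product in RDX:RAX, and these
// rules keep RDX as the result.  Illustrative Java (a sketch, not part of
// this file):
//
//   long hi  = Math.multiplyHigh(a, b);           // -> MulHiL
//   long uhi = Math.unsignedMultiplyHigh(a, b);   // -> UMulHiL (JDK 18+)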
11521 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11522 %{
11523   match(Set dst (MulHiL src rax));
11524   effect(USE_KILL rax, KILL cr);
11525 
11526   ins_cost(300);
11527   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11528   ins_encode %{
11529     __ imulq($src$$Register);
11530   %}
11531   ins_pipe(ialu_reg_reg_alu0);
11532 %}
11533 
11534 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11535 %{
11536   match(Set dst (UMulHiL src rax));
11537   effect(USE_KILL rax, KILL cr);
11538 
11539   ins_cost(300);
11540   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11541   ins_encode %{
11542     __ mulq($src$$Register);
11543   %}
11544   ins_pipe(ialu_reg_reg_alu0);
11545 %}
11546 
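// The signed divide/modulo rules below guard the one overflowing case before
// executing idiv: under Java semantics Integer.MIN_VALUE / -1 must wrap to
// Integer.MIN_VALUE with remainder 0, but the hardware idiv would raise #DE.
// The cmp/je sequence in the formats skips the divide for that case, leaving
// rax (quotient) and rdx (remainder) already holding the correct values:
//
//   int q = Integer.MIN_VALUE / -1;   // == Integer.MIN_VALUE, no trap
//   int r = Integer.MIN_VALUE % -1;   // == 0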
11547 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11548                    rFlagsReg cr)
11549 %{
11550   match(Set rax (DivI rax div));
11551   effect(KILL rdx, KILL cr);
11552 
11553   ins_cost(30*100+10*100); // XXX
11554   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11555             "jne,s   normal\n\t"
11556             "xorl    rdx, rdx\n\t"
11557             "cmpl    $div, -1\n\t"
11558             "je,s    done\n"
11559     "normal: cdql\n\t"
11560             "idivl   $div\n"
11561     "done:"        %}
11562   ins_encode(cdql_enc(div));
11563   ins_pipe(ialu_reg_reg_alu0);
11564 %}
11565 
11566 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11567                    rFlagsReg cr)
11568 %{
11569   match(Set rax (DivL rax div));
11570   effect(KILL rdx, KILL cr);
11571 
11572   ins_cost(30*100+10*100); // XXX
11573   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11574             "cmpq    rax, rdx\n\t"
11575             "jne,s   normal\n\t"
11576             "xorl    rdx, rdx\n\t"
11577             "cmpq    $div, -1\n\t"
11578             "je,s    done\n"
11579     "normal: cdqq\n\t"
11580             "idivq   $div\n"
11581     "done:"        %}
11582   ins_encode(cdqq_enc(div));
11583   ins_pipe(ialu_reg_reg_alu0);
11584 %}
11585 
11586 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11587 %{
11588   match(Set rax (UDivI rax div));
11589   effect(KILL rdx, KILL cr);
11590 
11591   ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# UDivI" %}
11593   ins_encode %{
11594     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11595   %}
11596   ins_pipe(ialu_reg_reg_alu0);
11597 %}
11598 
11599 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11600 %{
11601   match(Set rax (UDivL rax div));
11602   effect(KILL rdx, KILL cr);
11603 
11604   ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# UDivL" %}
11606   ins_encode %{
11607      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11608   %}
11609   ins_pipe(ialu_reg_reg_alu0);
11610 %}
11611 
11612 // Integer DIVMOD with Register, both quotient and mod results
11613 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11614                              rFlagsReg cr)
11615 %{
11616   match(DivModI rax div);
11617   effect(KILL cr);
11618 
11619   ins_cost(30*100+10*100); // XXX
11620   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11621             "jne,s   normal\n\t"
11622             "xorl    rdx, rdx\n\t"
11623             "cmpl    $div, -1\n\t"
11624             "je,s    done\n"
11625     "normal: cdql\n\t"
11626             "idivl   $div\n"
11627     "done:"        %}
11628   ins_encode(cdql_enc(div));
11629   ins_pipe(pipe_slow);
11630 %}
11631 
11632 // Long DIVMOD with Register, both quotient and mod results
11633 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11634                              rFlagsReg cr)
11635 %{
11636   match(DivModL rax div);
11637   effect(KILL cr);
11638 
11639   ins_cost(30*100+10*100); // XXX
11640   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11641             "cmpq    rax, rdx\n\t"
11642             "jne,s   normal\n\t"
11643             "xorl    rdx, rdx\n\t"
11644             "cmpq    $div, -1\n\t"
11645             "je,s    done\n"
11646     "normal: cdqq\n\t"
11647             "idivq   $div\n"
11648     "done:"        %}
11649   ins_encode(cdqq_enc(div));
11650   ins_pipe(pipe_slow);
11651 %}
11652 
11653 // Unsigned integer DIVMOD with Register, both quotient and mod results
11654 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11655                               no_rax_rdx_RegI div, rFlagsReg cr)
11656 %{
11657   match(UDivModI rax div);
11658   effect(TEMP tmp, KILL cr);
11659 
11660   ins_cost(300);
11661   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11662             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11663           %}
11664   ins_encode %{
11665     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11666   %}
11667   ins_pipe(pipe_slow);
11668 %}
11669 
11670 // Unsigned long DIVMOD with Register, both quotient and mod results
11671 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11672                               no_rax_rdx_RegL div, rFlagsReg cr)
11673 %{
11674   match(UDivModL rax div);
11675   effect(TEMP tmp, KILL cr);
11676 
11677   ins_cost(300);
11678   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11679             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11680           %}
11681   ins_encode %{
11682     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11683   %}
11684   ins_pipe(pipe_slow);
11685 %}
11686 
11687 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11688                    rFlagsReg cr)
11689 %{
11690   match(Set rdx (ModI rax div));
11691   effect(KILL rax, KILL cr);
11692 
11693   ins_cost(300); // XXX
11694   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11695             "jne,s   normal\n\t"
11696             "xorl    rdx, rdx\n\t"
11697             "cmpl    $div, -1\n\t"
11698             "je,s    done\n"
11699     "normal: cdql\n\t"
11700             "idivl   $div\n"
11701     "done:"        %}
11702   ins_encode(cdql_enc(div));
11703   ins_pipe(ialu_reg_reg_alu0);
11704 %}
11705 
11706 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11707                    rFlagsReg cr)
11708 %{
11709   match(Set rdx (ModL rax div));
11710   effect(KILL rax, KILL cr);
11711 
11712   ins_cost(300); // XXX
11713   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11714             "cmpq    rax, rdx\n\t"
11715             "jne,s   normal\n\t"
11716             "xorl    rdx, rdx\n\t"
11717             "cmpq    $div, -1\n\t"
11718             "je,s    done\n"
11719     "normal: cdqq\n\t"
11720             "idivq   $div\n"
11721     "done:"        %}
11722   ins_encode(cdqq_enc(div));
11723   ins_pipe(ialu_reg_reg_alu0);
11724 %}
11725 
11726 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11727 %{
11728   match(Set rdx (UModI rax div));
11729   effect(KILL rax, KILL cr);
11730 
11731   ins_cost(300);
  format %{ "umodl $rdx,$rax,$div\t# UModI" %}
11733   ins_encode %{
11734     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11735   %}
11736   ins_pipe(ialu_reg_reg_alu0);
11737 %}
11738 
11739 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11740 %{
11741   match(Set rdx (UModL rax div));
11742   effect(KILL rax, KILL cr);
11743 
11744   ins_cost(300);
  format %{ "umodq $rdx,$rax,$div\t# UModL" %}
11746   ins_encode %{
11747     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11748   %}
11749   ins_pipe(ialu_reg_reg_alu0);
11750 %}
11751 
11752 // Integer Shift Instructions
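// Java masks int shift counts to the low five bits (x << 35 == x << 3),
// which matches the hardware's count mod 32 behavior for 32-bit operands, so
// no explicit masking is needed.  Variable counts must live in CL for the
// legacy forms; the BMI2 shlxl/sarxl/shrxl rules take the count in any
// register and leave the flags untouched.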
11753 // Shift Left by one, two, three
11754 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11755 %{
11756   predicate(!UseAPX);
11757   match(Set dst (LShiftI dst shift));
11758   effect(KILL cr);
11759 
11760   format %{ "sall    $dst, $shift" %}
11761   ins_encode %{
11762     __ sall($dst$$Register, $shift$$constant);
11763   %}
11764   ins_pipe(ialu_reg);
11765 %}
11766 
11767 // Shift Left by one, two, three
11768 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11769 %{
11770   predicate(UseAPX);
11771   match(Set dst (LShiftI src shift));
11772   effect(KILL cr);
11773 
  format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11775   ins_encode %{
11776     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11777   %}
11778   ins_pipe(ialu_reg);
11779 %}
11780 
11781 // Shift Left by 8-bit immediate
11782 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11783 %{
11784   predicate(!UseAPX);
11785   match(Set dst (LShiftI dst shift));
11786   effect(KILL cr);
11787 
11788   format %{ "sall    $dst, $shift" %}
11789   ins_encode %{
11790     __ sall($dst$$Register, $shift$$constant);
11791   %}
11792   ins_pipe(ialu_reg);
11793 %}
11794 
11795 // Shift Left by 8-bit immediate
11796 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11797 %{
11798   predicate(UseAPX);
11799   match(Set dst (LShiftI src shift));
11800   effect(KILL cr);
11801 
11802   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11803   ins_encode %{
11804     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11805   %}
11806   ins_pipe(ialu_reg);
11807 %}
11808 
11809 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11810 %{
11811   predicate(UseAPX);
11812   match(Set dst (LShiftI (LoadI src) shift));
11813   effect(KILL cr);
11814 
11815   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11816   ins_encode %{
11817     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11818   %}
11819   ins_pipe(ialu_reg);
11820 %}
11821 
11822 // Shift Left by 8-bit immediate
11823 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11824 %{
11825   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11826   effect(KILL cr);
11827 
11828   format %{ "sall    $dst, $shift" %}
11829   ins_encode %{
11830     __ sall($dst$$Address, $shift$$constant);
11831   %}
11832   ins_pipe(ialu_mem_imm);
11833 %}
11834 
11835 // Shift Left by variable
11836 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11837 %{
11838   predicate(!VM_Version::supports_bmi2());
11839   match(Set dst (LShiftI dst shift));
11840   effect(KILL cr);
11841 
11842   format %{ "sall    $dst, $shift" %}
11843   ins_encode %{
11844     __ sall($dst$$Register);
11845   %}
11846   ins_pipe(ialu_reg_reg);
11847 %}
11848 
11849 // Shift Left by variable
11850 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11851 %{
11852   predicate(!VM_Version::supports_bmi2());
11853   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11854   effect(KILL cr);
11855 
11856   format %{ "sall    $dst, $shift" %}
11857   ins_encode %{
11858     __ sall($dst$$Address);
11859   %}
11860   ins_pipe(ialu_mem_reg);
11861 %}
11862 
11863 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11864 %{
11865   predicate(VM_Version::supports_bmi2());
11866   match(Set dst (LShiftI src shift));
11867 
11868   format %{ "shlxl   $dst, $src, $shift" %}
11869   ins_encode %{
11870     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11871   %}
11872   ins_pipe(ialu_reg_reg);
11873 %}
11874 
11875 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11876 %{
11877   predicate(VM_Version::supports_bmi2());
11878   match(Set dst (LShiftI (LoadI src) shift));
11879   ins_cost(175);
11880   format %{ "shlxl   $dst, $src, $shift" %}
11881   ins_encode %{
11882     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11883   %}
11884   ins_pipe(ialu_reg_mem);
11885 %}
11886 
11887 // Arithmetic Shift Right by 8-bit immediate
11888 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11889 %{
11890   predicate(!UseAPX);
11891   match(Set dst (RShiftI dst shift));
11892   effect(KILL cr);
11893 
11894   format %{ "sarl    $dst, $shift" %}
11895   ins_encode %{
11896     __ sarl($dst$$Register, $shift$$constant);
11897   %}
11898   ins_pipe(ialu_mem_imm);
11899 %}
11900 
11901 // Arithmetic Shift Right by 8-bit immediate
11902 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11903 %{
11904   predicate(UseAPX);
11905   match(Set dst (RShiftI src shift));
11906   effect(KILL cr);
11907 
11908   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
11909   ins_encode %{
11910     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
11911   %}
11912   ins_pipe(ialu_mem_imm);
11913 %}
11914 
11915 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11916 %{
11917   predicate(UseAPX);
11918   match(Set dst (RShiftI (LoadI src) shift));
11919   effect(KILL cr);
11920 
11921   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
11922   ins_encode %{
11923     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
11924   %}
11925   ins_pipe(ialu_mem_imm);
11926 %}
11927 
11928 // Arithmetic Shift Right by 8-bit immediate
11929 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11930 %{
11931   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11932   effect(KILL cr);
11933 
11934   format %{ "sarl    $dst, $shift" %}
11935   ins_encode %{
11936     __ sarl($dst$$Address, $shift$$constant);
11937   %}
11938   ins_pipe(ialu_mem_imm);
11939 %}
11940 
11941 // Arithmetic Shift Right by variable
11942 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11943 %{
11944   predicate(!VM_Version::supports_bmi2());
11945   match(Set dst (RShiftI dst shift));
11946   effect(KILL cr);
11947 
11948   format %{ "sarl    $dst, $shift" %}
11949   ins_encode %{
11950     __ sarl($dst$$Register);
11951   %}
11952   ins_pipe(ialu_reg_reg);
11953 %}
11954 
11955 // Arithmetic Shift Right by variable
11956 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11957 %{
11958   predicate(!VM_Version::supports_bmi2());
11959   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
11960   effect(KILL cr);
11961 
11962   format %{ "sarl    $dst, $shift" %}
11963   ins_encode %{
11964     __ sarl($dst$$Address);
11965   %}
11966   ins_pipe(ialu_mem_reg);
11967 %}
11968 
11969 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11970 %{
11971   predicate(VM_Version::supports_bmi2());
11972   match(Set dst (RShiftI src shift));
11973 
11974   format %{ "sarxl   $dst, $src, $shift" %}
11975   ins_encode %{
11976     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
11977   %}
11978   ins_pipe(ialu_reg_reg);
11979 %}
11980 
11981 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
11982 %{
11983   predicate(VM_Version::supports_bmi2());
11984   match(Set dst (RShiftI (LoadI src) shift));
11985   ins_cost(175);
11986   format %{ "sarxl   $dst, $src, $shift" %}
11987   ins_encode %{
11988     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
11989   %}
11990   ins_pipe(ialu_reg_mem);
11991 %}
11992 
11993 // Logical Shift Right by 8-bit immediate
11994 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11995 %{
11996   predicate(!UseAPX);
11997   match(Set dst (URShiftI dst shift));
11998   effect(KILL cr);
11999 
12000   format %{ "shrl    $dst, $shift" %}
12001   ins_encode %{
12002     __ shrl($dst$$Register, $shift$$constant);
12003   %}
12004   ins_pipe(ialu_reg);
12005 %}
12006 
12007 // Logical Shift Right by 8-bit immediate
12008 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12009 %{
12010   predicate(UseAPX);
12011   match(Set dst (URShiftI src shift));
12012   effect(KILL cr);
12013 
  format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12015   ins_encode %{
12016     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12017   %}
12018   ins_pipe(ialu_reg);
12019 %}
12020 
12021 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12022 %{
12023   predicate(UseAPX);
12024   match(Set dst (URShiftI (LoadI src) shift));
12025   effect(KILL cr);
12026 
  format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12028   ins_encode %{
12029     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12030   %}
12031   ins_pipe(ialu_reg);
12032 %}
12033 
12034 // Logical Shift Right by 8-bit immediate
12035 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12036 %{
12037   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12038   effect(KILL cr);
12039 
12040   format %{ "shrl    $dst, $shift" %}
12041   ins_encode %{
12042     __ shrl($dst$$Address, $shift$$constant);
12043   %}
12044   ins_pipe(ialu_mem_imm);
12045 %}
12046 
12047 // Logical Shift Right by variable
12048 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12049 %{
12050   predicate(!VM_Version::supports_bmi2());
12051   match(Set dst (URShiftI dst shift));
12052   effect(KILL cr);
12053 
12054   format %{ "shrl    $dst, $shift" %}
12055   ins_encode %{
12056     __ shrl($dst$$Register);
12057   %}
12058   ins_pipe(ialu_reg_reg);
12059 %}
12060 
12061 // Logical Shift Right by variable
12062 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12063 %{
12064   predicate(!VM_Version::supports_bmi2());
12065   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12066   effect(KILL cr);
12067 
12068   format %{ "shrl    $dst, $shift" %}
12069   ins_encode %{
12070     __ shrl($dst$$Address);
12071   %}
12072   ins_pipe(ialu_mem_reg);
12073 %}
12074 
12075 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12076 %{
12077   predicate(VM_Version::supports_bmi2());
12078   match(Set dst (URShiftI src shift));
12079 
12080   format %{ "shrxl   $dst, $src, $shift" %}
12081   ins_encode %{
12082     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12083   %}
12084   ins_pipe(ialu_reg_reg);
12085 %}
12086 
12087 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12088 %{
12089   predicate(VM_Version::supports_bmi2());
12090   match(Set dst (URShiftI (LoadI src) shift));
12091   ins_cost(175);
12092   format %{ "shrxl   $dst, $src, $shift" %}
12093   ins_encode %{
12094     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12095   %}
12096   ins_pipe(ialu_reg_mem);
12097 %}
12098 
12099 // Long Shift Instructions
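// Long shift counts are masked to the low six bits (hence the explicit
// '& 0x3F' on the arithmetic-right immediates below), matching both Java
// semantics and the hardware's count mod 64 behavior for 64-bit operands.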
12100 // Shift Left by one, two, three
12101 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12102 %{
12103   predicate(!UseAPX);
12104   match(Set dst (LShiftL dst shift));
12105   effect(KILL cr);
12106 
12107   format %{ "salq    $dst, $shift" %}
12108   ins_encode %{
12109     __ salq($dst$$Register, $shift$$constant);
12110   %}
12111   ins_pipe(ialu_reg);
12112 %}
12113 
12114 // Shift Left by one, two, three
12115 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12116 %{
12117   predicate(UseAPX);
12118   match(Set dst (LShiftL src shift));
12119   effect(KILL cr);
12120 
12121   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12122   ins_encode %{
12123     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12124   %}
12125   ins_pipe(ialu_reg);
12126 %}
12127 
12128 // Shift Left by 8-bit immediate
12129 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12130 %{
12131   predicate(!UseAPX);
12132   match(Set dst (LShiftL dst shift));
12133   effect(KILL cr);
12134 
12135   format %{ "salq    $dst, $shift" %}
12136   ins_encode %{
12137     __ salq($dst$$Register, $shift$$constant);
12138   %}
12139   ins_pipe(ialu_reg);
12140 %}
12141 
12142 // Shift Left by 8-bit immediate
12143 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12144 %{
12145   predicate(UseAPX);
12146   match(Set dst (LShiftL src shift));
12147   effect(KILL cr);
12148 
12149   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12150   ins_encode %{
12151     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12152   %}
12153   ins_pipe(ialu_reg);
12154 %}
12155 
12156 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12157 %{
12158   predicate(UseAPX);
12159   match(Set dst (LShiftL (LoadL src) shift));
12160   effect(KILL cr);
12161 
12162   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12163   ins_encode %{
12164     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12165   %}
12166   ins_pipe(ialu_reg);
12167 %}
12168 
12169 // Shift Left by 8-bit immediate
12170 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12171 %{
12172   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12173   effect(KILL cr);
12174 
12175   format %{ "salq    $dst, $shift" %}
12176   ins_encode %{
12177     __ salq($dst$$Address, $shift$$constant);
12178   %}
12179   ins_pipe(ialu_mem_imm);
12180 %}
12181 
12182 // Shift Left by variable
12183 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12184 %{
12185   predicate(!VM_Version::supports_bmi2());
12186   match(Set dst (LShiftL dst shift));
12187   effect(KILL cr);
12188 
12189   format %{ "salq    $dst, $shift" %}
12190   ins_encode %{
12191     __ salq($dst$$Register);
12192   %}
12193   ins_pipe(ialu_reg_reg);
12194 %}
12195 
12196 // Shift Left by variable
12197 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12198 %{
12199   predicate(!VM_Version::supports_bmi2());
12200   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12201   effect(KILL cr);
12202 
12203   format %{ "salq    $dst, $shift" %}
12204   ins_encode %{
12205     __ salq($dst$$Address);
12206   %}
12207   ins_pipe(ialu_mem_reg);
12208 %}
12209 
12210 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12211 %{
12212   predicate(VM_Version::supports_bmi2());
12213   match(Set dst (LShiftL src shift));
12214 
12215   format %{ "shlxq   $dst, $src, $shift" %}
12216   ins_encode %{
12217     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12218   %}
12219   ins_pipe(ialu_reg_reg);
12220 %}
12221 
12222 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12223 %{
12224   predicate(VM_Version::supports_bmi2());
12225   match(Set dst (LShiftL (LoadL src) shift));
12226   ins_cost(175);
12227   format %{ "shlxq   $dst, $src, $shift" %}
12228   ins_encode %{
12229     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12230   %}
12231   ins_pipe(ialu_reg_mem);
12232 %}
12233 
12234 // Arithmetic Shift Right by 8-bit immediate
12235 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12236 %{
12237   predicate(!UseAPX);
12238   match(Set dst (RShiftL dst shift));
12239   effect(KILL cr);
12240 
12241   format %{ "sarq    $dst, $shift" %}
12242   ins_encode %{
12243     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12244   %}
  ins_pipe(ialu_reg);
12246 %}
12247 
12248 // Arithmetic Shift Right by 8-bit immediate
12249 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12250 %{
12251   predicate(UseAPX);
12252   match(Set dst (RShiftL src shift));
12253   effect(KILL cr);
12254 
12255   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12256   ins_encode %{
12257     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12258   %}
  ins_pipe(ialu_reg);
12260 %}
12261 
12262 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12263 %{
12264   predicate(UseAPX);
12265   match(Set dst (RShiftL (LoadL src) shift));
12266   effect(KILL cr);
12267 
12268   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12269   ins_encode %{
12270     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12271   %}
  ins_pipe(ialu_reg);
12273 %}
12274 
12275 // Arithmetic Shift Right by 8-bit immediate
12276 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12277 %{
12278   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12279   effect(KILL cr);
12280 
12281   format %{ "sarq    $dst, $shift" %}
12282   ins_encode %{
12283     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12284   %}
12285   ins_pipe(ialu_mem_imm);
12286 %}
12287 
12288 // Arithmetic Shift Right by variable
12289 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12290 %{
12291   predicate(!VM_Version::supports_bmi2());
12292   match(Set dst (RShiftL dst shift));
12293   effect(KILL cr);
12294 
12295   format %{ "sarq    $dst, $shift" %}
12296   ins_encode %{
12297     __ sarq($dst$$Register);
12298   %}
12299   ins_pipe(ialu_reg_reg);
12300 %}
12301 
12302 // Arithmetic Shift Right by variable
12303 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12304 %{
12305   predicate(!VM_Version::supports_bmi2());
12306   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12307   effect(KILL cr);
12308 
12309   format %{ "sarq    $dst, $shift" %}
12310   ins_encode %{
12311     __ sarq($dst$$Address);
12312   %}
12313   ins_pipe(ialu_mem_reg);
12314 %}
12315 
12316 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12317 %{
12318   predicate(VM_Version::supports_bmi2());
12319   match(Set dst (RShiftL src shift));
12320 
12321   format %{ "sarxq   $dst, $src, $shift" %}
12322   ins_encode %{
12323     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12324   %}
12325   ins_pipe(ialu_reg_reg);
12326 %}
12327 
12328 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12329 %{
12330   predicate(VM_Version::supports_bmi2());
12331   match(Set dst (RShiftL (LoadL src) shift));
12332   ins_cost(175);
12333   format %{ "sarxq   $dst, $src, $shift" %}
12334   ins_encode %{
12335     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12336   %}
12337   ins_pipe(ialu_reg_mem);
12338 %}
12339 
12340 // Logical Shift Right by 8-bit immediate
12341 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12342 %{
12343   predicate(!UseAPX);
12344   match(Set dst (URShiftL dst shift));
12345   effect(KILL cr);
12346 
12347   format %{ "shrq    $dst, $shift" %}
12348   ins_encode %{
12349     __ shrq($dst$$Register, $shift$$constant);
12350   %}
12351   ins_pipe(ialu_reg);
12352 %}
12353 
12354 // Logical Shift Right by 8-bit immediate
12355 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12356 %{
12357   predicate(UseAPX);
12358   match(Set dst (URShiftL src shift));
12359   effect(KILL cr);
12360 
12361   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12362   ins_encode %{
12363     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12364   %}
12365   ins_pipe(ialu_reg);
12366 %}
12367 
12368 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12369 %{
12370   predicate(UseAPX);
12371   match(Set dst (URShiftL (LoadL src) shift));
12372   effect(KILL cr);
12373 
12374   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12375   ins_encode %{
12376     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12377   %}
12378   ins_pipe(ialu_reg);
12379 %}
12380 
12381 // Logical Shift Right by 8-bit immediate
12382 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12383 %{
12384   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12385   effect(KILL cr);
12386 
12387   format %{ "shrq    $dst, $shift" %}
12388   ins_encode %{
12389     __ shrq($dst$$Address, $shift$$constant);
12390   %}
12391   ins_pipe(ialu_mem_imm);
12392 %}
12393 
12394 // Logical Shift Right by variable
12395 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12396 %{
12397   predicate(!VM_Version::supports_bmi2());
12398   match(Set dst (URShiftL dst shift));
12399   effect(KILL cr);
12400 
12401   format %{ "shrq    $dst, $shift" %}
12402   ins_encode %{
12403     __ shrq($dst$$Register);
12404   %}
12405   ins_pipe(ialu_reg_reg);
12406 %}
12407 
12408 // Logical Shift Right by variable
12409 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12410 %{
12411   predicate(!VM_Version::supports_bmi2());
12412   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12413   effect(KILL cr);
12414 
12415   format %{ "shrq    $dst, $shift" %}
12416   ins_encode %{
12417     __ shrq($dst$$Address);
12418   %}
12419   ins_pipe(ialu_mem_reg);
12420 %}
12421 
12422 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12423 %{
12424   predicate(VM_Version::supports_bmi2());
12425   match(Set dst (URShiftL src shift));
12426 
12427   format %{ "shrxq   $dst, $src, $shift" %}
12428   ins_encode %{
12429     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12430   %}
12431   ins_pipe(ialu_reg_reg);
12432 %}
12433 
12434 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12435 %{
12436   predicate(VM_Version::supports_bmi2());
12437   match(Set dst (URShiftL (LoadL src) shift));
12438   ins_cost(175);
12439   format %{ "shrxq   $dst, $src, $shift" %}
12440   ins_encode %{
12441     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12442   %}
12443   ins_pipe(ialu_reg_mem);
12444 %}
12445 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
12447 // This idiom is used by the compiler for the i2b bytecode.
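// Since (x << 24) >> 24 sign-extends the low byte, a single movsbl suffices.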
12448 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12449 %{
12450   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12451 
12452   format %{ "movsbl  $dst, $src\t# i2b" %}
12453   ins_encode %{
12454     __ movsbl($dst$$Register, $src$$Register);
12455   %}
12456   ins_pipe(ialu_reg_reg);
12457 %}
12458 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12461 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12462 %{
12463   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12464 
12465   format %{ "movswl  $dst, $src\t# i2s" %}
12466   ins_encode %{
12467     __ movswl($dst$$Register, $src$$Register);
12468   %}
12469   ins_pipe(ialu_reg_reg);
12470 %}
12471 
12472 // ROL/ROR instructions
12473 
12474 // Rotate left by constant.
12475 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12476 %{
12477   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12478   match(Set dst (RotateLeft dst shift));
12479   effect(KILL cr);
12480   format %{ "roll    $dst, $shift" %}
12481   ins_encode %{
12482     __ roll($dst$$Register, $shift$$constant);
12483   %}
12484   ins_pipe(ialu_reg);
12485 %}
12486 
12487 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12488 %{
12489   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12490   match(Set dst (RotateLeft src shift));
12491   format %{ "rolxl   $dst, $src, $shift" %}
12492   ins_encode %{
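    // BMI2 has no rolx instruction; rotate left by n equals rotate right by
    // (32 - n) mod 32, so emit rorxl with the complemented count.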
12493     int shift = 32 - ($shift$$constant & 31);
12494     __ rorxl($dst$$Register, $src$$Register, shift);
12495   %}
12496   ins_pipe(ialu_reg_reg);
12497 %}
12498 
12499 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12500 %{
12501   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12502   match(Set dst (RotateLeft (LoadI src) shift));
12503   ins_cost(175);
12504   format %{ "rolxl   $dst, $src, $shift" %}
12505   ins_encode %{
12506     int shift = 32 - ($shift$$constant & 31);
12507     __ rorxl($dst$$Register, $src$$Address, shift);
12508   %}
12509   ins_pipe(ialu_reg_mem);
12510 %}
12511 
12512 // Rotate Left by variable
12513 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12514 %{
12515   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12516   match(Set dst (RotateLeft dst shift));
12517   effect(KILL cr);
12518   format %{ "roll    $dst, $shift" %}
12519   ins_encode %{
12520     __ roll($dst$$Register);
12521   %}
12522   ins_pipe(ialu_reg_reg);
12523 %}
12524 
12525 // Rotate Left by variable
12526 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12527 %{
12528   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12529   match(Set dst (RotateLeft src shift));
12530   effect(KILL cr);
12531 
12532   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12533   ins_encode %{
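    // The rotate count is implicitly CL (rcx_RegI); the NDD form writes the
    // result to $dst while leaving $src intact.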
12534     __ eroll($dst$$Register, $src$$Register, false);
12535   %}
12536   ins_pipe(ialu_reg_reg);
12537 %}
12538 
12539 // Rotate Right by constant.
12540 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12541 %{
12542   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12543   match(Set dst (RotateRight dst shift));
12544   effect(KILL cr);
12545   format %{ "rorl    $dst, $shift" %}
12546   ins_encode %{
12547     __ rorl($dst$$Register, $shift$$constant);
12548   %}
12549   ins_pipe(ialu_reg);
12550 %}
12551 
12552 // Rotate Right by constant.
12553 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12554 %{
12555   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12556   match(Set dst (RotateRight src shift));
12557   format %{ "rorxl   $dst, $src, $shift" %}
12558   ins_encode %{
12559     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12560   %}
12561   ins_pipe(ialu_reg_reg);
12562 %}
12563 
12564 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12565 %{
12566   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12567   match(Set dst (RotateRight (LoadI src) shift));
12568   ins_cost(175);
12569   format %{ "rorxl   $dst, $src, $shift" %}
12570   ins_encode %{
12571     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12572   %}
12573   ins_pipe(ialu_reg_mem);
12574 %}
12575 
12576 // Rotate Right by variable
12577 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12578 %{
12579   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12580   match(Set dst (RotateRight dst shift));
12581   effect(KILL cr);
12582   format %{ "rorl    $dst, $shift" %}
12583   ins_encode %{
12584     __ rorl($dst$$Register);
12585   %}
12586   ins_pipe(ialu_reg_reg);
12587 %}
12588 
12589 // Rotate Right by variable
12590 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12591 %{
12592   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12593   match(Set dst (RotateRight src shift));
12594   effect(KILL cr);
12595 
12596   format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
12597   ins_encode %{
12598     __ erorl($dst$$Register, $src$$Register, false);
12599   %}
12600   ins_pipe(ialu_reg_reg);
12601 %}
12602 
12603 // Rotate Left by constant.
12604 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12605 %{
12606   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12607   match(Set dst (RotateLeft dst shift));
12608   effect(KILL cr);
12609   format %{ "rolq    $dst, $shift" %}
12610   ins_encode %{
12611     __ rolq($dst$$Register, $shift$$constant);
12612   %}
12613   ins_pipe(ialu_reg);
12614 %}
12615 
12616 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12617 %{
12618   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12619   match(Set dst (RotateLeft src shift));
12620   format %{ "rolxq   $dst, $src, $shift" %}
12621   ins_encode %{
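    // Rotate left by n equals rotate right by (64 - n) mod 64; BMI2 only
    // provides rorx, so emit rorxq with the complemented count.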
12622     int shift = 64 - ($shift$$constant & 63);
12623     __ rorxq($dst$$Register, $src$$Register, shift);
12624   %}
12625   ins_pipe(ialu_reg_reg);
12626 %}
12627 
12628 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12629 %{
12630   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12631   match(Set dst (RotateLeft (LoadL src) shift));
12632   ins_cost(175);
12633   format %{ "rolxq   $dst, $src, $shift" %}
12634   ins_encode %{
12635     int shift = 64 - ($shift$$constant & 63);
12636     __ rorxq($dst$$Register, $src$$Address, shift);
12637   %}
12638   ins_pipe(ialu_reg_mem);
12639 %}
12640 
12641 // Rotate Left by variable
12642 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12643 %{
12644   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12645   match(Set dst (RotateLeft dst shift));
12646   effect(KILL cr);
12647   format %{ "rolq    $dst, $shift" %}
12648   ins_encode %{
12649     __ rolq($dst$$Register);
12650   %}
12651   ins_pipe(ialu_reg_reg);
12652 %}
12653 
12654 // Rotate Left by variable
12655 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12656 %{
12657   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12658   match(Set dst (RotateLeft src shift));
12659   effect(KILL cr);
12660 
12661   format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
12662   ins_encode %{
12663     __ erolq($dst$$Register, $src$$Register, false);
12664   %}
12665   ins_pipe(ialu_reg_reg);
12666 %}
12667 
12668 // Rotate Right by constant.
12669 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12670 %{
12671   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12672   match(Set dst (RotateRight dst shift));
12673   effect(KILL cr);
12674   format %{ "rorq    $dst, $shift" %}
12675   ins_encode %{
12676     __ rorq($dst$$Register, $shift$$constant);
12677   %}
12678   ins_pipe(ialu_reg);
12679 %}
12680 
12681 // Rotate Right by constant
12682 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12683 %{
12684   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12685   match(Set dst (RotateRight src shift));
12686   format %{ "rorxq   $dst, $src, $shift" %}
12687   ins_encode %{
12688     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12689   %}
12690   ins_pipe(ialu_reg_reg);
12691 %}
12692 
12693 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12694 %{
12695   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12696   match(Set dst (RotateRight (LoadL src) shift));
12697   ins_cost(175);
12698   format %{ "rorxq   $dst, $src, $shift" %}
12699   ins_encode %{
12700     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12701   %}
12702   ins_pipe(ialu_reg_mem);
12703 %}
12704 
12705 // Rotate Right by variable
12706 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12707 %{
12708   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12709   match(Set dst (RotateRight dst shift));
12710   effect(KILL cr);
12711   format %{ "rorq    $dst, $shift" %}
12712   ins_encode %{
12713     __ rorq($dst$$Register);
12714   %}
12715   ins_pipe(ialu_reg_reg);
12716 %}
12717 
12718 // Rotate Right by variable
12719 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12720 %{
12721   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12722   match(Set dst (RotateRight src shift));
12723   effect(KILL cr);
12724 
12725   format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
12726   ins_encode %{
12727     __ erorq($dst$$Register, $src$$Register, false);
12728   %}
12729   ins_pipe(ialu_reg_reg);
12730 %}
12731 
12732 //----------------------------- CompressBits/ExpandBits ------------------------
12733 
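// CompressBits maps to PEXT: the src bits selected by mask are packed into the
// low-order bits of dst.  ExpandBits maps to PDEP, the inverse scatter.  For
// example, pext(src = 0b101101, mask = 0b001110) extracts src bits 1-3 (0, 1, 1)
// and packs them into dst = 0b110.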
12734 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12735   predicate(n->bottom_type()->isa_long());
12736   match(Set dst (CompressBits src mask));
12737   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12738   ins_encode %{
12739     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12740   %}
12741   ins_pipe( pipe_slow );
12742 %}
12743 
12744 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12745   predicate(n->bottom_type()->isa_long());
12746   match(Set dst (ExpandBits src mask));
12747   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12748   ins_encode %{
12749     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12750   %}
12751   ins_pipe( pipe_slow );
12752 %}
12753 
12754 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12755   predicate(n->bottom_type()->isa_long());
12756   match(Set dst (CompressBits src (LoadL mask)));
12757   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12758   ins_encode %{
12759     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12760   %}
12761   ins_pipe( pipe_slow );
12762 %}
12763 
12764 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12765   predicate(n->bottom_type()->isa_long());
12766   match(Set dst (ExpandBits src (LoadL mask)));
12767   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12768   ins_encode %{
12769     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12770   %}
12771   ins_pipe( pipe_slow );
12772 %}
12773 
12774 
12775 // Logical Instructions
12776 
12777 // Integer Logical Instructions
12778 
12779 // And Instructions
12780 // And Register with Register
12781 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12782 %{
12783   predicate(!UseAPX);
12784   match(Set dst (AndI dst src));
12785   effect(KILL cr);
12786   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12787 
12788   format %{ "andl    $dst, $src\t# int" %}
12789   ins_encode %{
12790     __ andl($dst$$Register, $src$$Register);
12791   %}
12792   ins_pipe(ialu_reg_reg);
12793 %}
12794 
12795 // And Register with Register using New Data Destination (NDD)
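// NDD forms encode a separate destination register, so both sources are left
// intact and no extra move is needed to preserve them.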
12796 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12797 %{
12798   predicate(UseAPX);
12799   match(Set dst (AndI src1 src2));
12800   effect(KILL cr);
12801   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12802 
12803   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12804   ins_encode %{
12805     __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12807   %}
12808   ins_pipe(ialu_reg_reg);
12809 %}
12810 
12811 // And Register with Immediate 255
12812 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12813 %{
12814   match(Set dst (AndI src mask));
12815 
12816   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12817   ins_encode %{
12818     __ movzbl($dst$$Register, $src$$Register);
12819   %}
12820   ins_pipe(ialu_reg);
12821 %}
12822 
12823 // And Register with Immediate 255 and promote to long
12824 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12825 %{
12826   match(Set dst (ConvI2L (AndI src mask)));
12827 
12828   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
12829   ins_encode %{
12830     __ movzbl($dst$$Register, $src$$Register);
12831   %}
12832   ins_pipe(ialu_reg);
12833 %}
12834 
12835 // And Register with Immediate 65535
12836 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12837 %{
12838   match(Set dst (AndI src mask));
12839 
12840   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
12841   ins_encode %{
12842     __ movzwl($dst$$Register, $src$$Register);
12843   %}
12844   ins_pipe(ialu_reg);
12845 %}
12846 
12847 // And Register with Immediate 65535 and promote to long
12848 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12849 %{
12850   match(Set dst (ConvI2L (AndI src mask)));
12851 
12852   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
12853   ins_encode %{
12854     __ movzwl($dst$$Register, $src$$Register);
12855   %}
12856   ins_pipe(ialu_reg);
12857 %}
12858 
// The int-to-long conversion can be skipped after an AND with a small (2^k - 1) bitmask
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12861 %{
12862   predicate(VM_Version::supports_bmi2());
12863   ins_cost(125);
12864   effect(TEMP tmp, KILL cr);
12865   match(Set dst (ConvI2L (AndI src mask)));
12866   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
12867   ins_encode %{
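    // mask is 2^k - 1, so k = exact_log2(mask + 1); bzhiq copies $src to $dst
    // while zeroing every bit at position k and above (e.g. mask == 0x7FFF gives k == 15).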
12868     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12869     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12870   %}
12871   ins_pipe(ialu_reg_reg);
12872 %}
12873 
12874 // And Register with Immediate
12875 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12876 %{
12877   predicate(!UseAPX);
12878   match(Set dst (AndI dst src));
12879   effect(KILL cr);
12880   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12881 
12882   format %{ "andl    $dst, $src\t# int" %}
12883   ins_encode %{
12884     __ andl($dst$$Register, $src$$constant);
12885   %}
12886   ins_pipe(ialu_reg);
12887 %}
12888 
12889 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12890 %{
12891   predicate(UseAPX);
12892   match(Set dst (AndI src1 src2));
12893   effect(KILL cr);
12894   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12895 
12896   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12897   ins_encode %{
12898     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12899   %}
12900   ins_pipe(ialu_reg);
12901 %}
12902 
12903 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
12904 %{
12905   predicate(UseAPX);
12906   match(Set dst (AndI (LoadI src1) src2));
12907   effect(KILL cr);
12908   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12909 
12910   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12911   ins_encode %{
12912     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
12913   %}
12914   ins_pipe(ialu_reg);
12915 %}
12916 
12917 // And Register with Memory
12918 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
12919 %{
12920   predicate(!UseAPX);
12921   match(Set dst (AndI dst (LoadI src)));
12922   effect(KILL cr);
12923   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12924 
12925   ins_cost(150);
12926   format %{ "andl    $dst, $src\t# int" %}
12927   ins_encode %{
12928     __ andl($dst$$Register, $src$$Address);
12929   %}
12930   ins_pipe(ialu_reg_mem);
12931 %}
12932 
12933 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
12934 %{
12935   predicate(UseAPX);
12936   match(Set dst (AndI src1 (LoadI src2)));
12937   effect(KILL cr);
12938   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12939 
12940   ins_cost(150);
12941   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12942   ins_encode %{
12943     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
12944   %}
12945   ins_pipe(ialu_reg_mem);
12946 %}
12947 
12948 // And Memory with Register
12949 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12950 %{
12951   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
12952   effect(KILL cr);
12953   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12954 
12955   ins_cost(150);
12956   format %{ "andb    $dst, $src\t# byte" %}
12957   ins_encode %{
12958     __ andb($dst$$Address, $src$$Register);
12959   %}
12960   ins_pipe(ialu_mem_reg);
12961 %}
12962 
12963 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12964 %{
12965   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12966   effect(KILL cr);
12967   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12968 
12969   ins_cost(150);
12970   format %{ "andl    $dst, $src\t# int" %}
12971   ins_encode %{
12972     __ andl($dst$$Address, $src$$Register);
12973   %}
12974   ins_pipe(ialu_mem_reg);
12975 %}
12976 
12977 // And Memory with Immediate
12978 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
12979 %{
12980   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12981   effect(KILL cr);
12982   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12983 
12984   ins_cost(125);
12985   format %{ "andl    $dst, $src\t# int" %}
12986   ins_encode %{
12987     __ andl($dst$$Address, $src$$constant);
12988   %}
12989   ins_pipe(ialu_mem_imm);
12990 %}
12991 
12992 // BMI1 instructions
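// ANDN computes dst = (~src1) & src2; the ideal graph expresses the NOT as an
// XOR with -1, hence the (AndI (XorI src1 minus_1) src2) match rules below.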
12993 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
12994   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
12995   predicate(UseBMI1Instructions);
12996   effect(KILL cr);
12997   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12998 
12999   ins_cost(125);
13000   format %{ "andnl  $dst, $src1, $src2" %}
13001 
13002   ins_encode %{
13003     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13004   %}
13005   ins_pipe(ialu_reg_mem);
13006 %}
13007 
13008 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13009   match(Set dst (AndI (XorI src1 minus_1) src2));
13010   predicate(UseBMI1Instructions);
13011   effect(KILL cr);
13012   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13013 
13014   format %{ "andnl  $dst, $src1, $src2" %}
13015 
13016   ins_encode %{
13017     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13018   %}
13019   ins_pipe(ialu_reg);
13020 %}
13021 
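// BLSI isolates the lowest set bit: dst = src & (-src).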
13022 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13023   match(Set dst (AndI (SubI imm_zero src) src));
13024   predicate(UseBMI1Instructions);
13025   effect(KILL cr);
13026   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13027 
13028   format %{ "blsil  $dst, $src" %}
13029 
13030   ins_encode %{
13031     __ blsil($dst$$Register, $src$$Register);
13032   %}
13033   ins_pipe(ialu_reg);
13034 %}
13035 
13036 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13037   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13038   predicate(UseBMI1Instructions);
13039   effect(KILL cr);
13040   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13041 
13042   ins_cost(125);
13043   format %{ "blsil  $dst, $src" %}
13044 
13045   ins_encode %{
13046     __ blsil($dst$$Register, $src$$Address);
13047   %}
13048   ins_pipe(ialu_reg_mem);
13049 %}
13050 
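// BLSMSK builds a mask up to and including the lowest set bit: dst = src ^ (src - 1).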
13051 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13052 %{
13053   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13054   predicate(UseBMI1Instructions);
13055   effect(KILL cr);
13056   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13057 
13058   ins_cost(125);
13059   format %{ "blsmskl $dst, $src" %}
13060 
13061   ins_encode %{
13062     __ blsmskl($dst$$Register, $src$$Address);
13063   %}
13064   ins_pipe(ialu_reg_mem);
13065 %}
13066 
13067 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13068 %{
13069   match(Set dst (XorI (AddI src minus_1) src));
13070   predicate(UseBMI1Instructions);
13071   effect(KILL cr);
13072   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13073 
13074   format %{ "blsmskl $dst, $src" %}
13075 
13076   ins_encode %{
13077     __ blsmskl($dst$$Register, $src$$Register);
13078   %}
13079 
13080   ins_pipe(ialu_reg);
13081 %}
13082 
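// BLSR clears the lowest set bit: dst = src & (src - 1).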
13083 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13084 %{
13085   match(Set dst (AndI (AddI src minus_1) src) );
13086   predicate(UseBMI1Instructions);
13087   effect(KILL cr);
13088   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13089 
13090   format %{ "blsrl  $dst, $src" %}
13091 
13092   ins_encode %{
13093     __ blsrl($dst$$Register, $src$$Register);
13094   %}
13095 
  ins_pipe(ialu_reg);
13097 %}
13098 
13099 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13100 %{
13101   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13102   predicate(UseBMI1Instructions);
13103   effect(KILL cr);
13104   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13105 
13106   ins_cost(125);
13107   format %{ "blsrl  $dst, $src" %}
13108 
13109   ins_encode %{
13110     __ blsrl($dst$$Register, $src$$Address);
13111   %}
13112 
  ins_pipe(ialu_reg_mem);
13114 %}
13115 
13116 // Or Instructions
13117 // Or Register with Register
13118 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13119 %{
13120   predicate(!UseAPX);
13121   match(Set dst (OrI dst src));
13122   effect(KILL cr);
13123   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13124 
13125   format %{ "orl     $dst, $src\t# int" %}
13126   ins_encode %{
13127     __ orl($dst$$Register, $src$$Register);
13128   %}
13129   ins_pipe(ialu_reg_reg);
13130 %}
13131 
13132 // Or Register with Register using New Data Destination (NDD)
13133 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13134 %{
13135   predicate(UseAPX);
13136   match(Set dst (OrI src1 src2));
13137   effect(KILL cr);
13138   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13139 
13140   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13141   ins_encode %{
13142     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13143   %}
13144   ins_pipe(ialu_reg_reg);
13145 %}
13146 
13147 // Or Register with Immediate
13148 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13149 %{
13150   predicate(!UseAPX);
13151   match(Set dst (OrI dst src));
13152   effect(KILL cr);
13153   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13154 
13155   format %{ "orl     $dst, $src\t# int" %}
13156   ins_encode %{
13157     __ orl($dst$$Register, $src$$constant);
13158   %}
13159   ins_pipe(ialu_reg);
13160 %}
13161 
13162 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13163 %{
13164   predicate(UseAPX);
13165   match(Set dst (OrI src1 src2));
13166   effect(KILL cr);
13167   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13168 
13169   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13170   ins_encode %{
13171     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13172   %}
13173   ins_pipe(ialu_reg);
13174 %}
13175 
13176 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13177 %{
13178   predicate(UseAPX);
13179   match(Set dst (OrI src1 src2));
13180   effect(KILL cr);
13181   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13182 
13183   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13184   ins_encode %{
13185     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13186   %}
13187   ins_pipe(ialu_reg);
13188 %}
13189 
13190 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13191 %{
13192   predicate(UseAPX);
13193   match(Set dst (OrI (LoadI src1) src2));
13194   effect(KILL cr);
13195   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13196 
13197   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13198   ins_encode %{
13199     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13200   %}
13201   ins_pipe(ialu_reg);
13202 %}
13203 
13204 // Or Register with Memory
13205 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13206 %{
13207   predicate(!UseAPX);
13208   match(Set dst (OrI dst (LoadI src)));
13209   effect(KILL cr);
13210   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13211 
13212   ins_cost(150);
13213   format %{ "orl     $dst, $src\t# int" %}
13214   ins_encode %{
13215     __ orl($dst$$Register, $src$$Address);
13216   %}
13217   ins_pipe(ialu_reg_mem);
13218 %}
13219 
13220 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13221 %{
13222   predicate(UseAPX);
13223   match(Set dst (OrI src1 (LoadI src2)));
13224   effect(KILL cr);
13225   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13226 
13227   ins_cost(150);
13228   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13229   ins_encode %{
13230     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13231   %}
13232   ins_pipe(ialu_reg_mem);
13233 %}
13234 
13235 // Or Memory with Register
13236 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13237 %{
13238   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13239   effect(KILL cr);
13240   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13241 
13242   ins_cost(150);
13243   format %{ "orb    $dst, $src\t# byte" %}
13244   ins_encode %{
13245     __ orb($dst$$Address, $src$$Register);
13246   %}
13247   ins_pipe(ialu_mem_reg);
13248 %}
13249 
13250 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13251 %{
13252   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13253   effect(KILL cr);
13254   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13255 
13256   ins_cost(150);
13257   format %{ "orl     $dst, $src\t# int" %}
13258   ins_encode %{
13259     __ orl($dst$$Address, $src$$Register);
13260   %}
13261   ins_pipe(ialu_mem_reg);
13262 %}
13263 
13264 // Or Memory with Immediate
13265 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13266 %{
13267   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13268   effect(KILL cr);
13269   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13270 
13271   ins_cost(125);
13272   format %{ "orl     $dst, $src\t# int" %}
13273   ins_encode %{
13274     __ orl($dst$$Address, $src$$constant);
13275   %}
13276   ins_pipe(ialu_mem_imm);
13277 %}
13278 
13279 // Xor Instructions
13280 // Xor Register with Register
13281 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13282 %{
13283   predicate(!UseAPX);
13284   match(Set dst (XorI dst src));
13285   effect(KILL cr);
13286   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13287 
13288   format %{ "xorl    $dst, $src\t# int" %}
13289   ins_encode %{
13290     __ xorl($dst$$Register, $src$$Register);
13291   %}
13292   ins_pipe(ialu_reg_reg);
13293 %}
13294 
13295 // Xor Register with Register using New Data Destination (NDD)
13296 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13297 %{
13298   predicate(UseAPX);
13299   match(Set dst (XorI src1 src2));
13300   effect(KILL cr);
13301   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13302 
13303   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13304   ins_encode %{
13305     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13306   %}
13307   ins_pipe(ialu_reg_reg);
13308 %}
13309 
13310 // Xor Register with Immediate -1
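// x ^ -1 is ~x, and NOT modifies no flags, so no KILL cr effect is needed.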
13311 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13312 %{
13313   predicate(!UseAPX);
13314   match(Set dst (XorI dst imm));
13315 
13316   format %{ "notl    $dst" %}
13317   ins_encode %{
13318      __ notl($dst$$Register);
13319   %}
13320   ins_pipe(ialu_reg);
13321 %}
13322 
13323 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13324 %{
13325   match(Set dst (XorI src imm));
13326   predicate(UseAPX);
13327 
13328   format %{ "enotl    $dst, $src" %}
13329   ins_encode %{
13330      __ enotl($dst$$Register, $src$$Register);
13331   %}
13332   ins_pipe(ialu_reg);
13333 %}
13334 
13335 // Xor Register with Immediate
13336 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13337 %{
  // Strictly exclude immI src == -1 here so that xorI_rReg_im1 is selected regardless of cost.
13339   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13340   match(Set dst (XorI dst src));
13341   effect(KILL cr);
13342   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13343 
13344   format %{ "xorl    $dst, $src\t# int" %}
13345   ins_encode %{
13346     __ xorl($dst$$Register, $src$$constant);
13347   %}
13348   ins_pipe(ialu_reg);
13349 %}
13350 
13351 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13352 %{
  // Strictly exclude immI src2 == -1 here so that xorI_rReg_im1_ndd is selected regardless of cost.
13354   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13355   match(Set dst (XorI src1 src2));
13356   effect(KILL cr);
13357   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13358 
13359   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13360   ins_encode %{
13361     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13362   %}
13363   ins_pipe(ialu_reg);
13364 %}
13365 
// Xor Memory with Immediate into Register (NDD)
13367 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13368 %{
13369   predicate(UseAPX);
13370   match(Set dst (XorI (LoadI src1) src2));
13371   effect(KILL cr);
13372   ins_cost(150);
13373   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13374 
13375   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13376   ins_encode %{
13377     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13378   %}
13379   ins_pipe(ialu_reg);
13380 %}
13381 
13382 // Xor Register with Memory
13383 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13384 %{
13385   predicate(!UseAPX);
13386   match(Set dst (XorI dst (LoadI src)));
13387   effect(KILL cr);
13388   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13389 
13390   ins_cost(150);
13391   format %{ "xorl    $dst, $src\t# int" %}
13392   ins_encode %{
13393     __ xorl($dst$$Register, $src$$Address);
13394   %}
13395   ins_pipe(ialu_reg_mem);
13396 %}
13397 
13398 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13399 %{
13400   predicate(UseAPX);
13401   match(Set dst (XorI src1 (LoadI src2)));
13402   effect(KILL cr);
13403   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13404 
13405   ins_cost(150);
13406   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13407   ins_encode %{
13408     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13409   %}
13410   ins_pipe(ialu_reg_mem);
13411 %}
13412 
13413 // Xor Memory with Register
13414 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13415 %{
13416   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13417   effect(KILL cr);
13418   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13419 
13420   ins_cost(150);
13421   format %{ "xorb    $dst, $src\t# byte" %}
13422   ins_encode %{
13423     __ xorb($dst$$Address, $src$$Register);
13424   %}
13425   ins_pipe(ialu_mem_reg);
13426 %}
13427 
13428 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13429 %{
13430   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13431   effect(KILL cr);
13432   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13433 
13434   ins_cost(150);
13435   format %{ "xorl    $dst, $src\t# int" %}
13436   ins_encode %{
13437     __ xorl($dst$$Address, $src$$Register);
13438   %}
13439   ins_pipe(ialu_mem_reg);
13440 %}
13441 
13442 // Xor Memory with Immediate
13443 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13444 %{
13445   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13446   effect(KILL cr);
13447   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13448 
13449   ins_cost(125);
13450   format %{ "xorl    $dst, $src\t# int" %}
13451   ins_encode %{
13452     __ xorl($dst$$Address, $src$$constant);
13453   %}
13454   ins_pipe(ialu_mem_imm);
13455 %}
13456 
13457 
13458 // Long Logical Instructions
13459 
13460 // And Instructions
13461 // And Register with Register
13462 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13463 %{
13464   predicate(!UseAPX);
13465   match(Set dst (AndL dst src));
13466   effect(KILL cr);
13467   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13468 
13469   format %{ "andq    $dst, $src\t# long" %}
13470   ins_encode %{
13471     __ andq($dst$$Register, $src$$Register);
13472   %}
13473   ins_pipe(ialu_reg_reg);
13474 %}
13475 
13476 // And Register with Register using New Data Destination (NDD)
13477 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13478 %{
13479   predicate(UseAPX);
13480   match(Set dst (AndL src1 src2));
13481   effect(KILL cr);
13482   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13483 
13484   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13485   ins_encode %{
13486     __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13488   %}
13489   ins_pipe(ialu_reg_reg);
13490 %}
13491 
13492 // And Register with Immediate 255
13493 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13494 %{
13495   match(Set dst (AndL src mask));
13496 
13497   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13498   ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
13500     __ movzbl($dst$$Register, $src$$Register);
13501   %}
13502   ins_pipe(ialu_reg);
13503 %}
13504 
13505 // And Register with Immediate 65535
13506 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13507 %{
13508   match(Set dst (AndL src mask));
13509 
13510   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13511   ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
13513     __ movzwl($dst$$Register, $src$$Register);
13514   %}
13515   ins_pipe(ialu_reg);
13516 %}
13517 
13518 // And Register with Immediate
13519 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13520 %{
13521   predicate(!UseAPX);
13522   match(Set dst (AndL dst src));
13523   effect(KILL cr);
13524   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13525 
13526   format %{ "andq    $dst, $src\t# long" %}
13527   ins_encode %{
13528     __ andq($dst$$Register, $src$$constant);
13529   %}
13530   ins_pipe(ialu_reg);
13531 %}
13532 
13533 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13534 %{
13535   predicate(UseAPX);
13536   match(Set dst (AndL src1 src2));
13537   effect(KILL cr);
13538   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13539 
13540   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13541   ins_encode %{
13542     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13543   %}
13544   ins_pipe(ialu_reg);
13545 %}
13546 
13547 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13548 %{
13549   predicate(UseAPX);
13550   match(Set dst (AndL (LoadL src1) src2));
13551   effect(KILL cr);
13552   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13553 
13554   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13555   ins_encode %{
13556     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13557   %}
13558   ins_pipe(ialu_reg);
13559 %}
13560 
13561 // And Register with Memory
13562 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13563 %{
13564   predicate(!UseAPX);
13565   match(Set dst (AndL dst (LoadL src)));
13566   effect(KILL cr);
13567   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13568 
13569   ins_cost(150);
13570   format %{ "andq    $dst, $src\t# long" %}
13571   ins_encode %{
13572     __ andq($dst$$Register, $src$$Address);
13573   %}
13574   ins_pipe(ialu_reg_mem);
13575 %}
13576 
13577 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13578 %{
13579   predicate(UseAPX);
13580   match(Set dst (AndL src1 (LoadL src2)));
13581   effect(KILL cr);
13582   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13583 
13584   ins_cost(150);
13585   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13586   ins_encode %{
13587     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13588   %}
13589   ins_pipe(ialu_reg_mem);
13590 %}
13591 
13592 // And Memory with Register
13593 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13594 %{
13595   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13596   effect(KILL cr);
13597   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13598 
13599   ins_cost(150);
13600   format %{ "andq    $dst, $src\t# long" %}
13601   ins_encode %{
13602     __ andq($dst$$Address, $src$$Register);
13603   %}
13604   ins_pipe(ialu_mem_reg);
13605 %}
13606 
13607 // And Memory with Immediate
13608 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13609 %{
13610   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13611   effect(KILL cr);
13612   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13613 
13614   ins_cost(125);
13615   format %{ "andq    $dst, $src\t# long" %}
13616   ins_encode %{
13617     __ andq($dst$$Address, $src$$constant);
13618   %}
13619   ins_pipe(ialu_mem_imm);
13620 %}
13621 
13622 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13623 %{
  // con must be a genuine 64-bit immediate whose complement is a power of 2,
  // since plain AND/OR handle 8- and 32-bit immediates well enough on their own.
13626   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13627 
13628   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13629   effect(KILL cr);
13630 
13631   ins_cost(125);
13632   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13633   ins_encode %{
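    // e.g. con == ~(1L << 40): log2i_exact(~con) is 40 and we emit
    // "btrq $dst, 40" to clear bit 40 directly in memory.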
13634     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13635   %}
13636   ins_pipe(ialu_mem_imm);
13637 %}
13638 
13639 // BMI1 instructions
13640 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13641   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13642   predicate(UseBMI1Instructions);
13643   effect(KILL cr);
13644   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13645 
13646   ins_cost(125);
13647   format %{ "andnq  $dst, $src1, $src2" %}
13648 
13649   ins_encode %{
13650     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13651   %}
13652   ins_pipe(ialu_reg_mem);
13653 %}
13654 
13655 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13656   match(Set dst (AndL (XorL src1 minus_1) src2));
13657   predicate(UseBMI1Instructions);
13658   effect(KILL cr);
13659   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13660 
13661   format %{ "andnq  $dst, $src1, $src2" %}
13662 
13663   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
13667 %}
13668 
13669 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13670   match(Set dst (AndL (SubL imm_zero src) src));
13671   predicate(UseBMI1Instructions);
13672   effect(KILL cr);
13673   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13674 
13675   format %{ "blsiq  $dst, $src" %}
13676 
13677   ins_encode %{
13678     __ blsiq($dst$$Register, $src$$Register);
13679   %}
13680   ins_pipe(ialu_reg);
13681 %}
13682 
13683 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13684   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13685   predicate(UseBMI1Instructions);
13686   effect(KILL cr);
13687   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13688 
13689   ins_cost(125);
13690   format %{ "blsiq  $dst, $src" %}
13691 
13692   ins_encode %{
13693     __ blsiq($dst$$Register, $src$$Address);
13694   %}
13695   ins_pipe(ialu_reg_mem);
13696 %}
13697 
13698 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13699 %{
13700   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13701   predicate(UseBMI1Instructions);
13702   effect(KILL cr);
13703   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13704 
13705   ins_cost(125);
13706   format %{ "blsmskq $dst, $src" %}
13707 
13708   ins_encode %{
13709     __ blsmskq($dst$$Register, $src$$Address);
13710   %}
13711   ins_pipe(ialu_reg_mem);
13712 %}
13713 
13714 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13715 %{
13716   match(Set dst (XorL (AddL src minus_1) src));
13717   predicate(UseBMI1Instructions);
13718   effect(KILL cr);
13719   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13720 
13721   format %{ "blsmskq $dst, $src" %}
13722 
13723   ins_encode %{
13724     __ blsmskq($dst$$Register, $src$$Register);
13725   %}
13726 
13727   ins_pipe(ialu_reg);
13728 %}
13729 
13730 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13731 %{
13732   match(Set dst (AndL (AddL src minus_1) src) );
13733   predicate(UseBMI1Instructions);
13734   effect(KILL cr);
13735   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13736 
13737   format %{ "blsrq  $dst, $src" %}
13738 
13739   ins_encode %{
13740     __ blsrq($dst$$Register, $src$$Register);
13741   %}
13742 
13743   ins_pipe(ialu_reg);
13744 %}
13745 
13746 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13747 %{
13748   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13749   predicate(UseBMI1Instructions);
13750   effect(KILL cr);
13751   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13752 
13753   ins_cost(125);
13754   format %{ "blsrq  $dst, $src" %}
13755 
13756   ins_encode %{
13757     __ blsrq($dst$$Register, $src$$Address);
13758   %}
13759 
  ins_pipe(ialu_reg_mem);
13761 %}
13762 
13763 // Or Instructions
13764 // Or Register with Register
13765 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13766 %{
13767   predicate(!UseAPX);
13768   match(Set dst (OrL dst src));
13769   effect(KILL cr);
13770   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13771 
13772   format %{ "orq     $dst, $src\t# long" %}
13773   ins_encode %{
13774     __ orq($dst$$Register, $src$$Register);
13775   %}
13776   ins_pipe(ialu_reg_reg);
13777 %}
13778 
13779 // Or Register with Register using New Data Destination (NDD)
13780 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13781 %{
13782   predicate(UseAPX);
13783   match(Set dst (OrL src1 src2));
13784   effect(KILL cr);
13785   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13786 
13787   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13788   ins_encode %{
13789     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13791   %}
13792   ins_pipe(ialu_reg_reg);
13793 %}
13794 
13795 // Use any_RegP to match R15 (TLS register) without spilling.
13796 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
13798   effect(KILL cr);
13799   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13800 
13801   format %{ "orq     $dst, $src\t# long" %}
13802   ins_encode %{
13803     __ orq($dst$$Register, $src$$Register);
13804   %}
13805   ins_pipe(ialu_reg_reg);
13806 %}
13807 
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13810   effect(KILL cr);
13811   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13812 
13813   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13814   ins_encode %{
13815     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13816   %}
13817   ins_pipe(ialu_reg_reg);
13818 %}
13819 
13820 // Or Register with Immediate
13821 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13822 %{
13823   predicate(!UseAPX);
13824   match(Set dst (OrL dst src));
13825   effect(KILL cr);
13826   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13827 
13828   format %{ "orq     $dst, $src\t# long" %}
13829   ins_encode %{
13830     __ orq($dst$$Register, $src$$constant);
13831   %}
13832   ins_pipe(ialu_reg);
13833 %}
13834 
13835 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13836 %{
13837   predicate(UseAPX);
13838   match(Set dst (OrL src1 src2));
13839   effect(KILL cr);
13840   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13841 
13842   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13843   ins_encode %{
13844     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13845   %}
13846   ins_pipe(ialu_reg);
13847 %}
13848 
13849 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13850 %{
13851   predicate(UseAPX);
13852   match(Set dst (OrL src1 src2));
13853   effect(KILL cr);
13854   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13855 
13856   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
13857   ins_encode %{
13858     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13859   %}
13860   ins_pipe(ialu_reg);
13861 %}
13862 
13863 // Or Memory with Immediate
13864 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13865 %{
13866   predicate(UseAPX);
13867   match(Set dst (OrL (LoadL src1) src2));
13868   effect(KILL cr);
13869   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13870 
13871   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13872   ins_encode %{
13873     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13874   %}
13875   ins_pipe(ialu_reg);
13876 %}
13877 
13878 // Or Register with Memory
13879 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13880 %{
13881   predicate(!UseAPX);
13882   match(Set dst (OrL dst (LoadL src)));
13883   effect(KILL cr);
13884   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13885 
13886   ins_cost(150);
13887   format %{ "orq     $dst, $src\t# long" %}
13888   ins_encode %{
13889     __ orq($dst$$Register, $src$$Address);
13890   %}
13891   ins_pipe(ialu_reg_mem);
13892 %}
13893 
13894 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13895 %{
13896   predicate(UseAPX);
13897   match(Set dst (OrL src1 (LoadL src2)));
13898   effect(KILL cr);
13899   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13900 
13901   ins_cost(150);
13902   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13903   ins_encode %{
13904     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
13905   %}
13906   ins_pipe(ialu_reg_mem);
13907 %}
13908 
13909 // Or Memory with Register
13910 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13911 %{
13912   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13913   effect(KILL cr);
13914   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13915 
13916   ins_cost(150);
13917   format %{ "orq     $dst, $src\t# long" %}
13918   ins_encode %{
13919     __ orq($dst$$Address, $src$$Register);
13920   %}
13921   ins_pipe(ialu_mem_reg);
13922 %}
13923 
13924 // Or Memory with Immediate
13925 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13926 %{
13927   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
13928   effect(KILL cr);
13929   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13930 
13931   ins_cost(125);
13932   format %{ "orq     $dst, $src\t# long" %}
13933   ins_encode %{
13934     __ orq($dst$$Address, $src$$constant);
13935   %}
13936   ins_pipe(ialu_mem_imm);
13937 %}
13938 
13939 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
13940 %{
13941   // con should be a pure 64-bit power of 2 immediate
13942   // because AND/OR works well enough for 8/32-bit values.
13943   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
13944 
13945   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
13946   effect(KILL cr);
13947 
13948   ins_cost(125);
13949   format %{ "btsq    $dst, log2($con)\t# long" %}
13950   ins_encode %{
13951     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
13952   %}
13953   ins_pipe(ialu_mem_imm);
13954 %}
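
// A worked example of the bts transform (hypothetical constant): for
// con = 0x0000001000000000 (bit 36), no imm32-encoded orq can express the
// constant, but
//   btsq [dst], 36        // 36 == log2i_exact(con)
// sets the same single bit. The log2i_graceful(...) > 31 predicate is what
// limits this pattern to bits an imm32-form OR cannot reach.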
13955 
13956 // Xor Instructions
13957 // Xor Register with Register
13958 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13959 %{
13960   predicate(!UseAPX);
13961   match(Set dst (XorL dst src));
13962   effect(KILL cr);
13963   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13964 
13965   format %{ "xorq    $dst, $src\t# long" %}
13966   ins_encode %{
13967     __ xorq($dst$$Register, $src$$Register);
13968   %}
13969   ins_pipe(ialu_reg_reg);
13970 %}
13971 
13972 // Xor Register with Register using New Data Destination (NDD)
13973 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13974 %{
13975   predicate(UseAPX);
13976   match(Set dst (XorL src1 src2));
13977   effect(KILL cr);
13978   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13979 
13980   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
13981   ins_encode %{
13982     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13983   %}
13984   ins_pipe(ialu_reg_reg);
13985 %}
13986 
13987 // Xor Register with Immediate -1
13988 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
13989 %{
13990   predicate(!UseAPX);
13991   match(Set dst (XorL dst imm));
13992 
13993   format %{ "notq   $dst" %}
13994   ins_encode %{
13995      __ notq($dst$$Register);
13996   %}
13997   ins_pipe(ialu_reg);
13998 %}
13999 
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14001 %{
14002   predicate(UseAPX);
14003   match(Set dst (XorL src imm));
14004 
14005   format %{ "enotq   $dst, $src" %}
14006   ins_encode %{
14007     __ enotq($dst$$Register, $src$$Register);
14008   %}
14009   ins_pipe(ialu_reg);
14010 %}
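
// Both im1 variants rest on the identity x ^ -1 == ~x, e.g. in C:
//   uint64_t not_via_xor(uint64_t x) { return x ^ ~0ULL; }  // same as ~x
// which is why they emit notq/enotq and declare no flags effect.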
14011 
14012 // Xor Register with Immediate
14013 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14014 %{
  // Strict predicate check so that xorL_rReg_im1 is selected, regardless of cost, when immL32 src is -1.
14016   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14017   match(Set dst (XorL dst src));
14018   effect(KILL cr);
14019   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14020 
14021   format %{ "xorq    $dst, $src\t# long" %}
14022   ins_encode %{
14023     __ xorq($dst$$Register, $src$$constant);
14024   %}
14025   ins_pipe(ialu_reg);
14026 %}
14027 
14028 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14029 %{
  // Strict predicate check so that xorL_rReg_im1_ndd is selected, regardless of cost, when immL32 src2 is -1.
14031   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14032   match(Set dst (XorL src1 src2));
14033   effect(KILL cr);
14034   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14035 
14036   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14037   ins_encode %{
14038     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14039   %}
14040   ins_pipe(ialu_reg);
14041 %}
14042 
14043 // Xor Memory with Immediate
14044 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14045 %{
14046   predicate(UseAPX);
14047   match(Set dst (XorL (LoadL src1) src2));
14048   effect(KILL cr);
14049   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14050   ins_cost(150);
14051 
14052   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14053   ins_encode %{
14054     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14055   %}
14056   ins_pipe(ialu_reg);
14057 %}
14058 
14059 // Xor Register with Memory
14060 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14061 %{
14062   predicate(!UseAPX);
14063   match(Set dst (XorL dst (LoadL src)));
14064   effect(KILL cr);
14065   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14066 
14067   ins_cost(150);
14068   format %{ "xorq    $dst, $src\t# long" %}
14069   ins_encode %{
14070     __ xorq($dst$$Register, $src$$Address);
14071   %}
14072   ins_pipe(ialu_reg_mem);
14073 %}
14074 
14075 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14076 %{
14077   predicate(UseAPX);
14078   match(Set dst (XorL src1 (LoadL src2)));
14079   effect(KILL cr);
14080   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14081 
14082   ins_cost(150);
14083   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14084   ins_encode %{
14085     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14086   %}
14087   ins_pipe(ialu_reg_mem);
14088 %}
14089 
14090 // Xor Memory with Register
14091 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14092 %{
14093   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14094   effect(KILL cr);
14095   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14096 
14097   ins_cost(150);
14098   format %{ "xorq    $dst, $src\t# long" %}
14099   ins_encode %{
14100     __ xorq($dst$$Address, $src$$Register);
14101   %}
14102   ins_pipe(ialu_mem_reg);
14103 %}
14104 
14105 // Xor Memory with Immediate
14106 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14107 %{
14108   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14109   effect(KILL cr);
14110   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14111 
14112   ins_cost(125);
14113   format %{ "xorq    $dst, $src\t# long" %}
14114   ins_encode %{
14115     __ xorq($dst$$Address, $src$$constant);
14116   %}
14117   ins_pipe(ialu_mem_imm);
14118 %}
14119 
14120 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14121 %{
14122   match(Set dst (CmpLTMask p q));
14123   effect(KILL cr);
14124 
14125   ins_cost(400);
14126   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14127             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14128             "negl    $dst" %}
14129   ins_encode %{
14130     __ cmpl($p$$Register, $q$$Register);
14131     __ setcc(Assembler::less, $dst$$Register);
14132     __ negl($dst$$Register);
14133   %}
14134   ins_pipe(pipe_slow);
14135 %}
14136 
14137 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14138 %{
14139   match(Set dst (CmpLTMask dst zero));
14140   effect(KILL cr);
14141 
14142   ins_cost(100);
14143   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14144   ins_encode %{
14145     __ sarl($dst$$Register, 31);
14146   %}
14147   ins_pipe(ialu_reg);
14148 %}
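
// A C sketch of the two mask idioms above (illustrative; assumes the
// arithmetic right shift that x86 sarl provides):
//   int cmpLTMask(int p, int q) { return -(p < q); }  // 0, or -1 == all ones
//   int cmpLTMask0(int x)       { return x >> 31; }   // smear the sign bit
// setcc+negl materializes -(p < q); sarl $dst, 31 turns the sign bit of an
// already-computed difference into a full-width mask.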
14149 
// Better to save a register than avoid a branch
14151 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14152 %{
14153   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14154   effect(KILL cr);
14155   ins_cost(300);
14156   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14157             "jge     done\n\t"
14158             "addl    $p,$y\n"
14159             "done:   " %}
14160   ins_encode %{
14161     Register Rp = $p$$Register;
14162     Register Rq = $q$$Register;
14163     Register Ry = $y$$Register;
14164     Label done;
14165     __ subl(Rp, Rq);
14166     __ jccb(Assembler::greaterEqual, done);
14167     __ addl(Rp, Ry);
14168     __ bind(done);
14169   %}
14170   ins_pipe(pipe_cmplt);
14171 %}
14172 
// Better to save a register than avoid a branch
14174 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14175 %{
14176   match(Set y (AndI (CmpLTMask p q) y));
14177   effect(KILL cr);
14178 
14179   ins_cost(300);
14180 
14181   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14182             "jlt     done\n\t"
14183             "xorl    $y, $y\n"
14184             "done:   " %}
14185   ins_encode %{
14186     Register Rp = $p$$Register;
14187     Register Rq = $q$$Register;
14188     Register Ry = $y$$Register;
14189     Label done;
14190     __ cmpl(Rp, Rq);
14191     __ jccb(Assembler::less, done);
14192     __ xorl(Ry, Ry);
14193     __ bind(done);
14194   %}
14195   ins_pipe(pipe_cmplt);
14196 %}
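
// Equivalent scalar logic for the two branch-based forms, as a sketch:
//   cadd_cmpLTMask: p -= q; if (p < 0) p += y;   // p = (p<q ? y : 0) + (p-q)
//   and_cmpLTMask:  if (!(p < q)) y = 0;         // y = (p<q) ? y : 0
// The short forward jccb trades a (usually predictable) branch for the extra
// register a materialized CmpLTMask would need, per the comments above.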
14197 
14198 
14199 //---------- FP Instructions------------------------------------------------
14200 
14201 // Really expensive, avoid
14202 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14203 %{
14204   match(Set cr (CmpF src1 src2));
14205 
14206   ins_cost(500);
14207   format %{ "ucomiss $src1, $src2\n\t"
14208             "jnp,s   exit\n\t"
14209             "pushfq\t# saw NaN, set CF\n\t"
14210             "andq    [rsp], #0xffffff2b\n\t"
14211             "popfq\n"
14212     "exit:" %}
14213   ins_encode %{
14214     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14215     emit_cmpfp_fixup(masm);
14216   %}
14217   ins_pipe(pipe_slow);
14218 %}
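
// Flag background for the fixup above, as a reading aid: ucomiss reports an
// unordered (NaN) operand as ZF=PF=CF=1. The jnp skips ordered results; for
// NaN, the pushfq/andq/popfq sequence clears ZF and PF while keeping CF=1,
// so the unordered case reads as a plain "below" and NaN compares as less.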
14219 
14220 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14221   match(Set cr (CmpF src1 src2));
14222 
14223   ins_cost(100);
14224   format %{ "ucomiss $src1, $src2" %}
14225   ins_encode %{
14226     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14227   %}
14228   ins_pipe(pipe_slow);
14229 %}
14230 
14231 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14232   match(Set cr (CmpF src1 (LoadF src2)));
14233 
14234   ins_cost(100);
14235   format %{ "ucomiss $src1, $src2" %}
14236   ins_encode %{
14237     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14238   %}
14239   ins_pipe(pipe_slow);
14240 %}
14241 
14242 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14243   match(Set cr (CmpF src con));
14244   ins_cost(100);
14245   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14246   ins_encode %{
14247     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14248   %}
14249   ins_pipe(pipe_slow);
14250 %}
14251 
14252 // Really expensive, avoid
14253 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14254 %{
14255   match(Set cr (CmpD src1 src2));
14256 
14257   ins_cost(500);
14258   format %{ "ucomisd $src1, $src2\n\t"
14259             "jnp,s   exit\n\t"
14260             "pushfq\t# saw NaN, set CF\n\t"
14261             "andq    [rsp], #0xffffff2b\n\t"
14262             "popfq\n"
14263     "exit:" %}
14264   ins_encode %{
14265     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14266     emit_cmpfp_fixup(masm);
14267   %}
14268   ins_pipe(pipe_slow);
14269 %}
14270 
14271 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14272   match(Set cr (CmpD src1 src2));
14273 
14274   ins_cost(100);
14275   format %{ "ucomisd $src1, $src2 test" %}
14276   ins_encode %{
14277     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14278   %}
14279   ins_pipe(pipe_slow);
14280 %}
14281 
14282 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14283   match(Set cr (CmpD src1 (LoadD src2)));
14284 
14285   ins_cost(100);
14286   format %{ "ucomisd $src1, $src2" %}
14287   ins_encode %{
14288     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14289   %}
14290   ins_pipe(pipe_slow);
14291 %}
14292 
14293 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14294   match(Set cr (CmpD src con));
14295   ins_cost(100);
14296   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14297   ins_encode %{
14298     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14299   %}
14300   ins_pipe(pipe_slow);
14301 %}
14302 
14303 // Compare into -1,0,1
14304 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14305 %{
14306   match(Set dst (CmpF3 src1 src2));
14307   effect(KILL cr);
14308 
14309   ins_cost(275);
14310   format %{ "ucomiss $src1, $src2\n\t"
14311             "movl    $dst, #-1\n\t"
14312             "jp,s    done\n\t"
14313             "jb,s    done\n\t"
14314             "setne   $dst\n\t"
14315             "movzbl  $dst, $dst\n"
14316     "done:" %}
14317   ins_encode %{
14318     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14319     emit_cmpfp3(masm, $dst$$Register);
14320   %}
14321   ins_pipe(pipe_slow);
14322 %}
14323 
14324 // Compare into -1,0,1
14325 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14326 %{
14327   match(Set dst (CmpF3 src1 (LoadF src2)));
14328   effect(KILL cr);
14329 
14330   ins_cost(275);
14331   format %{ "ucomiss $src1, $src2\n\t"
14332             "movl    $dst, #-1\n\t"
14333             "jp,s    done\n\t"
14334             "jb,s    done\n\t"
14335             "setne   $dst\n\t"
14336             "movzbl  $dst, $dst\n"
14337     "done:" %}
14338   ins_encode %{
14339     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14340     emit_cmpfp3(masm, $dst$$Register);
14341   %}
14342   ins_pipe(pipe_slow);
14343 %}
14344 
14345 // Compare into -1,0,1
14346 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14347   match(Set dst (CmpF3 src con));
14348   effect(KILL cr);
14349 
14350   ins_cost(275);
14351   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14352             "movl    $dst, #-1\n\t"
14353             "jp,s    done\n\t"
14354             "jb,s    done\n\t"
14355             "setne   $dst\n\t"
14356             "movzbl  $dst, $dst\n"
14357     "done:" %}
14358   ins_encode %{
14359     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14360     emit_cmpfp3(masm, $dst$$Register);
14361   %}
14362   ins_pipe(pipe_slow);
14363 %}
14364 
14365 // Compare into -1,0,1
14366 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14367 %{
14368   match(Set dst (CmpD3 src1 src2));
14369   effect(KILL cr);
14370 
14371   ins_cost(275);
14372   format %{ "ucomisd $src1, $src2\n\t"
14373             "movl    $dst, #-1\n\t"
14374             "jp,s    done\n\t"
14375             "jb,s    done\n\t"
14376             "setne   $dst\n\t"
14377             "movzbl  $dst, $dst\n"
14378     "done:" %}
14379   ins_encode %{
14380     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14381     emit_cmpfp3(masm, $dst$$Register);
14382   %}
14383   ins_pipe(pipe_slow);
14384 %}
14385 
14386 // Compare into -1,0,1
14387 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14388 %{
14389   match(Set dst (CmpD3 src1 (LoadD src2)));
14390   effect(KILL cr);
14391 
14392   ins_cost(275);
14393   format %{ "ucomisd $src1, $src2\n\t"
14394             "movl    $dst, #-1\n\t"
14395             "jp,s    done\n\t"
14396             "jb,s    done\n\t"
14397             "setne   $dst\n\t"
14398             "movzbl  $dst, $dst\n"
14399     "done:" %}
14400   ins_encode %{
14401     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14402     emit_cmpfp3(masm, $dst$$Register);
14403   %}
14404   ins_pipe(pipe_slow);
14405 %}
14406 
14407 // Compare into -1,0,1
14408 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14409   match(Set dst (CmpD3 src con));
14410   effect(KILL cr);
14411 
14412   ins_cost(275);
14413   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14414             "movl    $dst, #-1\n\t"
14415             "jp,s    done\n\t"
14416             "jb,s    done\n\t"
14417             "setne   $dst\n\t"
14418             "movzbl  $dst, $dst\n"
14419     "done:" %}
14420   ins_encode %{
14421     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14422     emit_cmpfp3(masm, $dst$$Register);
14423   %}
14424   ins_pipe(pipe_slow);
14425 %}
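
// A hedged C equivalent of the cmp*3 sequences above; NaN yields -1, matching
// the jp-to-done path in the format strings:
//   int cmp3(double a, double b) {
//     if (a != a || b != b) return -1;           // unordered (NaN)
//     return (a < b) ? -1 : ((a > b) ? 1 : 0);
//   }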
14426 
14427 //----------Arithmetic Conversion Instructions---------------------------------
14428 
14429 instruct convF2D_reg_reg(regD dst, regF src)
14430 %{
14431   match(Set dst (ConvF2D src));
14432 
14433   format %{ "cvtss2sd $dst, $src" %}
14434   ins_encode %{
14435     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14436   %}
14437   ins_pipe(pipe_slow); // XXX
14438 %}
14439 
14440 instruct convF2D_reg_mem(regD dst, memory src)
14441 %{
14442   predicate(UseAVX == 0);
14443   match(Set dst (ConvF2D (LoadF src)));
14444 
14445   format %{ "cvtss2sd $dst, $src" %}
14446   ins_encode %{
14447     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14448   %}
14449   ins_pipe(pipe_slow); // XXX
14450 %}
14451 
14452 instruct convD2F_reg_reg(regF dst, regD src)
14453 %{
14454   match(Set dst (ConvD2F src));
14455 
14456   format %{ "cvtsd2ss $dst, $src" %}
14457   ins_encode %{
14458     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14459   %}
14460   ins_pipe(pipe_slow); // XXX
14461 %}
14462 
14463 instruct convD2F_reg_mem(regF dst, memory src)
14464 %{
14465   predicate(UseAVX == 0);
14466   match(Set dst (ConvD2F (LoadD src)));
14467 
14468   format %{ "cvtsd2ss $dst, $src" %}
14469   ins_encode %{
14470     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14471   %}
14472   ins_pipe(pipe_slow); // XXX
14473 %}
14474 
14475 // XXX do mem variants
14476 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14477 %{
14478   predicate(!VM_Version::supports_avx10_2());
14479   match(Set dst (ConvF2I src));
14480   effect(KILL cr);
14481   format %{ "convert_f2i $dst, $src" %}
14482   ins_encode %{
14483     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14484   %}
14485   ins_pipe(pipe_slow);
14486 %}
14487 
14488 instruct convF2I_reg_reg_avx10(rRegI dst, regF src)
14489 %{
14490   predicate(VM_Version::supports_avx10_2());
14491   match(Set dst (ConvF2I src));
14492   format %{ "evcvttss2sisl $dst, $src" %}
14493   ins_encode %{
14494     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14495   %}
14496   ins_pipe(pipe_slow);
14497 %}
14498 
14499 instruct convF2I_reg_mem_avx10(rRegI dst, memory src)
14500 %{
14501   predicate(VM_Version::supports_avx10_2());
14502   match(Set dst (ConvF2I (LoadF src)));
14503   format %{ "evcvttss2sisl $dst, $src" %}
14504   ins_encode %{
14505     __ evcvttss2sisl($dst$$Register, $src$$Address);
14506   %}
14507   ins_pipe(pipe_slow);
14508 %}
14509 
14510 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14511 %{
14512   predicate(!VM_Version::supports_avx10_2());
14513   match(Set dst (ConvF2L src));
14514   effect(KILL cr);
14515   format %{ "convert_f2l $dst, $src"%}
14516   ins_encode %{
14517     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14518   %}
14519   ins_pipe(pipe_slow);
14520 %}
14521 
14522 instruct convF2L_reg_reg_avx10(rRegL dst, regF src)
14523 %{
14524   predicate(VM_Version::supports_avx10_2());
14525   match(Set dst (ConvF2L src));
14526   format %{ "evcvttss2sisq $dst, $src" %}
14527   ins_encode %{
14528     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14529   %}
14530   ins_pipe(pipe_slow);
14531 %}
14532 
14533 instruct convF2L_reg_mem_avx10(rRegL dst, memory src)
14534 %{
14535   predicate(VM_Version::supports_avx10_2());
14536   match(Set dst (ConvF2L (LoadF src)));
14537   format %{ "evcvttss2sisq $dst, $src" %}
14538   ins_encode %{
14539     __ evcvttss2sisq($dst$$Register, $src$$Address);
14540   %}
14541   ins_pipe(pipe_slow);
14542 %}
14543 
14544 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14545 %{
14546   predicate(!VM_Version::supports_avx10_2());
14547   match(Set dst (ConvD2I src));
14548   effect(KILL cr);
14549   format %{ "convert_d2i $dst, $src"%}
14550   ins_encode %{
14551     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14552   %}
14553   ins_pipe(pipe_slow);
14554 %}
14555 
14556 instruct convD2I_reg_reg_avx10(rRegI dst, regD src)
14557 %{
14558   predicate(VM_Version::supports_avx10_2());
14559   match(Set dst (ConvD2I src));
14560   format %{ "evcvttsd2sisl $dst, $src" %}
14561   ins_encode %{
14562     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14563   %}
14564   ins_pipe(pipe_slow);
14565 %}
14566 
14567 instruct convD2I_reg_mem_avx10(rRegI dst, memory src)
14568 %{
14569   predicate(VM_Version::supports_avx10_2());
14570   match(Set dst (ConvD2I (LoadD src)));
14571   format %{ "evcvttsd2sisl $dst, $src" %}
14572   ins_encode %{
14573     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14574   %}
14575   ins_pipe(pipe_slow);
14576 %}
14577 
14578 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14579 %{
14580   predicate(!VM_Version::supports_avx10_2());
14581   match(Set dst (ConvD2L src));
14582   effect(KILL cr);
14583   format %{ "convert_d2l $dst, $src"%}
14584   ins_encode %{
14585     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14586   %}
14587   ins_pipe(pipe_slow);
14588 %}
14589 
14590 instruct convD2L_reg_reg_avx10(rRegL dst, regD src)
14591 %{
14592   predicate(VM_Version::supports_avx10_2());
14593   match(Set dst (ConvD2L src));
14594   format %{ "evcvttsd2sisq $dst, $src" %}
14595   ins_encode %{
14596     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14597   %}
14598   ins_pipe(pipe_slow);
14599 %}
14600 
14601 instruct convD2L_reg_mem_avx10(rRegL dst, memory src)
14602 %{
14603   predicate(VM_Version::supports_avx10_2());
14604   match(Set dst (ConvD2L (LoadD src)));
14605   format %{ "evcvttsd2sisq $dst, $src" %}
14606   ins_encode %{
14607     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14608   %}
14609   ins_pipe(pipe_slow);
14610 %}
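
// An observation on the split above (hedged): the pre-AVX10.2 forms funnel
// through convertF2I, which post-processes the raw cvtt* result and so
// clobbers flags (KILL cr), while the AVX10.2 evcvtt*sis forms declare no
// flags effect -- the saturating conversions appear to produce the required
// Java result directly.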
14611 
14612 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14613 %{
14614   match(Set dst (RoundD src));
14615   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14616   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14617   ins_encode %{
14618     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14619   %}
14620   ins_pipe(pipe_slow);
14621 %}
14622 
14623 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14624 %{
14625   match(Set dst (RoundF src));
14626   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14627   format %{ "round_float $dst,$src" %}
14628   ins_encode %{
14629     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14630   %}
14631   ins_pipe(pipe_slow);
14632 %}
14633 
14634 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14635 %{
14636   predicate(!UseXmmI2F);
14637   match(Set dst (ConvI2F src));
14638 
14639   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14640   ins_encode %{
14641     if (UseAVX > 0) {
14642       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14643     }
14644     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14645   %}
14646   ins_pipe(pipe_slow); // XXX
14647 %}
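
// The pxor above is the usual false-dependency breaker (noted as an aside):
// cvtsi2ssl merges into only the low 32 bits of $dst, so zeroing the register
// first keeps the convert from depending on a stale earlier producer of $dst.
// The same idiom appears in the i2d/l2f/l2d forms below.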
14648 
14649 instruct convI2F_reg_mem(regF dst, memory src)
14650 %{
14651   predicate(UseAVX == 0);
14652   match(Set dst (ConvI2F (LoadI src)));
14653 
14654   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14655   ins_encode %{
14656     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14657   %}
14658   ins_pipe(pipe_slow); // XXX
14659 %}
14660 
14661 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14662 %{
14663   predicate(!UseXmmI2D);
14664   match(Set dst (ConvI2D src));
14665 
14666   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14667   ins_encode %{
14668     if (UseAVX > 0) {
14669       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14670     }
14671     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14672   %}
14673   ins_pipe(pipe_slow); // XXX
14674 %}
14675 
14676 instruct convI2D_reg_mem(regD dst, memory src)
14677 %{
14678   predicate(UseAVX == 0);
14679   match(Set dst (ConvI2D (LoadI src)));
14680 
14681   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14682   ins_encode %{
14683     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14684   %}
14685   ins_pipe(pipe_slow); // XXX
14686 %}
14687 
14688 instruct convXI2F_reg(regF dst, rRegI src)
14689 %{
14690   predicate(UseXmmI2F);
14691   match(Set dst (ConvI2F src));
14692 
14693   format %{ "movdl $dst, $src\n\t"
            "cvtdq2ps $dst, $dst\t# i2f" %}
14695   ins_encode %{
14696     __ movdl($dst$$XMMRegister, $src$$Register);
14697     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14698   %}
14699   ins_pipe(pipe_slow); // XXX
14700 %}
14701 
14702 instruct convXI2D_reg(regD dst, rRegI src)
14703 %{
14704   predicate(UseXmmI2D);
14705   match(Set dst (ConvI2D src));
14706 
14707   format %{ "movdl $dst, $src\n\t"
            "cvtdq2pd $dst, $dst\t# i2d" %}
14709   ins_encode %{
14710     __ movdl($dst$$XMMRegister, $src$$Register);
14711     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14712   %}
14713   ins_pipe(pipe_slow); // XXX
14714 %}
14715 
14716 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14717 %{
14718   match(Set dst (ConvL2F src));
14719 
14720   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14721   ins_encode %{
14722     if (UseAVX > 0) {
14723       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14724     }
14725     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14726   %}
14727   ins_pipe(pipe_slow); // XXX
14728 %}
14729 
14730 instruct convL2F_reg_mem(regF dst, memory src)
14731 %{
14732   predicate(UseAVX == 0);
14733   match(Set dst (ConvL2F (LoadL src)));
14734 
14735   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14736   ins_encode %{
14737     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14738   %}
14739   ins_pipe(pipe_slow); // XXX
14740 %}
14741 
14742 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14743 %{
14744   match(Set dst (ConvL2D src));
14745 
14746   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14747   ins_encode %{
14748     if (UseAVX > 0) {
14749       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14750     }
14751     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14752   %}
14753   ins_pipe(pipe_slow); // XXX
14754 %}
14755 
14756 instruct convL2D_reg_mem(regD dst, memory src)
14757 %{
14758   predicate(UseAVX == 0);
14759   match(Set dst (ConvL2D (LoadL src)));
14760 
14761   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14762   ins_encode %{
14763     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14764   %}
14765   ins_pipe(pipe_slow); // XXX
14766 %}
14767 
14768 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14769 %{
14770   match(Set dst (ConvI2L src));
14771 
14772   ins_cost(125);
14773   format %{ "movslq  $dst, $src\t# i2l" %}
14774   ins_encode %{
14775     __ movslq($dst$$Register, $src$$Register);
14776   %}
14777   ins_pipe(ialu_reg_reg);
14778 %}
14779 
14780 // Zero-extend convert int to long
14781 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14782 %{
14783   match(Set dst (AndL (ConvI2L src) mask));
14784 
14785   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14786   ins_encode %{
14787     if ($dst$$reg != $src$$reg) {
14788       __ movl($dst$$Register, $src$$Register);
14789     }
14790   %}
14791   ins_pipe(ialu_reg_reg);
14792 %}
14793 
14794 // Zero-extend convert int to long
14795 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14796 %{
14797   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14798 
14799   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14800   ins_encode %{
14801     __ movl($dst$$Register, $src$$Address);
14802   %}
14803   ins_pipe(ialu_reg_mem);
14804 %}
14805 
14806 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14807 %{
14808   match(Set dst (AndL src mask));
14809 
14810   format %{ "movl    $dst, $src\t# zero-extend long" %}
14811   ins_encode %{
14812     __ movl($dst$$Register, $src$$Register);
14813   %}
14814   ins_pipe(ialu_reg_reg);
14815 %}
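
// These patterns lean on the x86-64 rule that a 32-bit mov zero-clears bits
// 63..32 of its destination, so the AndL with the 0xFFFFFFFF mask costs at
// most one register-to-register move:
//   movl eax, esi   // rax = (uint32_t)rsi; upper half zeroed by hardware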
14816 
14817 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14818 %{
14819   match(Set dst (ConvL2I src));
14820 
14821   format %{ "movl    $dst, $src\t# l2i" %}
14822   ins_encode %{
14823     __ movl($dst$$Register, $src$$Register);
14824   %}
14825   ins_pipe(ialu_reg_reg);
14826 %}
14827 
14828 
14829 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14830   match(Set dst (MoveF2I src));
14831   effect(DEF dst, USE src);
14832 
14833   ins_cost(125);
14834   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
14835   ins_encode %{
14836     __ movl($dst$$Register, Address(rsp, $src$$disp));
14837   %}
14838   ins_pipe(ialu_reg_mem);
14839 %}
14840 
14841 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14842   match(Set dst (MoveI2F src));
14843   effect(DEF dst, USE src);
14844 
14845   ins_cost(125);
14846   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
14847   ins_encode %{
14848     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14849   %}
14850   ins_pipe(pipe_slow);
14851 %}
14852 
14853 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14854   match(Set dst (MoveD2L src));
14855   effect(DEF dst, USE src);
14856 
14857   ins_cost(125);
14858   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
14859   ins_encode %{
14860     __ movq($dst$$Register, Address(rsp, $src$$disp));
14861   %}
14862   ins_pipe(ialu_reg_mem);
14863 %}
14864 
14865 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14866   predicate(!UseXmmLoadAndClearUpper);
14867   match(Set dst (MoveL2D src));
14868   effect(DEF dst, USE src);
14869 
14870   ins_cost(125);
14871   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
14872   ins_encode %{
14873     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14874   %}
14875   ins_pipe(pipe_slow);
14876 %}
14877 
14878 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14879   predicate(UseXmmLoadAndClearUpper);
14880   match(Set dst (MoveL2D src));
14881   effect(DEF dst, USE src);
14882 
14883   ins_cost(125);
14884   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
14885   ins_encode %{
14886     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14887   %}
14888   ins_pipe(pipe_slow);
14889 %}
14890 
14891 
14892 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14893   match(Set dst (MoveF2I src));
14894   effect(DEF dst, USE src);
14895 
14896   ins_cost(95); // XXX
14897   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
14898   ins_encode %{
14899     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
14900   %}
14901   ins_pipe(pipe_slow);
14902 %}
14903 
14904 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
14905   match(Set dst (MoveI2F src));
14906   effect(DEF dst, USE src);
14907 
14908   ins_cost(100);
14909   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
14910   ins_encode %{
14911     __ movl(Address(rsp, $dst$$disp), $src$$Register);
14912   %}
14913   ins_pipe( ialu_mem_reg );
14914 %}
14915 
14916 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
14917   match(Set dst (MoveD2L src));
14918   effect(DEF dst, USE src);
14919 
14920   ins_cost(95); // XXX
14921   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
14922   ins_encode %{
14923     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
14924   %}
14925   ins_pipe(pipe_slow);
14926 %}
14927 
14928 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
14929   match(Set dst (MoveL2D src));
14930   effect(DEF dst, USE src);
14931 
14932   ins_cost(100);
14933   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
14934   ins_encode %{
14935     __ movq(Address(rsp, $dst$$disp), $src$$Register);
14936   %}
14937   ins_pipe(ialu_mem_reg);
14938 %}
14939 
14940 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
14941   match(Set dst (MoveF2I src));
14942   effect(DEF dst, USE src);
14943   ins_cost(85);
14944   format %{ "movd    $dst,$src\t# MoveF2I" %}
14945   ins_encode %{
14946     __ movdl($dst$$Register, $src$$XMMRegister);
14947   %}
14948   ins_pipe( pipe_slow );
14949 %}
14950 
14951 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
14952   match(Set dst (MoveD2L src));
14953   effect(DEF dst, USE src);
14954   ins_cost(85);
14955   format %{ "movd    $dst,$src\t# MoveD2L" %}
14956   ins_encode %{
14957     __ movdq($dst$$Register, $src$$XMMRegister);
14958   %}
14959   ins_pipe( pipe_slow );
14960 %}
14961 
14962 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
14963   match(Set dst (MoveI2F src));
14964   effect(DEF dst, USE src);
14965   ins_cost(100);
14966   format %{ "movd    $dst,$src\t# MoveI2F" %}
14967   ins_encode %{
14968     __ movdl($dst$$XMMRegister, $src$$Register);
14969   %}
14970   ins_pipe( pipe_slow );
14971 %}
14972 
14973 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
14974   match(Set dst (MoveL2D src));
14975   effect(DEF dst, USE src);
14976   ins_cost(100);
14977   format %{ "movd    $dst,$src\t# MoveL2D" %}
14978   ins_encode %{
14979      __ movdq($dst$$XMMRegister, $src$$Register);
14980   %}
14981   ins_pipe( pipe_slow );
14982 %}
14983 
14984 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
14986 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
14987                   Universe dummy, rFlagsReg cr)
14988 %{
14989   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
14990   match(Set dummy (ClearArray cnt base));
14991   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
14992 
14993   format %{ $$template
14994     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
14995     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
14996     $$emit$$"jg      LARGE\n\t"
14997     $$emit$$"dec     rcx\n\t"
14998     $$emit$$"js      DONE\t# Zero length\n\t"
14999     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15000     $$emit$$"dec     rcx\n\t"
15001     $$emit$$"jge     LOOP\n\t"
15002     $$emit$$"jmp     DONE\n\t"
15003     $$emit$$"# LARGE:\n\t"
15004     if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15006        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15007     } else if (UseXMMForObjInit) {
15008        $$emit$$"mov     rdi,rax\n\t"
15009        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15010        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15011        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15012        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15013        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15014        $$emit$$"add     0x40,rax\n\t"
15015        $$emit$$"# L_zero_64_bytes:\n\t"
15016        $$emit$$"sub     0x8,rcx\n\t"
15017        $$emit$$"jge     L_loop\n\t"
15018        $$emit$$"add     0x4,rcx\n\t"
15019        $$emit$$"jl      L_tail\n\t"
15020        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15021        $$emit$$"add     0x20,rax\n\t"
15022        $$emit$$"sub     0x4,rcx\n\t"
15023        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15024        $$emit$$"add     0x4,rcx\n\t"
15025        $$emit$$"jle     L_end\n\t"
15026        $$emit$$"dec     rcx\n\t"
15027        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15028        $$emit$$"vmovq   xmm0,(rax)\n\t"
15029        $$emit$$"add     0x8,rax\n\t"
15030        $$emit$$"dec     rcx\n\t"
15031        $$emit$$"jge     L_sloop\n\t"
15032        $$emit$$"# L_end:\n\t"
15033     } else {
15034        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15035     }
15036     $$emit$$"# DONE"
15037   %}
15038   ins_encode %{
15039     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15040                  $tmp$$XMMRegister, false, knoreg);
15041   %}
15042   ins_pipe(pipe_slow);
15043 %}
15044 
15045 // Small non-constant length ClearArray for AVX512 targets.
15046 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15047                        Universe dummy, rFlagsReg cr)
15048 %{
15049   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15050   match(Set dummy (ClearArray cnt base));
15051   ins_cost(125);
15052   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15053 
15054   format %{ $$template
15055     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15056     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15057     $$emit$$"jg      LARGE\n\t"
15058     $$emit$$"dec     rcx\n\t"
15059     $$emit$$"js      DONE\t# Zero length\n\t"
15060     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15061     $$emit$$"dec     rcx\n\t"
15062     $$emit$$"jge     LOOP\n\t"
15063     $$emit$$"jmp     DONE\n\t"
15064     $$emit$$"# LARGE:\n\t"
15065     if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15067        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15068     } else if (UseXMMForObjInit) {
15069        $$emit$$"mov     rdi,rax\n\t"
15070        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15071        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15072        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15073        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15074        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15075        $$emit$$"add     0x40,rax\n\t"
15076        $$emit$$"# L_zero_64_bytes:\n\t"
15077        $$emit$$"sub     0x8,rcx\n\t"
15078        $$emit$$"jge     L_loop\n\t"
15079        $$emit$$"add     0x4,rcx\n\t"
15080        $$emit$$"jl      L_tail\n\t"
15081        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15082        $$emit$$"add     0x20,rax\n\t"
15083        $$emit$$"sub     0x4,rcx\n\t"
15084        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15085        $$emit$$"add     0x4,rcx\n\t"
15086        $$emit$$"jle     L_end\n\t"
15087        $$emit$$"dec     rcx\n\t"
15088        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15089        $$emit$$"vmovq   xmm0,(rax)\n\t"
15090        $$emit$$"add     0x8,rax\n\t"
15091        $$emit$$"dec     rcx\n\t"
15092        $$emit$$"jge     L_sloop\n\t"
15093        $$emit$$"# L_end:\n\t"
15094     } else {
15095        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15096     }
15097     $$emit$$"# DONE"
15098   %}
15099   ins_encode %{
15100     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15101                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15102   %}
15103   ins_pipe(pipe_slow);
15104 %}
15105 
15106 // Large non-constant length ClearArray for non-AVX512 targets.
15107 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15108                         Universe dummy, rFlagsReg cr)
15109 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15111   match(Set dummy (ClearArray cnt base));
15112   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15113 
15114   format %{ $$template
15115     if (UseFastStosb) {
15116        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15118        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15119     } else if (UseXMMForObjInit) {
15120        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15121        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15122        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15123        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15124        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15125        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15126        $$emit$$"add     0x40,rax\n\t"
15127        $$emit$$"# L_zero_64_bytes:\n\t"
15128        $$emit$$"sub     0x8,rcx\n\t"
15129        $$emit$$"jge     L_loop\n\t"
15130        $$emit$$"add     0x4,rcx\n\t"
15131        $$emit$$"jl      L_tail\n\t"
15132        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15133        $$emit$$"add     0x20,rax\n\t"
15134        $$emit$$"sub     0x4,rcx\n\t"
15135        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15136        $$emit$$"add     0x4,rcx\n\t"
15137        $$emit$$"jle     L_end\n\t"
15138        $$emit$$"dec     rcx\n\t"
15139        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15140        $$emit$$"vmovq   xmm0,(rax)\n\t"
15141        $$emit$$"add     0x8,rax\n\t"
15142        $$emit$$"dec     rcx\n\t"
15143        $$emit$$"jge     L_sloop\n\t"
15144        $$emit$$"# L_end:\n\t"
15145     } else {
15146        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15147        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15148     }
15149   %}
15150   ins_encode %{
15151     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15152                  $tmp$$XMMRegister, true, knoreg);
15153   %}
15154   ins_pipe(pipe_slow);
15155 %}
15156 
15157 // Large non-constant length ClearArray for AVX512 targets.
15158 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15159                              Universe dummy, rFlagsReg cr)
15160 %{
15161   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15162   match(Set dummy (ClearArray cnt base));
15163   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15164 
15165   format %{ $$template
15166     if (UseFastStosb) {
15167        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert quadwords to bytes\n\t"
15169        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15170     } else if (UseXMMForObjInit) {
15171        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15172        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15173        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15174        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15175        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15176        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15177        $$emit$$"add     0x40,rax\n\t"
15178        $$emit$$"# L_zero_64_bytes:\n\t"
15179        $$emit$$"sub     0x8,rcx\n\t"
15180        $$emit$$"jge     L_loop\n\t"
15181        $$emit$$"add     0x4,rcx\n\t"
15182        $$emit$$"jl      L_tail\n\t"
15183        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15184        $$emit$$"add     0x20,rax\n\t"
15185        $$emit$$"sub     0x4,rcx\n\t"
15186        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15187        $$emit$$"add     0x4,rcx\n\t"
15188        $$emit$$"jle     L_end\n\t"
15189        $$emit$$"dec     rcx\n\t"
15190        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15191        $$emit$$"vmovq   xmm0,(rax)\n\t"
15192        $$emit$$"add     0x8,rax\n\t"
15193        $$emit$$"dec     rcx\n\t"
15194        $$emit$$"jge     L_sloop\n\t"
15195        $$emit$$"# L_end:\n\t"
15196     } else {
15197        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15198        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15199     }
15200   %}
15201   ins_encode %{
15202     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15203                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15204   %}
15205   ins_pipe(pipe_slow);
15206 %}
15207 
15208 // Small constant length ClearArray for AVX512 targets.
15209 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15210 %{
15211   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15212   match(Set dummy (ClearArray cnt base));
15213   ins_cost(100);
15214   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15215   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15216   ins_encode %{
15217    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15218   %}
15219   ins_pipe(pipe_slow);
15220 %}
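
// Informal dispatch summary for the ClearArray forms above, where cnt is
// taken to count 8-byte words: non-constant lengths pick rep_stos (small) or
// rep_stos_large, in plain or _evex flavor per UseAVX, and each then selects
// rep stosb, a YMM store loop, or rep stosq at code-emission time depending
// on UseFastStosb/UseXMMForObjInit; a small constant length on AVX-512VL
// hardware uses rep_stos_im with masked stores via $ktmp.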
15221 
15222 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15223                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15224 %{
15225   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15226   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15227   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15228 
15229   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15230   ins_encode %{
15231     __ string_compare($str1$$Register, $str2$$Register,
15232                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15233                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15234   %}
15235   ins_pipe( pipe_slow );
15236 %}
15237 
15238 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15239                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15240 %{
15241   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15242   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15243   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15244 
15245   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15246   ins_encode %{
15247     __ string_compare($str1$$Register, $str2$$Register,
15248                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15249                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15250   %}
15251   ins_pipe( pipe_slow );
15252 %}
15253 
15254 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15255                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15256 %{
15257   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15258   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15259   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15260 
15261   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15262   ins_encode %{
15263     __ string_compare($str1$$Register, $str2$$Register,
15264                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15265                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15266   %}
15267   ins_pipe( pipe_slow );
15268 %}
15269 
15270 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15271                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15272 %{
15273   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15274   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15275   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15276 
15277   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15278   ins_encode %{
15279     __ string_compare($str1$$Register, $str2$$Register,
15280                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15281                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15282   %}
15283   ins_pipe( pipe_slow );
15284 %}
15285 
15286 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15287                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15288 %{
15289   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15290   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15291   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15292 
15293   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15294   ins_encode %{
15295     __ string_compare($str1$$Register, $str2$$Register,
15296                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15297                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15298   %}
15299   ins_pipe( pipe_slow );
15300 %}
15301 
15302 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15303                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15304 %{
15305   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15306   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15307   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15308 
15309   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15310   ins_encode %{
15311     __ string_compare($str1$$Register, $str2$$Register,
15312                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15313                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15314   %}
15315   ins_pipe( pipe_slow );
15316 %}
15317 
15318 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15319                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15320 %{
15321   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15322   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15323   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15324 
15325   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15326   ins_encode %{
15327     __ string_compare($str2$$Register, $str1$$Register,
15328                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15329                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15330   %}
15331   ins_pipe( pipe_slow );
15332 %}
15333 
15334 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15335                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15336 %{
15337   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15338   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15339   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15340 
15341   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15342   ins_encode %{
15343     __ string_compare($str2$$Register, $str1$$Register,
15344                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15345                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15346   %}
15347   ins_pipe( pipe_slow );
15348 %}
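
// Observation on the UL flavor (hedged): unlike the other variants it passes
// str2/cnt2 ahead of str1/cnt1 into string_compare, so the Latin-1 operand
// arrives in the same position as in the LU case and one mixed-width code
// path can presumably serve both encodings.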
15349 
15350 // fast search of substring with known size.
15351 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15352                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15353 %{
15354   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15355   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15356   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15357 
15358   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15359   ins_encode %{
15360     int icnt2 = (int)$int_cnt2$$constant;
15361     if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
15364       __ string_indexofC8($str1$$Register, $str2$$Register,
15365                           $cnt1$$Register, $cnt2$$Register,
15366                           icnt2, $result$$Register,
15367                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15368     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15370       __ string_indexof($str1$$Register, $str2$$Register,
15371                         $cnt1$$Register, $cnt2$$Register,
15372                         icnt2, $result$$Register,
15373                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15374     }
15375   %}
15376   ins_pipe( pipe_slow );
15377 %}
15378 
15379 // fast search of substring with known size.
15380 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15381                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15382 %{
15383   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15384   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15385   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15386 
15387   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15388   ins_encode %{
15389     int icnt2 = (int)$int_cnt2$$constant;
15390     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15393       __ string_indexofC8($str1$$Register, $str2$$Register,
15394                           $cnt1$$Register, $cnt2$$Register,
15395                           icnt2, $result$$Register,
15396                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15397     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15399       __ string_indexof($str1$$Register, $str2$$Register,
15400                         $cnt1$$Register, $cnt2$$Register,
15401                         icnt2, $result$$Register,
15402                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15403     }
15404   %}
15405   ins_pipe( pipe_slow );
15406 %}
15407 
15408 // fast search of substring with known size.
15409 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15410                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15411 %{
15412   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15413   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15414   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15415 
15416   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15417   ins_encode %{
15418     int icnt2 = (int)$int_cnt2$$constant;
15419     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15422       __ string_indexofC8($str1$$Register, $str2$$Register,
15423                           $cnt1$$Register, $cnt2$$Register,
15424                           icnt2, $result$$Register,
15425                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15426     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15428       __ string_indexof($str1$$Register, $str2$$Register,
15429                         $cnt1$$Register, $cnt2$$Register,
15430                         icnt2, $result$$Register,
15431                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15432     }
15433   %}
15434   ins_pipe( pipe_slow );
15435 %}
15436 
15437 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15438                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15439 %{
15440   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15441   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15442   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15443 
15444   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15445   ins_encode %{
15446     __ string_indexof($str1$$Register, $str2$$Register,
15447                       $cnt1$$Register, $cnt2$$Register,
15448                       (-1), $result$$Register,
15449                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15450   %}
15451   ins_pipe( pipe_slow );
15452 %}
15453 
15454 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15455                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15456 %{
15457   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15458   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15459   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15460 
15461   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15462   ins_encode %{
15463     __ string_indexof($str1$$Register, $str2$$Register,
15464                       $cnt1$$Register, $cnt2$$Register,
15465                       (-1), $result$$Register,
15466                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15467   %}
15468   ins_pipe( pipe_slow );
15469 %}
15470 
15471 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15472                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15473 %{
15474   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15475   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15476   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15477 
15478   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15479   ins_encode %{
15480     __ string_indexof($str1$$Register, $str2$$Register,
15481                       $cnt1$$Register, $cnt2$$Register,
15482                       (-1), $result$$Register,
15483                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15484   %}
15485   ins_pipe( pipe_slow );
15486 %}
15487 
15488 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15489                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15490 %{
15491   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15492   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15493   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15494   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15495   ins_encode %{
15496     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15497                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15498   %}
15499   ins_pipe( pipe_slow );
15500 %}
15501 
15502 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15503                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15504 %{
15505   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15506   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15507   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15508   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15509   ins_encode %{
15510     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15511                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15512   %}
15513   ins_pipe( pipe_slow );
15514 %}
15515 
15516 // fast string equals
15517 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15518                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15519 %{
15520   predicate(!VM_Version::supports_avx512vlbw());
15521   match(Set result (StrEquals (Binary str1 str2) cnt));
15522   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15523 
15524   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15525   ins_encode %{
15526     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15527                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15528                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15529   %}
15530   ins_pipe( pipe_slow );
15531 %}
15532 
15533 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15534                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15535 %{
15536   predicate(VM_Version::supports_avx512vlbw());
15537   match(Set result (StrEquals (Binary str1 str2) cnt));
15538   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15539 
15540   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15541   ins_encode %{
15542     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15543                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15544                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15545   %}
15546   ins_pipe( pipe_slow );
15547 %}
15548 
15549 // fast array equals
15550 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15551                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15552 %{
15553   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15554   match(Set result (AryEq ary1 ary2));
15555   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15556 
15557   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15558   ins_encode %{
15559     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15560                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15561                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15562   %}
15563   ins_pipe( pipe_slow );
15564 %}
15565 
15566 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15567                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15568 %{
15569   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15570   match(Set result (AryEq ary1 ary2));
15571   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15572 
15573   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15574   ins_encode %{
15575     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15576                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15577                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15578   %}
15579   ins_pipe( pipe_slow );
15580 %}
15581 
15582 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15583                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15584 %{
15585   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15586   match(Set result (AryEq ary1 ary2));
15587   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15588 
15589   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15590   ins_encode %{
15591     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15592                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15593                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15594   %}
15595   ins_pipe( pipe_slow );
15596 %}
15597 
15598 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15599                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15600 %{
15601   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15602   match(Set result (AryEq ary1 ary2));
15603   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15604 
15605   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15606   ins_encode %{
15607     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15608                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15609                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15610   %}
15611   ins_pipe( pipe_slow );
15612 %}
15613 
15614 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15615                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15616                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15617                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15618                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15619 %{
15620   predicate(UseAVX >= 2);
15621   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15622   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15623          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15624          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15625          USE basic_type, KILL cr);
15626 
15627   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15628   ins_encode %{
15629     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15630                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15631                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15632                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15633                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15634                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15635                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15636   %}
15637   ins_pipe( pipe_slow );
15638 %}
15639 
15640 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15642 %{
15643   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15644   match(Set result (CountPositives ary1 len));
15645   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15646 
15647   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15648   ins_encode %{
15649     __ count_positives($ary1$$Register, $len$$Register,
15650                        $result$$Register, $tmp3$$Register,
15651                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15652   %}
15653   ins_pipe( pipe_slow );
15654 %}
15655 
15656 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15658 %{
15659   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15660   match(Set result (CountPositives ary1 len));
15661   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15662 
15663   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15664   ins_encode %{
15665     __ count_positives($ary1$$Register, $len$$Register,
15666                        $result$$Register, $tmp3$$Register,
15667                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15668   %}
15669   ins_pipe( pipe_slow );
15670 %}
15671 
15672 // fast char[] to byte[] compression
15673 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15674                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15675   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15676   match(Set result (StrCompressedCopy src (Binary dst len)));
15677   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15678          USE_KILL len, KILL tmp5, KILL cr);
15679 
15680   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15681   ins_encode %{
15682     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15683                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15684                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15685                            knoreg, knoreg);
15686   %}
15687   ins_pipe( pipe_slow );
15688 %}
15689 
15690 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15691                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15692   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15693   match(Set result (StrCompressedCopy src (Binary dst len)));
15694   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15695          USE_KILL len, KILL tmp5, KILL cr);
15696 
15697   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15698   ins_encode %{
15699     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15700                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15701                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15702                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15703   %}
15704   ins_pipe( pipe_slow );
15705 %}

// fast byte[] to char[] inflation
15707 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15708                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15709   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15710   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15711   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15712 
15713   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15714   ins_encode %{
15715     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15716                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15717   %}
15718   ins_pipe( pipe_slow );
15719 %}
15720 
15721 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15722                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15723   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15724   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15725   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15726 
15727   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15728   ins_encode %{
15729     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15730                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15731   %}
15732   ins_pipe( pipe_slow );
15733 %}
15734 
15735 // encode char[] to byte[] in ISO_8859_1
15736 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15737                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15738                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15739   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15740   match(Set result (EncodeISOArray src (Binary dst len)));
15741   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15742 
15743   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15744   ins_encode %{
15745     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15746                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15747                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15748   %}
15749   ins_pipe( pipe_slow );
15750 %}
15751 
15752 // encode char[] to byte[] in ASCII
15753 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15754                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15755                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15756   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15757   match(Set result (EncodeISOArray src (Binary dst len)));
15758   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15759 
15760   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15761   ins_encode %{
15762     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15763                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15764                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15765   %}
15766   ins_pipe( pipe_slow );
15767 %}
15768 
15769 //----------Overflow Math Instructions-----------------------------------------
15770 
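// Each pattern below only sets the condition codes; the matcher pairs it with
// a branch or cmove on the overflow condition.  A hedged sketch of the code
// shape this yields for Math.addExact(int, int) (labels illustrative):
//
//   addl  rax, rbx      # overflowAddI_rReg: rax += rbx, sets OF
//   jo    slow_path     # uncommon trap / ArithmeticException on overflow
//
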
15771 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15772 %{
15773   match(Set cr (OverflowAddI op1 op2));
15774   effect(DEF cr, USE_KILL op1, USE op2);
15775 
15776   format %{ "addl    $op1, $op2\t# overflow check int" %}
15777 
15778   ins_encode %{
15779     __ addl($op1$$Register, $op2$$Register);
15780   %}
15781   ins_pipe(ialu_reg_reg);
15782 %}
15783 
15784 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15785 %{
15786   match(Set cr (OverflowAddI op1 op2));
15787   effect(DEF cr, USE_KILL op1, USE op2);
15788 
15789   format %{ "addl    $op1, $op2\t# overflow check int" %}
15790 
15791   ins_encode %{
15792     __ addl($op1$$Register, $op2$$constant);
15793   %}
15794   ins_pipe(ialu_reg_reg);
15795 %}
15796 
15797 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15798 %{
15799   match(Set cr (OverflowAddL op1 op2));
15800   effect(DEF cr, USE_KILL op1, USE op2);
15801 
15802   format %{ "addq    $op1, $op2\t# overflow check long" %}
15803   ins_encode %{
15804     __ addq($op1$$Register, $op2$$Register);
15805   %}
15806   ins_pipe(ialu_reg_reg);
15807 %}
15808 
15809 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15810 %{
15811   match(Set cr (OverflowAddL op1 op2));
15812   effect(DEF cr, USE_KILL op1, USE op2);
15813 
15814   format %{ "addq    $op1, $op2\t# overflow check long" %}
15815   ins_encode %{
15816     __ addq($op1$$Register, $op2$$constant);
15817   %}
15818   ins_pipe(ialu_reg_reg);
15819 %}
15820 
15821 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15822 %{
15823   match(Set cr (OverflowSubI op1 op2));
15824 
15825   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15826   ins_encode %{
15827     __ cmpl($op1$$Register, $op2$$Register);
15828   %}
15829   ins_pipe(ialu_reg_reg);
15830 %}
15831 
15832 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15833 %{
15834   match(Set cr (OverflowSubI op1 op2));
15835 
15836   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15837   ins_encode %{
15838     __ cmpl($op1$$Register, $op2$$constant);
15839   %}
15840   ins_pipe(ialu_reg_reg);
15841 %}
15842 
15843 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15844 %{
15845   match(Set cr (OverflowSubL op1 op2));
15846 
15847   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15848   ins_encode %{
15849     __ cmpq($op1$$Register, $op2$$Register);
15850   %}
15851   ins_pipe(ialu_reg_reg);
15852 %}
15853 
15854 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15855 %{
15856   match(Set cr (OverflowSubL op1 op2));
15857 
15858   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15859   ins_encode %{
15860     __ cmpq($op1$$Register, $op2$$constant);
15861   %}
15862   ins_pipe(ialu_reg_reg);
15863 %}
15864 
15865 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15866 %{
15867   match(Set cr (OverflowSubI zero op2));
15868   effect(DEF cr, USE_KILL op2);
15869 
15870   format %{ "negl    $op2\t# overflow check int" %}
15871   ins_encode %{
15872     __ negl($op2$$Register);
15873   %}
15874   ins_pipe(ialu_reg_reg);
15875 %}
15876 
15877 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15878 %{
15879   match(Set cr (OverflowSubL zero op2));
15880   effect(DEF cr, USE_KILL op2);
15881 
15882   format %{ "negq    $op2\t# overflow check long" %}
15883   ins_encode %{
15884     __ negq($op2$$Register);
15885   %}
15886   ins_pipe(ialu_reg_reg);
15887 %}
15888 
15889 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15890 %{
15891   match(Set cr (OverflowMulI op1 op2));
15892   effect(DEF cr, USE_KILL op1, USE op2);
15893 
15894   format %{ "imull    $op1, $op2\t# overflow check int" %}
15895   ins_encode %{
15896     __ imull($op1$$Register, $op2$$Register);
15897   %}
15898   ins_pipe(ialu_reg_reg_alu0);
15899 %}
15900 
15901 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
15902 %{
15903   match(Set cr (OverflowMulI op1 op2));
15904   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15905 
15906   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
15907   ins_encode %{
15908     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
15909   %}
15910   ins_pipe(ialu_reg_reg_alu0);
15911 %}
15912 
15913 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15914 %{
15915   match(Set cr (OverflowMulL op1 op2));
15916   effect(DEF cr, USE_KILL op1, USE op2);
15917 
15918   format %{ "imulq    $op1, $op2\t# overflow check long" %}
15919   ins_encode %{
15920     __ imulq($op1$$Register, $op2$$Register);
15921   %}
15922   ins_pipe(ialu_reg_reg_alu0);
15923 %}
15924 
15925 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
15926 %{
15927   match(Set cr (OverflowMulL op1 op2));
15928   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15929 
15930   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
15931   ins_encode %{
15932     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
15933   %}
15934   ins_pipe(ialu_reg_reg_alu0);
15935 %}
15936 
15937 
15938 //----------Control Flow Instructions------------------------------------------
15939 // Signed compare Instructions
15940 
15941 // XXX more variants!!
15942 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15943 %{
15944   match(Set cr (CmpI op1 op2));
15945   effect(DEF cr, USE op1, USE op2);
15946 
15947   format %{ "cmpl    $op1, $op2" %}
15948   ins_encode %{
15949     __ cmpl($op1$$Register, $op2$$Register);
15950   %}
15951   ins_pipe(ialu_cr_reg_reg);
15952 %}
15953 
15954 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15955 %{
15956   match(Set cr (CmpI op1 op2));
15957 
15958   format %{ "cmpl    $op1, $op2" %}
15959   ins_encode %{
15960     __ cmpl($op1$$Register, $op2$$constant);
15961   %}
15962   ins_pipe(ialu_cr_reg_imm);
15963 %}
15964 
15965 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
15966 %{
15967   match(Set cr (CmpI op1 (LoadI op2)));
15968 
15969   ins_cost(500); // XXX
15970   format %{ "cmpl    $op1, $op2" %}
15971   ins_encode %{
15972     __ cmpl($op1$$Register, $op2$$Address);
15973   %}
15974   ins_pipe(ialu_cr_reg_mem);
15975 %}
15976 
15977 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
15978 %{
15979   match(Set cr (CmpI src zero));
15980 
15981   format %{ "testl   $src, $src" %}
15982   ins_encode %{
15983     __ testl($src$$Register, $src$$Register);
15984   %}
15985   ins_pipe(ialu_cr_reg_imm);
15986 %}
15987 
15988 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
15989 %{
15990   match(Set cr (CmpI (AndI src con) zero));
15991 
15992   format %{ "testl   $src, $con" %}
15993   ins_encode %{
15994     __ testl($src$$Register, $con$$constant);
15995   %}
15996   ins_pipe(ialu_cr_reg_imm);
15997 %}
15998 
15999 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16000 %{
16001   match(Set cr (CmpI (AndI src1 src2) zero));
16002 
16003   format %{ "testl   $src1, $src2" %}
16004   ins_encode %{
16005     __ testl($src1$$Register, $src2$$Register);
16006   %}
16007   ins_pipe(ialu_cr_reg_imm);
16008 %}
16009 
16010 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16011 %{
16012   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16013 
16014   format %{ "testl   $src, $mem" %}
16015   ins_encode %{
16016     __ testl($src$$Register, $mem$$Address);
16017   %}
16018   ins_pipe(ialu_cr_reg_mem);
16019 %}
16020 
16021 // Unsigned compare Instructions; really, same as signed except they
16022 // produce an rFlagsRegU instead of rFlagsReg.
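// (The emitted cmp/test bytes are identical to the signed forms; only the
// condition a later branch consumes differs.  For example, after
// "cmpl rax, rbx", signed less-than is "jl" (SF != OF) while unsigned
// below is "jb" (CF set).)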
16023 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16024 %{
16025   match(Set cr (CmpU op1 op2));
16026 
16027   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16028   ins_encode %{
16029     __ cmpl($op1$$Register, $op2$$Register);
16030   %}
16031   ins_pipe(ialu_cr_reg_reg);
16032 %}
16033 
16034 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16035 %{
16036   match(Set cr (CmpU op1 op2));
16037 
16038   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16039   ins_encode %{
16040     __ cmpl($op1$$Register, $op2$$constant);
16041   %}
16042   ins_pipe(ialu_cr_reg_imm);
16043 %}
16044 
16045 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16046 %{
16047   match(Set cr (CmpU op1 (LoadI op2)));
16048 
16049   ins_cost(500); // XXX
16050   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16051   ins_encode %{
16052     __ cmpl($op1$$Register, $op2$$Address);
16053   %}
16054   ins_pipe(ialu_cr_reg_mem);
16055 %}
16056 
16057 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16058 %{
16059   match(Set cr (CmpU src zero));
16060 
16061   format %{ "testl   $src, $src\t# unsigned" %}
16062   ins_encode %{
16063     __ testl($src$$Register, $src$$Register);
16064   %}
16065   ins_pipe(ialu_cr_reg_imm);
16066 %}
16067 
16068 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16069 %{
16070   match(Set cr (CmpP op1 op2));
16071 
16072   format %{ "cmpq    $op1, $op2\t# ptr" %}
16073   ins_encode %{
16074     __ cmpq($op1$$Register, $op2$$Register);
16075   %}
16076   ins_pipe(ialu_cr_reg_reg);
16077 %}
16078 
16079 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16080 %{
16081   match(Set cr (CmpP op1 (LoadP op2)));
16082   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16083 
16084   ins_cost(500); // XXX
16085   format %{ "cmpq    $op1, $op2\t# ptr" %}
16086   ins_encode %{
16087     __ cmpq($op1$$Register, $op2$$Address);
16088   %}
16089   ins_pipe(ialu_cr_reg_mem);
16090 %}
16091 
16092 // XXX this is generalized by compP_rReg_mem???
16093 // Compare raw pointer (used in out-of-heap check).
16094 // Only works because non-oop pointers must be raw pointers
16095 // and raw pointers have no anti-dependencies.
16096 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16097 %{
16098   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16099             n->in(2)->as_Load()->barrier_data() == 0);
16100   match(Set cr (CmpP op1 (LoadP op2)));
16101 
16102   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16103   ins_encode %{
16104     __ cmpq($op1$$Register, $op2$$Address);
16105   %}
16106   ins_pipe(ialu_cr_reg_mem);
16107 %}
16108 
16109 // This will generate a signed flags result. This should be OK since
16110 // any compare to a zero should be eq/neq.
16111 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16112 %{
16113   match(Set cr (CmpP src zero));
16114 
16115   format %{ "testq   $src, $src\t# ptr" %}
16116   ins_encode %{
16117     __ testq($src$$Register, $src$$Register);
16118   %}
16119   ins_pipe(ialu_cr_reg_imm);
16120 %}
16121 
16122 // This will generate a signed flags result. This should be OK since
16123 // any compare to a zero should be eq/neq.
16124 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16125 %{
16126   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16127             n->in(1)->as_Load()->barrier_data() == 0);
16128   match(Set cr (CmpP (LoadP op) zero));
16129 
16130   ins_cost(500); // XXX
16131   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16132   ins_encode %{
    __ testq($op$$Address, 0xFFFFFFFF);  // imm32 sign-extends to all-ones: ZF set iff the pointer is null
16134   %}
16135   ins_pipe(ialu_cr_reg_imm);
16136 %}
16137 
16138 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16139 %{
16140   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16141             n->in(1)->as_Load()->barrier_data() == 0);
16142   match(Set cr (CmpP (LoadP mem) zero));
16143 
16144   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16145   ins_encode %{
16146     __ cmpq(r12, $mem$$Address);
16147   %}
16148   ins_pipe(ialu_cr_reg_mem);
16149 %}
16150 
16151 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16152 %{
16153   match(Set cr (CmpN op1 op2));
16154 
16155   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16156   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16157   ins_pipe(ialu_cr_reg_reg);
16158 %}
16159 
16160 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16161 %{
16162   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16163   match(Set cr (CmpN src (LoadN mem)));
16164 
16165   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16166   ins_encode %{
16167     __ cmpl($src$$Register, $mem$$Address);
16168   %}
16169   ins_pipe(ialu_cr_reg_mem);
16170 %}
16171 
16172 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16173   match(Set cr (CmpN op1 op2));
16174 
16175   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16176   ins_encode %{
16177     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16178   %}
16179   ins_pipe(ialu_cr_reg_imm);
16180 %}
16181 
16182 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16183 %{
16184   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16185   match(Set cr (CmpN src (LoadN mem)));
16186 
16187   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16188   ins_encode %{
16189     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16190   %}
16191   ins_pipe(ialu_cr_reg_mem);
16192 %}
16193 
16194 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16195   match(Set cr (CmpN op1 op2));
16196 
16197   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16198   ins_encode %{
16199     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16200   %}
16201   ins_pipe(ialu_cr_reg_imm);
16202 %}
16203 
16204 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16205 %{
16206   predicate(!UseCompactObjectHeaders);
16207   match(Set cr (CmpN src (LoadNKlass mem)));
16208 
16209   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16210   ins_encode %{
16211     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16212   %}
16213   ins_pipe(ialu_cr_reg_mem);
16214 %}
16215 
16216 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16217   match(Set cr (CmpN src zero));
16218 
16219   format %{ "testl   $src, $src\t# compressed ptr" %}
16220   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16221   ins_pipe(ialu_cr_reg_imm);
16222 %}
16223 
16224 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16225 %{
16226   predicate(CompressedOops::base() != nullptr &&
16227             n->in(1)->as_Load()->barrier_data() == 0);
16228   match(Set cr (CmpN (LoadN mem) zero));
16229 
16230   ins_cost(500); // XXX
16231   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16232   ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF);  // all-ones mask, as in the format string: ZF set iff the narrow oop is null
16234   %}
16235   ins_pipe(ialu_cr_reg_mem);
16236 %}
16237 
16238 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16239 %{
16240   predicate(CompressedOops::base() == nullptr &&
16241             n->in(1)->as_Load()->barrier_data() == 0);
16242   match(Set cr (CmpN (LoadN mem) zero));
16243 
16244   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16245   ins_encode %{
16246     __ cmpl(r12, $mem$$Address);
16247   %}
16248   ins_pipe(ialu_cr_reg_mem);
16249 %}
16250 
16251 // Yanked all unsigned pointer compare operations.
16252 // Pointer compares are done with CmpP which is already unsigned.
16253 
16254 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16255 %{
16256   match(Set cr (CmpL op1 op2));
16257 
16258   format %{ "cmpq    $op1, $op2" %}
16259   ins_encode %{
16260     __ cmpq($op1$$Register, $op2$$Register);
16261   %}
16262   ins_pipe(ialu_cr_reg_reg);
16263 %}
16264 
16265 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16266 %{
16267   match(Set cr (CmpL op1 op2));
16268 
16269   format %{ "cmpq    $op1, $op2" %}
16270   ins_encode %{
16271     __ cmpq($op1$$Register, $op2$$constant);
16272   %}
16273   ins_pipe(ialu_cr_reg_imm);
16274 %}
16275 
16276 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16277 %{
16278   match(Set cr (CmpL op1 (LoadL op2)));
16279 
16280   format %{ "cmpq    $op1, $op2" %}
16281   ins_encode %{
16282     __ cmpq($op1$$Register, $op2$$Address);
16283   %}
16284   ins_pipe(ialu_cr_reg_mem);
16285 %}
16286 
16287 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16288 %{
16289   match(Set cr (CmpL src zero));
16290 
16291   format %{ "testq   $src, $src" %}
16292   ins_encode %{
16293     __ testq($src$$Register, $src$$Register);
16294   %}
16295   ins_pipe(ialu_cr_reg_imm);
16296 %}
16297 
16298 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16299 %{
16300   match(Set cr (CmpL (AndL src con) zero));
16301 
16302   format %{ "testq   $src, $con\t# long" %}
16303   ins_encode %{
16304     __ testq($src$$Register, $con$$constant);
16305   %}
16306   ins_pipe(ialu_cr_reg_imm);
16307 %}
16308 
16309 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16310 %{
16311   match(Set cr (CmpL (AndL src1 src2) zero));
16312 
16313   format %{ "testq   $src1, $src2\t# long" %}
16314   ins_encode %{
16315     __ testq($src1$$Register, $src2$$Register);
16316   %}
16317   ins_pipe(ialu_cr_reg_imm);
16318 %}
16319 
16320 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16321 %{
16322   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16323 
16324   format %{ "testq   $src, $mem" %}
16325   ins_encode %{
16326     __ testq($src$$Register, $mem$$Address);
16327   %}
16328   ins_pipe(ialu_cr_reg_mem);
16329 %}
16330 
16331 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16332 %{
16333   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16334 
16335   format %{ "testq   $src, $mem" %}
16336   ins_encode %{
16337     __ testq($src$$Register, $mem$$Address);
16338   %}
16339   ins_pipe(ialu_cr_reg_mem);
16340 %}
16341 
16342 // Manifest a CmpU result in an integer register.  Very painful.
16343 // This is the test to avoid.
16344 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16345 %{
16346   match(Set dst (CmpU3 src1 src2));
16347   effect(KILL flags);
16348 
16349   ins_cost(275); // XXX
16350   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16351             "movl    $dst, -1\n\t"
16352             "jb,u    done\n\t"
16353             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16354     "done:" %}
16355   ins_encode %{
16356     Label done;
16357     __ cmpl($src1$$Register, $src2$$Register);
16358     __ movl($dst$$Register, -1);
16359     __ jccb(Assembler::below, done);
16360     __ setcc(Assembler::notZero, $dst$$Register);
16361     __ bind(done);
16362   %}
16363   ins_pipe(pipe_slow);
16364 %}
16365 
16366 // Manifest a CmpL result in an integer register.  Very painful.
16367 // This is the test to avoid.
16368 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16369 %{
16370   match(Set dst (CmpL3 src1 src2));
16371   effect(KILL flags);
16372 
16373   ins_cost(275); // XXX
16374   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16375             "movl    $dst, -1\n\t"
16376             "jl,s    done\n\t"
16377             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16378     "done:" %}
16379   ins_encode %{
16380     Label done;
16381     __ cmpq($src1$$Register, $src2$$Register);
16382     __ movl($dst$$Register, -1);
16383     __ jccb(Assembler::less, done);
16384     __ setcc(Assembler::notZero, $dst$$Register);
16385     __ bind(done);
16386   %}
16387   ins_pipe(pipe_slow);
16388 %}
16389 
16390 // Manifest a CmpUL result in an integer register.  Very painful.
16391 // This is the test to avoid.
16392 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16393 %{
16394   match(Set dst (CmpUL3 src1 src2));
16395   effect(KILL flags);
16396 
16397   ins_cost(275); // XXX
16398   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16399             "movl    $dst, -1\n\t"
16400             "jb,u    done\n\t"
16401             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16402     "done:" %}
16403   ins_encode %{
16404     Label done;
16405     __ cmpq($src1$$Register, $src2$$Register);
16406     __ movl($dst$$Register, -1);
16407     __ jccb(Assembler::below, done);
16408     __ setcc(Assembler::notZero, $dst$$Register);
16409     __ bind(done);
16410   %}
16411   ins_pipe(pipe_slow);
16412 %}
16413 
16414 // Unsigned long compare Instructions; really, same as signed long except they
16415 // produce an rFlagsRegU instead of rFlagsReg.
16416 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16417 %{
16418   match(Set cr (CmpUL op1 op2));
16419 
16420   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16421   ins_encode %{
16422     __ cmpq($op1$$Register, $op2$$Register);
16423   %}
16424   ins_pipe(ialu_cr_reg_reg);
16425 %}
16426 
16427 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16428 %{
16429   match(Set cr (CmpUL op1 op2));
16430 
16431   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16432   ins_encode %{
16433     __ cmpq($op1$$Register, $op2$$constant);
16434   %}
16435   ins_pipe(ialu_cr_reg_imm);
16436 %}
16437 
16438 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16439 %{
16440   match(Set cr (CmpUL op1 (LoadL op2)));
16441 
16442   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16443   ins_encode %{
16444     __ cmpq($op1$$Register, $op2$$Address);
16445   %}
16446   ins_pipe(ialu_cr_reg_mem);
16447 %}
16448 
16449 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16450 %{
16451   match(Set cr (CmpUL src zero));
16452 
16453   format %{ "testq   $src, $src\t# unsigned" %}
16454   ins_encode %{
16455     __ testq($src$$Register, $src$$Register);
16456   %}
16457   ins_pipe(ialu_cr_reg_imm);
16458 %}
16459 
16460 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16461 %{
16462   match(Set cr (CmpI (LoadB mem) imm));
16463 
16464   ins_cost(125);
16465   format %{ "cmpb    $mem, $imm" %}
16466   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16467   ins_pipe(ialu_cr_reg_mem);
16468 %}
16469 
16470 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16471 %{
16472   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16473 
16474   ins_cost(125);
16475   format %{ "testb   $mem, $imm\t# ubyte" %}
16476   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16477   ins_pipe(ialu_cr_reg_mem);
16478 %}
16479 
16480 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16481 %{
16482   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16483 
16484   ins_cost(125);
16485   format %{ "testb   $mem, $imm\t# byte" %}
16486   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16487   ins_pipe(ialu_cr_reg_mem);
16488 %}
16489 
16490 //----------Max and Min--------------------------------------------------------
16491 // Min Instructions
16492 
16493 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16494 %{
16495   predicate(!UseAPX);
16496   effect(USE_DEF dst, USE src, USE cr);
16497 
16498   format %{ "cmovlgt $dst, $src\t# min" %}
16499   ins_encode %{
16500     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16501   %}
16502   ins_pipe(pipe_cmov_reg);
16503 %}
16504 
16505 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16506 %{
16507   predicate(UseAPX);
16508   effect(DEF dst, USE src1, USE src2, USE cr);
16509 
16510   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16511   ins_encode %{
16512     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16513   %}
16514   ins_pipe(pipe_cmov_reg);
16515 %}
16516 
16517 instruct minI_rReg(rRegI dst, rRegI src)
16518 %{
16519   predicate(!UseAPX);
16520   match(Set dst (MinI dst src));
16521 
16522   ins_cost(200);
16523   expand %{
16524     rFlagsReg cr;
16525     compI_rReg(cr, dst, src);
16526     cmovI_reg_g(dst, src, cr);
16527   %}
16528 %}
16529 
16530 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16531 %{
16532   predicate(UseAPX);
16533   match(Set dst (MinI src1 src2));
16534   effect(DEF dst, USE src1, USE src2);
16535 
16536   ins_cost(200);
16537   expand %{
16538     rFlagsReg cr;
16539     compI_rReg(cr, src1, src2);
16540     cmovI_reg_g_ndd(dst, src1, src2, cr);
16541   %}
16542 %}
16543 
16544 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16545 %{
16546   predicate(!UseAPX);
16547   effect(USE_DEF dst, USE src, USE cr);
16548 
16549   format %{ "cmovllt $dst, $src\t# max" %}
16550   ins_encode %{
16551     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16552   %}
16553   ins_pipe(pipe_cmov_reg);
16554 %}
16555 
16556 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16557 %{
16558   predicate(UseAPX);
16559   effect(DEF dst, USE src1, USE src2, USE cr);
16560 
16561   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16562   ins_encode %{
16563     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16564   %}
16565   ins_pipe(pipe_cmov_reg);
16566 %}
16567 
16568 instruct maxI_rReg(rRegI dst, rRegI src)
16569 %{
16570   predicate(!UseAPX);
16571   match(Set dst (MaxI dst src));
16572 
16573   ins_cost(200);
16574   expand %{
16575     rFlagsReg cr;
16576     compI_rReg(cr, dst, src);
16577     cmovI_reg_l(dst, src, cr);
16578   %}
16579 %}
16580 
16581 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16582 %{
16583   predicate(UseAPX);
16584   match(Set dst (MaxI src1 src2));
16585   effect(DEF dst, USE src1, USE src2);
16586 
16587   ins_cost(200);
16588   expand %{
16589     rFlagsReg cr;
16590     compI_rReg(cr, src1, src2);
16591     cmovI_reg_l_ndd(dst, src1, src2, cr);
16592   %}
16593 %}
16594 
16595 // ============================================================================
16596 // Branch Instructions
16597 
16598 // Jump Direct - Label defines a relative address from JMP+1
16599 instruct jmpDir(label labl)
16600 %{
16601   match(Goto);
16602   effect(USE labl);
16603 
16604   ins_cost(300);
16605   format %{ "jmp     $labl" %}
16606   size(5);
16607   ins_encode %{
16608     Label* L = $labl$$label;
16609     __ jmp(*L, false); // Always long jump
16610   %}
16611   ins_pipe(pipe_jmp);
16612 %}
16613 
16614 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16615 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16616 %{
16617   match(If cop cr);
16618   effect(USE labl);
16619 
16620   ins_cost(300);
16621   format %{ "j$cop     $labl" %}
16622   size(6);
16623   ins_encode %{
16624     Label* L = $labl$$label;
16625     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16626   %}
16627   ins_pipe(pipe_jcc);
16628 %}
16629 
16630 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16631 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16632 %{
16633   match(CountedLoopEnd cop cr);
16634   effect(USE labl);
16635 
16636   ins_cost(300);
16637   format %{ "j$cop     $labl\t# loop end" %}
16638   size(6);
16639   ins_encode %{
16640     Label* L = $labl$$label;
16641     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16642   %}
16643   ins_pipe(pipe_jcc);
16644 %}
16645 
16646 // Jump Direct Conditional - using unsigned comparison
16647 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16648   match(If cop cmp);
16649   effect(USE labl);
16650 
16651   ins_cost(300);
16652   format %{ "j$cop,u   $labl" %}
16653   size(6);
16654   ins_encode %{
16655     Label* L = $labl$$label;
16656     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16657   %}
16658   ins_pipe(pipe_jcc);
16659 %}
16660 
16661 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16662   match(If cop cmp);
16663   effect(USE labl);
16664 
16665   ins_cost(200);
16666   format %{ "j$cop,u   $labl" %}
16667   size(6);
16668   ins_encode %{
16669     Label* L = $labl$$label;
16670     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16671   %}
16672   ins_pipe(pipe_jcc);
16673 %}
16674 
16675 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16676   match(If cop cmp);
16677   effect(USE labl);
16678 
16679   ins_cost(200);
16680   format %{ $$template
16681     if ($cop$$cmpcode == Assembler::notEqual) {
16682       $$emit$$"jp,u    $labl\n\t"
16683       $$emit$$"j$cop,u   $labl"
16684     } else {
16685       $$emit$$"jp,u    done\n\t"
16686       $$emit$$"j$cop,u   $labl\n\t"
16687       $$emit$$"done:"
16688     }
16689   %}
16690   ins_encode %{
16691     Label* l = $labl$$label;
16692     if ($cop$$cmpcode == Assembler::notEqual) {
16693       __ jcc(Assembler::parity, *l, false);
16694       __ jcc(Assembler::notEqual, *l, false);
16695     } else if ($cop$$cmpcode == Assembler::equal) {
16696       Label done;
16697       __ jccb(Assembler::parity, done);
16698       __ jcc(Assembler::equal, *l, false);
16699       __ bind(done);
16700     } else {
16701        ShouldNotReachHere();
16702     }
16703   %}
16704   ins_pipe(pipe_jcc);
16705 %}
16706 
16707 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
16713 
16714 instruct partialSubtypeCheck(rdi_RegP result,
16715                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16716                              rFlagsReg cr)
16717 %{
16718   match(Set result (PartialSubtypeCheck sub super));
16719   predicate(!UseSecondarySupersTable);
16720   effect(KILL rcx, KILL cr);
16721 
16722   ins_cost(1100);  // slightly larger than the next version
16723   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16724             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16725             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16726             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16727             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16728             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16729             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
16730     "miss:\t" %}
16731 
16732   ins_encode %{
16733     Label miss;
16734     // NB: Callers may assume that, when $result is a valid register,
16735     // check_klass_subtype_slow_path_linear sets it to a nonzero
16736     // value.
16737     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16738                                             $rcx$$Register, $result$$Register,
16739                                             nullptr, &miss,
16740                                             /*set_cond_codes:*/ true);
16741     __ xorptr($result$$Register, $result$$Register);
16742     __ bind(miss);
16743   %}
16744 
16745   ins_pipe(pipe_slow);
16746 %}
16747 
16748 // ============================================================================
16749 // Two versions of hashtable-based partialSubtypeCheck, both used when
16750 // we need to search for a super class in the secondary supers array.
16751 // The first is used when we don't know _a priori_ the class being
16752 // searched for. The second, far more common, is used when we do know:
16753 // this is used for instanceof, checkcast, and any case where C2 can
16754 // determine it by constant propagation.
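//
// Conceptually (a hedged sketch; the details live in
// MacroAssembler::lookup_secondary_supers_table_*): every class is assigned a
// hash slot, a per-klass 64-bit bitmap records which slots its secondary
// supers occupy, and a lookup tests the candidate's bit, probing the packed
// array only when the bit is set:
//
//   if ((sub_bitmap & (1ULL << slot)) == 0) return miss;  // common fast path
//   // else compare the array element indexed by the popcount of the bitmap
//   // bits at and below the slot; on mismatch, fall back to a slower probe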
16755 
16756 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16757                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16758                                        rFlagsReg cr)
16759 %{
16760   match(Set result (PartialSubtypeCheck sub super));
16761   predicate(UseSecondarySupersTable);
16762   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16763 
16764   ins_cost(1000);
16765   format %{ "partialSubtypeCheck $result, $sub, $super" %}
16766 
16767   ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
16770   %}
16771 
16772   ins_pipe(pipe_slow);
16773 %}
16774 
16775 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16776                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16777                                        rFlagsReg cr)
16778 %{
16779   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16780   predicate(UseSecondarySupersTable);
16781   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16782 
16783   ins_cost(700);  // smaller than the next version
16784   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16785 
16786   ins_encode %{
16787     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16788     if (InlineSecondarySupersTest) {
16789       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
16790                                        $temp3$$Register, $temp4$$Register, $result$$Register,
16791                                        super_klass_slot);
16792     } else {
16793       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16794     }
16795   %}
16796 
16797   ins_pipe(pipe_slow);
16798 %}
16799 
16800 // ============================================================================
16801 // Branch Instructions -- short offset versions
16802 //
16803 // These instructions are used to replace jumps of a long offset (the default
16804 // match) with jumps of a shorter offset.  These instructions are all tagged
16805 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16806 // match rules in general matching.  Instead, the ADLC generates a conversion
16807 // method in the MachNode which can be used to do in-place replacement of the
16808 // long variant with the shorter variant.  The compiler will determine if a
16809 // branch can be taken by the is_short_branch_offset() predicate in the machine
16810 // specific code section of the file.
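//
// For scale: a near "jmp rel32" (E9 cd) is 5 bytes and a near
// "jcc rel32" (0F 8x cd) is 6, while the short forms "jmp rel8" (EB cb)
// and "jcc rel8" (7x cb) are 2 bytes each, which is why the variants
// below declare size(2).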
16811 
16812 // Jump Direct - Label defines a relative address from JMP+1
16813 instruct jmpDir_short(label labl) %{
16814   match(Goto);
16815   effect(USE labl);
16816 
16817   ins_cost(300);
16818   format %{ "jmp,s   $labl" %}
16819   size(2);
16820   ins_encode %{
16821     Label* L = $labl$$label;
16822     __ jmpb(*L);
16823   %}
16824   ins_pipe(pipe_jmp);
16825   ins_short_branch(1);
16826 %}
16827 
16828 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16829 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16830   match(If cop cr);
16831   effect(USE labl);
16832 
16833   ins_cost(300);
16834   format %{ "j$cop,s   $labl" %}
16835   size(2);
16836   ins_encode %{
16837     Label* L = $labl$$label;
16838     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16839   %}
16840   ins_pipe(pipe_jcc);
16841   ins_short_branch(1);
16842 %}
16843 
16844 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16845 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16846   match(CountedLoopEnd cop cr);
16847   effect(USE labl);
16848 
16849   ins_cost(300);
16850   format %{ "j$cop,s   $labl\t# loop end" %}
16851   size(2);
16852   ins_encode %{
16853     Label* L = $labl$$label;
16854     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16855   %}
16856   ins_pipe(pipe_jcc);
16857   ins_short_branch(1);
16858 %}
16859 
16860 // Jump Direct Conditional - using unsigned comparison
16861 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16862   match(If cop cmp);
16863   effect(USE labl);
16864 
16865   ins_cost(300);
16866   format %{ "j$cop,us  $labl" %}
16867   size(2);
16868   ins_encode %{
16869     Label* L = $labl$$label;
16870     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16871   %}
16872   ins_pipe(pipe_jcc);
16873   ins_short_branch(1);
16874 %}
16875 
16876 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16877   match(If cop cmp);
16878   effect(USE labl);
16879 
16880   ins_cost(300);
16881   format %{ "j$cop,us  $labl" %}
16882   size(2);
16883   ins_encode %{
16884     Label* L = $labl$$label;
16885     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16886   %}
16887   ins_pipe(pipe_jcc);
16888   ins_short_branch(1);
16889 %}
16890 
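// An unordered (NaN) float compare sets PF, so the eq/ne tests below need
// an extra jp: for notEqual, NaN compares not-equal and must take the
// branch; for equal, NaN must fall through, so the jp skips over the je.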
16891 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16892   match(If cop cmp);
16893   effect(USE labl);
16894 
16895   ins_cost(300);
16896   format %{ $$template
16897     if ($cop$$cmpcode == Assembler::notEqual) {
16898       $$emit$$"jp,u,s  $labl\n\t"
16899       $$emit$$"j$cop,u,s  $labl"
16900     } else {
16901       $$emit$$"jp,u,s  done\n\t"
16902       $$emit$$"j$cop,u,s  $labl\n\t"
16903       $$emit$$"done:"
16904     }
16905   %}
16906   size(4);
16907   ins_encode %{
16908     Label* l = $labl$$label;
16909     if ($cop$$cmpcode == Assembler::notEqual) {
16910       __ jccb(Assembler::parity, *l);
16911       __ jccb(Assembler::notEqual, *l);
16912     } else if ($cop$$cmpcode == Assembler::equal) {
16913       Label done;
16914       __ jccb(Assembler::parity, done);
16915       __ jccb(Assembler::equal, *l);
16916       __ bind(done);
16917     } else {
16918        ShouldNotReachHere();
16919     }
16920   %}
16921   ins_pipe(pipe_jcc);
16922   ins_short_branch(1);
16923 %}
16924 
16925 // ============================================================================
16926 // inlined locking and unlocking
16927 
16928 instruct cmpFastLockLightweight(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
16929   match(Set cr (FastLock object box));
16930   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
16931   ins_cost(300);
16932   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
16933   ins_encode %{
16934     __ fast_lock_lightweight($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16935   %}
16936   ins_pipe(pipe_slow);
16937 %}
16938 
16939 instruct cmpFastUnlockLightweight(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
16940   match(Set cr (FastUnlock object rax_reg));
16941   effect(TEMP tmp, USE_KILL rax_reg);
16942   ins_cost(300);
16943   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
16944   ins_encode %{
16945     __ fast_unlock_lightweight($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
16946   %}
16947   ins_pipe(pipe_slow);
16948 %}
16949 
16950 
16951 // ============================================================================
16952 // Safepoint Instructions
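// The poll reads the thread-local polling page: when the page is armed
// the load faults and the signal handler diverts the thread into the
// safepoint machinery; when disarmed, the testl merely sets flags.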
16953 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
16954 %{
16955   match(SafePoint poll);
16956   effect(KILL cr, USE poll);
16957 
16958   format %{ "testl   rax, [$poll]\t"
16959             "# Safepoint: poll for GC" %}
16960   ins_cost(125);
16961   ins_encode %{
16962     __ relocate(relocInfo::poll_type);
16963     address pre_pc = __ pc();
16964     __ testl(rax, Address($poll$$Register, 0));
16965     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
16966   %}
16967   ins_pipe(ialu_reg_mem);
16968 %}
16969 
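// MaskAll replicates a scalar boolean (0 or -1) into every lane of an
// opmask register. The _GT32 variant below first sign-extends the int
// source with movslq so that masks wider than 32 bits replicate the
// value into the upper bits as well.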
16970 instruct mask_all_evexL(kReg dst, rRegL src) %{
16971   match(Set dst (MaskAll src));
16972   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
16973   ins_encode %{
16974     int mask_len = Matcher::vector_length(this);
16975     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
16976   %}
16977   ins_pipe( pipe_slow );
16978 %}
16979 
16980 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
16981   predicate(Matcher::vector_length(n) > 32);
16982   match(Set dst (MaskAll src));
16983   effect(TEMP tmp);
16984   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
16985   ins_encode %{
16986     int mask_len = Matcher::vector_length(this);
16987     __ movslq($tmp$$Register, $src$$Register);
16988     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
16989   %}
16990   ins_pipe( pipe_slow );
16991 %}
16992 
16993 // ============================================================================
16994 // Procedure Call/Return Instructions
16995 // Call Java Static Instruction
16996 // Note: If this code changes, the corresponding ret_addr_offset() and
16997 //       compute_padding() functions will have to be adjusted.
16998 instruct CallStaticJavaDirect(method meth) %{
16999   match(CallStaticJava);
17000   effect(USE meth);
17001 
17002   ins_cost(300);
17003   format %{ "call,static " %}
17004   opcode(0xE8); /* E8 cd */
17005   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17006   ins_pipe(pipe_slow);
17007   ins_alignment(4);
17008 %}
17009 
17010 // Call Java Dynamic Instruction
17011 // Note: If this code changes, the corresponding ret_addr_offset() and
17012 //       compute_padding() functions will have to be adjusted.
17013 instruct CallDynamicJavaDirect(method meth)
17014 %{
17015   match(CallDynamicJava);
17016   effect(USE meth);
17017 
17018   ins_cost(300);
17019   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17020             "call,dynamic " %}
17021   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17022   ins_pipe(pipe_slow);
17023   ins_alignment(4);
17024 %}
17025 
17026 // Call Runtime Instruction
17027 instruct CallRuntimeDirect(method meth)
17028 %{
17029   match(CallRuntime);
17030   effect(USE meth);
17031 
17032   ins_cost(300);
17033   format %{ "call,runtime " %}
17034   ins_encode(clear_avx, Java_To_Runtime(meth));
17035   ins_pipe(pipe_slow);
17036 %}
17037 
17038 // Call runtime without safepoint
17039 instruct CallLeafDirect(method meth)
17040 %{
17041   match(CallLeaf);
17042   effect(USE meth);
17043 
17044   ins_cost(300);
17045   format %{ "call_leaf,runtime " %}
17046   ins_encode(clear_avx, Java_To_Runtime(meth));
17047   ins_pipe(pipe_slow);
17048 %}
17049 
17050 // Call runtime without safepoint and with vector arguments
17051 instruct CallLeafDirectVector(method meth)
17052 %{
17053   match(CallLeafVector);
17054   effect(USE meth);
17055 
17056   ins_cost(300);
17057   format %{ "call_leaf,vector " %}
17058   ins_encode(Java_To_Runtime(meth));
17059   ins_pipe(pipe_slow);
17060 %}
17061 
17062 // Call runtime without safepoint
17063 instruct CallLeafNoFPDirect(method meth)
17064 %{
17065   match(CallLeafNoFP);
17066   effect(USE meth);
17067 
17068   ins_cost(300);
17069   format %{ "call_leaf_nofp,runtime " %}
17070   ins_encode(clear_avx, Java_To_Runtime(meth));
17071   ins_pipe(pipe_slow);
17072 %}
17073 
17074 // Return Instruction
17075 // Remove the return address & jump to it.
// Note: we always emit a nop after a ret to make sure there is room
// for safepoint patching.
17078 instruct Ret()
17079 %{
17080   match(Return);
17081 
17082   format %{ "ret" %}
17083   ins_encode %{
17084     __ ret(0);
17085   %}
17086   ins_pipe(pipe_jmp);
17087 %}
17088 
17089 // Tail Call; Jump from runtime stub to Java code.
17090 // Also known as an 'interprocedural jump'.
17091 // Target of jump will eventually return to caller.
17092 // TailJump below removes the return address.
17093 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17094 // emitted just above the TailCall which has reset rbp to the caller state.
17095 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17096 %{
17097   match(TailCall jump_target method_ptr);
17098 
17099   ins_cost(300);
17100   format %{ "jmp     $jump_target\t# rbx holds method" %}
17101   ins_encode %{
17102     __ jmp($jump_target$$Register);
17103   %}
17104   ins_pipe(pipe_jmp);
17105 %}
17106 
17107 // Tail Jump; remove the return address; jump to target.
17108 // TailCall above leaves the return address around.
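// The popped return address stays in rdx, where the exception-handling
// runtime (e.g. the rethrow stub) expects to find the exception pc,
// with the exception oop itself pinned in rax.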
17109 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17110 %{
17111   match(TailJump jump_target ex_oop);
17112 
17113   ins_cost(300);
17114   format %{ "popq    rdx\t# pop return address\n\t"
17115             "jmp     $jump_target" %}
17116   ins_encode %{
17117     __ popq(as_Register(RDX_enc));
17118     __ jmp($jump_target$$Register);
17119   %}
17120   ins_pipe(pipe_jmp);
17121 %}
17122 
17123 // Forward exception.
17124 instruct ForwardExceptionjmp()
17125 %{
17126   match(ForwardException);
17127 
17128   format %{ "jmp     forward_exception_stub" %}
17129   ins_encode %{
17130     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17131   %}
17132   ins_pipe(pipe_jmp);
17133 %}
17134 
17135 // Create exception oop: created by stack-crawling runtime code.
17136 // Created exception is now available to this handler, and is setup
17137 // just prior to jumping to this handler.  No code emitted.
17138 instruct CreateException(rax_RegP ex_oop)
17139 %{
17140   match(Set ex_oop (CreateEx));
17141 
17142   size(0);
17143   // use the following format syntax
17144   format %{ "# exception oop is in rax; no code emitted" %}
17145   ins_encode();
17146   ins_pipe(empty);
17147 %}
17148 
17149 // Rethrow exception:
17150 // The exception oop will come in the first argument position.
17151 // Then JUMP (not call) to the rethrow stub code.
17152 instruct RethrowException()
17153 %{
17154   match(Rethrow);
17155 
17156   // use the following format syntax
17157   format %{ "jmp     rethrow_stub" %}
17158   ins_encode %{
17159     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17160   %}
17161   ins_pipe(pipe_jmp);
17162 %}
17163 
17164 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
17168 instruct tlsLoadP(r15_RegP dst) %{
17169   match(Set dst (ThreadLocal));
17170   effect(DEF dst);
17171 
17172   size(0);
17173   format %{ "# TLS is in R15" %}
17174   ins_encode( /*empty encoding*/ );
17175   ins_pipe(ialu_reg_reg);
17176 %}
17177 
17178 instruct addF_reg(regF dst, regF src) %{
17179   predicate(UseAVX == 0);
17180   match(Set dst (AddF dst src));
17181 
17182   format %{ "addss   $dst, $src" %}
17183   ins_cost(150);
17184   ins_encode %{
17185     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17186   %}
17187   ins_pipe(pipe_slow);
17188 %}
17189 
17190 instruct addF_mem(regF dst, memory src) %{
17191   predicate(UseAVX == 0);
17192   match(Set dst (AddF dst (LoadF src)));
17193 
17194   format %{ "addss   $dst, $src" %}
17195   ins_cost(150);
17196   ins_encode %{
17197     __ addss($dst$$XMMRegister, $src$$Address);
17198   %}
17199   ins_pipe(pipe_slow);
17200 %}
17201 
17202 instruct addF_imm(regF dst, immF con) %{
17203   predicate(UseAVX == 0);
17204   match(Set dst (AddF dst con));
17205   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17206   ins_cost(150);
17207   ins_encode %{
17208     __ addss($dst$$XMMRegister, $constantaddress($con));
17209   %}
17210   ins_pipe(pipe_slow);
17211 %}
17212 
17213 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17214   predicate(UseAVX > 0);
17215   match(Set dst (AddF src1 src2));
17216 
17217   format %{ "vaddss  $dst, $src1, $src2" %}
17218   ins_cost(150);
17219   ins_encode %{
17220     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17221   %}
17222   ins_pipe(pipe_slow);
17223 %}
17224 
17225 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17226   predicate(UseAVX > 0);
17227   match(Set dst (AddF src1 (LoadF src2)));
17228 
17229   format %{ "vaddss  $dst, $src1, $src2" %}
17230   ins_cost(150);
17231   ins_encode %{
17232     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17233   %}
17234   ins_pipe(pipe_slow);
17235 %}
17236 
17237 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17238   predicate(UseAVX > 0);
17239   match(Set dst (AddF src con));
17240 
17241   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17242   ins_cost(150);
17243   ins_encode %{
17244     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17245   %}
17246   ins_pipe(pipe_slow);
17247 %}
17248 
17249 instruct addD_reg(regD dst, regD src) %{
17250   predicate(UseAVX == 0);
17251   match(Set dst (AddD dst src));
17252 
17253   format %{ "addsd   $dst, $src" %}
17254   ins_cost(150);
17255   ins_encode %{
17256     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17257   %}
17258   ins_pipe(pipe_slow);
17259 %}
17260 
17261 instruct addD_mem(regD dst, memory src) %{
17262   predicate(UseAVX == 0);
17263   match(Set dst (AddD dst (LoadD src)));
17264 
17265   format %{ "addsd   $dst, $src" %}
17266   ins_cost(150);
17267   ins_encode %{
17268     __ addsd($dst$$XMMRegister, $src$$Address);
17269   %}
17270   ins_pipe(pipe_slow);
17271 %}
17272 
17273 instruct addD_imm(regD dst, immD con) %{
17274   predicate(UseAVX == 0);
17275   match(Set dst (AddD dst con));
17276   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17277   ins_cost(150);
17278   ins_encode %{
17279     __ addsd($dst$$XMMRegister, $constantaddress($con));
17280   %}
17281   ins_pipe(pipe_slow);
17282 %}
17283 
17284 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17285   predicate(UseAVX > 0);
17286   match(Set dst (AddD src1 src2));
17287 
17288   format %{ "vaddsd  $dst, $src1, $src2" %}
17289   ins_cost(150);
17290   ins_encode %{
17291     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17292   %}
17293   ins_pipe(pipe_slow);
17294 %}
17295 
17296 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17297   predicate(UseAVX > 0);
17298   match(Set dst (AddD src1 (LoadD src2)));
17299 
17300   format %{ "vaddsd  $dst, $src1, $src2" %}
17301   ins_cost(150);
17302   ins_encode %{
17303     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17304   %}
17305   ins_pipe(pipe_slow);
17306 %}
17307 
17308 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17309   predicate(UseAVX > 0);
17310   match(Set dst (AddD src con));
17311 
17312   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17313   ins_cost(150);
17314   ins_encode %{
17315     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17316   %}
17317   ins_pipe(pipe_slow);
17318 %}
17319 
17320 instruct subF_reg(regF dst, regF src) %{
17321   predicate(UseAVX == 0);
17322   match(Set dst (SubF dst src));
17323 
17324   format %{ "subss   $dst, $src" %}
17325   ins_cost(150);
17326   ins_encode %{
17327     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17328   %}
17329   ins_pipe(pipe_slow);
17330 %}
17331 
17332 instruct subF_mem(regF dst, memory src) %{
17333   predicate(UseAVX == 0);
17334   match(Set dst (SubF dst (LoadF src)));
17335 
17336   format %{ "subss   $dst, $src" %}
17337   ins_cost(150);
17338   ins_encode %{
17339     __ subss($dst$$XMMRegister, $src$$Address);
17340   %}
17341   ins_pipe(pipe_slow);
17342 %}
17343 
17344 instruct subF_imm(regF dst, immF con) %{
17345   predicate(UseAVX == 0);
17346   match(Set dst (SubF dst con));
17347   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17348   ins_cost(150);
17349   ins_encode %{
17350     __ subss($dst$$XMMRegister, $constantaddress($con));
17351   %}
17352   ins_pipe(pipe_slow);
17353 %}
17354 
17355 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17356   predicate(UseAVX > 0);
17357   match(Set dst (SubF src1 src2));
17358 
17359   format %{ "vsubss  $dst, $src1, $src2" %}
17360   ins_cost(150);
17361   ins_encode %{
17362     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17363   %}
17364   ins_pipe(pipe_slow);
17365 %}
17366 
17367 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17368   predicate(UseAVX > 0);
17369   match(Set dst (SubF src1 (LoadF src2)));
17370 
17371   format %{ "vsubss  $dst, $src1, $src2" %}
17372   ins_cost(150);
17373   ins_encode %{
17374     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17375   %}
17376   ins_pipe(pipe_slow);
17377 %}
17378 
17379 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17380   predicate(UseAVX > 0);
17381   match(Set dst (SubF src con));
17382 
17383   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17384   ins_cost(150);
17385   ins_encode %{
17386     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17387   %}
17388   ins_pipe(pipe_slow);
17389 %}
17390 
17391 instruct subD_reg(regD dst, regD src) %{
17392   predicate(UseAVX == 0);
17393   match(Set dst (SubD dst src));
17394 
17395   format %{ "subsd   $dst, $src" %}
17396   ins_cost(150);
17397   ins_encode %{
17398     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17399   %}
17400   ins_pipe(pipe_slow);
17401 %}
17402 
17403 instruct subD_mem(regD dst, memory src) %{
17404   predicate(UseAVX == 0);
17405   match(Set dst (SubD dst (LoadD src)));
17406 
17407   format %{ "subsd   $dst, $src" %}
17408   ins_cost(150);
17409   ins_encode %{
17410     __ subsd($dst$$XMMRegister, $src$$Address);
17411   %}
17412   ins_pipe(pipe_slow);
17413 %}
17414 
17415 instruct subD_imm(regD dst, immD con) %{
17416   predicate(UseAVX == 0);
17417   match(Set dst (SubD dst con));
17418   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17419   ins_cost(150);
17420   ins_encode %{
17421     __ subsd($dst$$XMMRegister, $constantaddress($con));
17422   %}
17423   ins_pipe(pipe_slow);
17424 %}
17425 
17426 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17427   predicate(UseAVX > 0);
17428   match(Set dst (SubD src1 src2));
17429 
17430   format %{ "vsubsd  $dst, $src1, $src2" %}
17431   ins_cost(150);
17432   ins_encode %{
17433     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17434   %}
17435   ins_pipe(pipe_slow);
17436 %}
17437 
17438 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17439   predicate(UseAVX > 0);
17440   match(Set dst (SubD src1 (LoadD src2)));
17441 
17442   format %{ "vsubsd  $dst, $src1, $src2" %}
17443   ins_cost(150);
17444   ins_encode %{
17445     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17446   %}
17447   ins_pipe(pipe_slow);
17448 %}
17449 
17450 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17451   predicate(UseAVX > 0);
17452   match(Set dst (SubD src con));
17453 
17454   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17455   ins_cost(150);
17456   ins_encode %{
17457     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17458   %}
17459   ins_pipe(pipe_slow);
17460 %}
17461 
17462 instruct mulF_reg(regF dst, regF src) %{
17463   predicate(UseAVX == 0);
17464   match(Set dst (MulF dst src));
17465 
17466   format %{ "mulss   $dst, $src" %}
17467   ins_cost(150);
17468   ins_encode %{
17469     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17470   %}
17471   ins_pipe(pipe_slow);
17472 %}
17473 
17474 instruct mulF_mem(regF dst, memory src) %{
17475   predicate(UseAVX == 0);
17476   match(Set dst (MulF dst (LoadF src)));
17477 
17478   format %{ "mulss   $dst, $src" %}
17479   ins_cost(150);
17480   ins_encode %{
17481     __ mulss($dst$$XMMRegister, $src$$Address);
17482   %}
17483   ins_pipe(pipe_slow);
17484 %}
17485 
17486 instruct mulF_imm(regF dst, immF con) %{
17487   predicate(UseAVX == 0);
17488   match(Set dst (MulF dst con));
17489   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17490   ins_cost(150);
17491   ins_encode %{
17492     __ mulss($dst$$XMMRegister, $constantaddress($con));
17493   %}
17494   ins_pipe(pipe_slow);
17495 %}
17496 
17497 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17498   predicate(UseAVX > 0);
17499   match(Set dst (MulF src1 src2));
17500 
17501   format %{ "vmulss  $dst, $src1, $src2" %}
17502   ins_cost(150);
17503   ins_encode %{
17504     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17505   %}
17506   ins_pipe(pipe_slow);
17507 %}
17508 
17509 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17510   predicate(UseAVX > 0);
17511   match(Set dst (MulF src1 (LoadF src2)));
17512 
17513   format %{ "vmulss  $dst, $src1, $src2" %}
17514   ins_cost(150);
17515   ins_encode %{
17516     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17517   %}
17518   ins_pipe(pipe_slow);
17519 %}
17520 
17521 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17522   predicate(UseAVX > 0);
17523   match(Set dst (MulF src con));
17524 
17525   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17526   ins_cost(150);
17527   ins_encode %{
17528     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17529   %}
17530   ins_pipe(pipe_slow);
17531 %}
17532 
17533 instruct mulD_reg(regD dst, regD src) %{
17534   predicate(UseAVX == 0);
17535   match(Set dst (MulD dst src));
17536 
17537   format %{ "mulsd   $dst, $src" %}
17538   ins_cost(150);
17539   ins_encode %{
17540     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17541   %}
17542   ins_pipe(pipe_slow);
17543 %}
17544 
17545 instruct mulD_mem(regD dst, memory src) %{
17546   predicate(UseAVX == 0);
17547   match(Set dst (MulD dst (LoadD src)));
17548 
17549   format %{ "mulsd   $dst, $src" %}
17550   ins_cost(150);
17551   ins_encode %{
17552     __ mulsd($dst$$XMMRegister, $src$$Address);
17553   %}
17554   ins_pipe(pipe_slow);
17555 %}
17556 
17557 instruct mulD_imm(regD dst, immD con) %{
17558   predicate(UseAVX == 0);
17559   match(Set dst (MulD dst con));
17560   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17561   ins_cost(150);
17562   ins_encode %{
17563     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17564   %}
17565   ins_pipe(pipe_slow);
17566 %}
17567 
17568 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17569   predicate(UseAVX > 0);
17570   match(Set dst (MulD src1 src2));
17571 
17572   format %{ "vmulsd  $dst, $src1, $src2" %}
17573   ins_cost(150);
17574   ins_encode %{
17575     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17576   %}
17577   ins_pipe(pipe_slow);
17578 %}
17579 
17580 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17581   predicate(UseAVX > 0);
17582   match(Set dst (MulD src1 (LoadD src2)));
17583 
17584   format %{ "vmulsd  $dst, $src1, $src2" %}
17585   ins_cost(150);
17586   ins_encode %{
17587     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17588   %}
17589   ins_pipe(pipe_slow);
17590 %}
17591 
17592 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17593   predicate(UseAVX > 0);
17594   match(Set dst (MulD src con));
17595 
17596   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17597   ins_cost(150);
17598   ins_encode %{
17599     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17600   %}
17601   ins_pipe(pipe_slow);
17602 %}
17603 
17604 instruct divF_reg(regF dst, regF src) %{
17605   predicate(UseAVX == 0);
17606   match(Set dst (DivF dst src));
17607 
17608   format %{ "divss   $dst, $src" %}
17609   ins_cost(150);
17610   ins_encode %{
17611     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17612   %}
17613   ins_pipe(pipe_slow);
17614 %}
17615 
17616 instruct divF_mem(regF dst, memory src) %{
17617   predicate(UseAVX == 0);
17618   match(Set dst (DivF dst (LoadF src)));
17619 
17620   format %{ "divss   $dst, $src" %}
17621   ins_cost(150);
17622   ins_encode %{
17623     __ divss($dst$$XMMRegister, $src$$Address);
17624   %}
17625   ins_pipe(pipe_slow);
17626 %}
17627 
17628 instruct divF_imm(regF dst, immF con) %{
17629   predicate(UseAVX == 0);
17630   match(Set dst (DivF dst con));
17631   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17632   ins_cost(150);
17633   ins_encode %{
17634     __ divss($dst$$XMMRegister, $constantaddress($con));
17635   %}
17636   ins_pipe(pipe_slow);
17637 %}
17638 
17639 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17640   predicate(UseAVX > 0);
17641   match(Set dst (DivF src1 src2));
17642 
17643   format %{ "vdivss  $dst, $src1, $src2" %}
17644   ins_cost(150);
17645   ins_encode %{
17646     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17647   %}
17648   ins_pipe(pipe_slow);
17649 %}
17650 
17651 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17652   predicate(UseAVX > 0);
17653   match(Set dst (DivF src1 (LoadF src2)));
17654 
17655   format %{ "vdivss  $dst, $src1, $src2" %}
17656   ins_cost(150);
17657   ins_encode %{
17658     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17659   %}
17660   ins_pipe(pipe_slow);
17661 %}
17662 
17663 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17664   predicate(UseAVX > 0);
17665   match(Set dst (DivF src con));
17666 
17667   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17668   ins_cost(150);
17669   ins_encode %{
17670     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17671   %}
17672   ins_pipe(pipe_slow);
17673 %}
17674 
17675 instruct divD_reg(regD dst, regD src) %{
17676   predicate(UseAVX == 0);
17677   match(Set dst (DivD dst src));
17678 
17679   format %{ "divsd   $dst, $src" %}
17680   ins_cost(150);
17681   ins_encode %{
17682     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17683   %}
17684   ins_pipe(pipe_slow);
17685 %}
17686 
17687 instruct divD_mem(regD dst, memory src) %{
17688   predicate(UseAVX == 0);
17689   match(Set dst (DivD dst (LoadD src)));
17690 
17691   format %{ "divsd   $dst, $src" %}
17692   ins_cost(150);
17693   ins_encode %{
17694     __ divsd($dst$$XMMRegister, $src$$Address);
17695   %}
17696   ins_pipe(pipe_slow);
17697 %}
17698 
17699 instruct divD_imm(regD dst, immD con) %{
17700   predicate(UseAVX == 0);
17701   match(Set dst (DivD dst con));
17702   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17703   ins_cost(150);
17704   ins_encode %{
17705     __ divsd($dst$$XMMRegister, $constantaddress($con));
17706   %}
17707   ins_pipe(pipe_slow);
17708 %}
17709 
17710 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17711   predicate(UseAVX > 0);
17712   match(Set dst (DivD src1 src2));
17713 
17714   format %{ "vdivsd  $dst, $src1, $src2" %}
17715   ins_cost(150);
17716   ins_encode %{
17717     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17718   %}
17719   ins_pipe(pipe_slow);
17720 %}
17721 
17722 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17723   predicate(UseAVX > 0);
17724   match(Set dst (DivD src1 (LoadD src2)));
17725 
17726   format %{ "vdivsd  $dst, $src1, $src2" %}
17727   ins_cost(150);
17728   ins_encode %{
17729     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17730   %}
17731   ins_pipe(pipe_slow);
17732 %}
17733 
17734 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17735   predicate(UseAVX > 0);
17736   match(Set dst (DivD src con));
17737 
17738   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17739   ins_cost(150);
17740   ins_encode %{
17741     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17742   %}
17743   ins_pipe(pipe_slow);
17744 %}
17745 
17746 instruct absF_reg(regF dst) %{
17747   predicate(UseAVX == 0);
17748   match(Set dst (AbsF dst));
17749   ins_cost(150);
17750   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
17751   ins_encode %{
17752     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17753   %}
17754   ins_pipe(pipe_slow);
17755 %}
17756 
17757 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17758   predicate(UseAVX > 0);
17759   match(Set dst (AbsF src));
17760   ins_cost(150);
17761   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17762   ins_encode %{
17763     int vlen_enc = Assembler::AVX_128bit;
17764     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17765               ExternalAddress(float_signmask()), vlen_enc);
17766   %}
17767   ins_pipe(pipe_slow);
17768 %}
17769 
17770 instruct absD_reg(regD dst) %{
17771   predicate(UseAVX == 0);
17772   match(Set dst (AbsD dst));
17773   ins_cost(150);
17774   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
17775             "# abs double by sign masking" %}
17776   ins_encode %{
17777     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17778   %}
17779   ins_pipe(pipe_slow);
17780 %}
17781 
17782 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17783   predicate(UseAVX > 0);
17784   match(Set dst (AbsD src));
17785   ins_cost(150);
17786   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
17787             "# abs double by sign masking" %}
17788   ins_encode %{
17789     int vlen_enc = Assembler::AVX_128bit;
17790     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17791               ExternalAddress(double_signmask()), vlen_enc);
17792   %}
17793   ins_pipe(pipe_slow);
17794 %}
17795 
17796 instruct negF_reg(regF dst) %{
17797   predicate(UseAVX == 0);
17798   match(Set dst (NegF dst));
17799   ins_cost(150);
17800   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
17801   ins_encode %{
17802     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17803   %}
17804   ins_pipe(pipe_slow);
17805 %}
17806 
17807 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17808   predicate(UseAVX > 0);
17809   match(Set dst (NegF src));
17810   ins_cost(150);
17811   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17812   ins_encode %{
17813     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17814                  ExternalAddress(float_signflip()));
17815   %}
17816   ins_pipe(pipe_slow);
17817 %}
17818 
17819 instruct negD_reg(regD dst) %{
17820   predicate(UseAVX == 0);
17821   match(Set dst (NegD dst));
17822   ins_cost(150);
17823   format %{ "xorpd   $dst, [0x8000000000000000]\t"
17824             "# neg double by sign flipping" %}
17825   ins_encode %{
17826     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17827   %}
17828   ins_pipe(pipe_slow);
17829 %}
17830 
17831 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17832   predicate(UseAVX > 0);
17833   match(Set dst (NegD src));
17834   ins_cost(150);
17835   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
17836             "# neg double by sign flipping" %}
17837   ins_encode %{
17838     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17839                  ExternalAddress(double_signflip()));
17840   %}
17841   ins_pipe(pipe_slow);
17842 %}
17843 
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance, so only the rule where the input is pre-loaded into
// the dst register is defined below.
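// (sqrtss writes only the low 32 bits of dst, so a cold destination would
// carry a false dependency on its stale upper bits; sourcing from dst
// itself avoids that stall.)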
17846 instruct sqrtF_reg(regF dst) %{
17847   match(Set dst (SqrtF dst));
17848   format %{ "sqrtss  $dst, $dst" %}
17849   ins_encode %{
17850     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17851   %}
17852   ins_pipe(pipe_slow);
17853 %}
17854 
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance, so only the rule where the input is pre-loaded into
// the dst register is defined below (the same false-dependency argument as
// for sqrtss applies).
17857 instruct sqrtD_reg(regD dst) %{
17858   match(Set dst (SqrtD dst));
17859   format %{ "sqrtsd  $dst, $dst" %}
17860   ins_encode %{
17861     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17862   %}
17863   ins_pipe(pipe_slow);
17864 %}
17865 
17866 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17867   effect(TEMP tmp);
17868   match(Set dst (ConvF2HF src));
17869   ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
17871   ins_encode %{
17872     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17873   %}
17874   ins_pipe( pipe_slow );
17875 %}
17876 
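// Stores a single FP16 result to memory: the one-bit opmask (0x1) limits
// the evcvtps2ph store to the low halfword element.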
17877 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
17878   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17879   effect(TEMP ktmp, TEMP rtmp);
17880   match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
17882   ins_encode %{
17883     __ movl($rtmp$$Register, 0x1);
17884     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
17885     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17886   %}
17887   ins_pipe( pipe_slow );
17888 %}
17889 
17890 instruct vconvF2HF(vec dst, vec src) %{
17891   match(Set dst (VectorCastF2HF src));
17892   format %{ "vector_conv_F2HF $dst $src" %}
17893   ins_encode %{
17894     int vlen_enc = vector_length_encoding(this, $src);
17895     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
17896   %}
17897   ins_pipe( pipe_slow );
17898 %}
17899 
17900 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
17901   predicate(n->as_StoreVector()->memory_size() >= 16);
17902   match(Set mem (StoreVector mem (VectorCastF2HF src)));
17903   format %{ "vcvtps2ph $mem,$src" %}
17904   ins_encode %{
17905     int vlen_enc = vector_length_encoding(this, $src);
17906     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
17907   %}
17908   ins_pipe( pipe_slow );
17909 %}
17910 
17911 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
17912   match(Set dst (ConvHF2F src));
17913   format %{ "vcvtph2ps $dst,$src" %}
17914   ins_encode %{
17915     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
17916   %}
17917   ins_pipe( pipe_slow );
17918 %}
17919 
17920 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
17921   match(Set dst (VectorCastHF2F (LoadVector mem)));
17922   format %{ "vcvtph2ps $dst,$mem" %}
17923   ins_encode %{
17924     int vlen_enc = vector_length_encoding(this);
17925     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
17926   %}
17927   ins_pipe( pipe_slow );
17928 %}
17929 
17930 instruct vconvHF2F(vec dst, vec src) %{
17931   match(Set dst (VectorCastHF2F src));
17932   ins_cost(125);
17933   format %{ "vector_conv_HF2F $dst,$src" %}
17934   ins_encode %{
17935     int vlen_enc = vector_length_encoding(this);
17936     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
17937   %}
17938   ins_pipe( pipe_slow );
17939 %}
17940 
17941 // ---------------------------------------- VectorReinterpret ------------------------------------
17942 instruct reinterpret_mask(kReg dst) %{
17943   predicate(n->bottom_type()->isa_vectmask() &&
17944             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
17945   match(Set dst (VectorReinterpret dst));
17946   ins_cost(125);
17947   format %{ "vector_reinterpret $dst\t!" %}
17948   ins_encode %{
17949     // empty
17950   %}
17951   ins_pipe( pipe_slow );
17952 %}
17953 
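// Reinterpreting a mask across element sizes round-trips through a vector
// register: evpmovm2w/d/q expands the k-mask into all-ones/all-zeros
// elements, and evpmovb2m recompresses it at byte granularity.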
17954 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
17955   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17956             n->bottom_type()->isa_vectmask() &&
17957             n->in(1)->bottom_type()->isa_vectmask() &&
17958             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
17959             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
17960   match(Set dst (VectorReinterpret src));
17961   effect(TEMP xtmp);
17962   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
17963   ins_encode %{
17964      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
17965      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
17967      int vlen_enc = vector_length_encoding(src_sz);
17968      __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
17969      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
17970   %}
17971   ins_pipe( pipe_slow );
17972 %}
17973 
17974 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
17975   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17976             n->bottom_type()->isa_vectmask() &&
17977             n->in(1)->bottom_type()->isa_vectmask() &&
17978             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
17979              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
17980             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
17981   match(Set dst (VectorReinterpret src));
17982   effect(TEMP xtmp);
17983   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
17984   ins_encode %{
17985      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
17986      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
17988      int vlen_enc = vector_length_encoding(src_sz);
17989      __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
17990      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
17991   %}
17992   ins_pipe( pipe_slow );
17993 %}
17994 
17995 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
17996   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17997             n->bottom_type()->isa_vectmask() &&
17998             n->in(1)->bottom_type()->isa_vectmask() &&
17999             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18000              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18001             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18002   match(Set dst (VectorReinterpret src));
18003   effect(TEMP xtmp);
18004   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18005   ins_encode %{
18006      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18007      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
18009      int vlen_enc = vector_length_encoding(src_sz);
18010      __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18011      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18012   %}
18013   ins_pipe( pipe_slow );
18014 %}
18015 
18016 instruct reinterpret(vec dst) %{
18017   predicate(!n->bottom_type()->isa_vectmask() &&
18018             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18019   match(Set dst (VectorReinterpret dst));
18020   ins_cost(125);
18021   format %{ "vector_reinterpret $dst\t!" %}
18022   ins_encode %{
18023     // empty
18024   %}
18025   ins_pipe( pipe_slow );
18026 %}
18027 
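// An expanding reinterpret must zero the destination bits beyond the
// source length; without AVX this is done by loading a 32- or 64-bit
// keep-mask into dst and AND-ing the source into it.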
18028 instruct reinterpret_expand(vec dst, vec src) %{
18029   predicate(UseAVX == 0 &&
18030             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18031   match(Set dst (VectorReinterpret src));
18032   ins_cost(125);
18033   effect(TEMP dst);
18034   format %{ "vector_reinterpret_expand $dst,$src" %}
18035   ins_encode %{
18036     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18037     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18038 
18039     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18040     if (src_vlen_in_bytes == 4) {
18041       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18042     } else {
18043       assert(src_vlen_in_bytes == 8, "");
18044       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18045     }
18046     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18047   %}
18048   ins_pipe( pipe_slow );
18049 %}
18050 
18051 instruct vreinterpret_expand4(legVec dst, vec src) %{
18052   predicate(UseAVX > 0 &&
18053             !n->bottom_type()->isa_vectmask() &&
18054             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18055             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18056   match(Set dst (VectorReinterpret src));
18057   ins_cost(125);
18058   format %{ "vector_reinterpret_expand $dst,$src" %}
18059   ins_encode %{
18060     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18061   %}
18062   ins_pipe( pipe_slow );
18063 %}
18064 
18065 
18066 instruct vreinterpret_expand(legVec dst, vec src) %{
18067   predicate(UseAVX > 0 &&
18068             !n->bottom_type()->isa_vectmask() &&
18069             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18070             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18071   match(Set dst (VectorReinterpret src));
18072   ins_cost(125);
18073   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18074   ins_encode %{
18075     switch (Matcher::vector_length_in_bytes(this, $src)) {
18076       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18077       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18078       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18079       default: ShouldNotReachHere();
18080     }
18081   %}
18082   ins_pipe( pipe_slow );
18083 %}
18084 
18085 instruct reinterpret_shrink(vec dst, legVec src) %{
18086   predicate(!n->bottom_type()->isa_vectmask() &&
18087             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18088   match(Set dst (VectorReinterpret src));
18089   ins_cost(125);
18090   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18091   ins_encode %{
18092     switch (Matcher::vector_length_in_bytes(this)) {
18093       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18094       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18095       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18096       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18097       default: ShouldNotReachHere();
18098     }
18099   %}
18100   ins_pipe( pipe_slow );
18101 %}
18102 
18103 // ----------------------------------------------------------------------------------------------------
18104 
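// The RoundDoubleMode immediate feeds roundsd/vroundpd directly; the imm8
// rounding control encodes 0 = nearest-even, 1 = toward -infinity (floor),
// 2 = toward +infinity (ceil). The pxor below breaks the false dependency
// on dst's stale upper bits, since roundsd writes only the low 64 bits.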
18105 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18106   match(Set dst (RoundDoubleMode src rmode));
18107   format %{ "roundsd $dst,$src" %}
18108   ins_cost(150);
18109   ins_encode %{
18110     assert(UseSSE >= 4, "required");
18111     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18112       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18113     }
18114     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18115   %}
18116   ins_pipe(pipe_slow);
18117 %}
18118 
18119 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18120   match(Set dst (RoundDoubleMode con rmode));
18121   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18122   ins_cost(150);
18123   ins_encode %{
18124     assert(UseSSE >= 4, "required");
18125     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18126   %}
18127   ins_pipe(pipe_slow);
18128 %}
18129 
18130 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18131   predicate(Matcher::vector_length(n) < 8);
18132   match(Set dst (RoundDoubleModeV src rmode));
18133   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18134   ins_encode %{
18135     assert(UseAVX > 0, "required");
18136     int vlen_enc = vector_length_encoding(this);
18137     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18138   %}
18139   ins_pipe( pipe_slow );
18140 %}
18141 
18142 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18143   predicate(Matcher::vector_length(n) == 8);
18144   match(Set dst (RoundDoubleModeV src rmode));
18145   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18146   ins_encode %{
18147     assert(UseAVX > 2, "required");
18148     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18149   %}
18150   ins_pipe( pipe_slow );
18151 %}
18152 
18153 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18154   predicate(Matcher::vector_length(n) < 8);
18155   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18156   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18157   ins_encode %{
18158     assert(UseAVX > 0, "required");
18159     int vlen_enc = vector_length_encoding(this);
18160     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18161   %}
18162   ins_pipe( pipe_slow );
18163 %}
18164 
18165 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18166   predicate(Matcher::vector_length(n) == 8);
18167   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18168   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18169   ins_encode %{
18170     assert(UseAVX > 2, "required");
18171     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18172   %}
18173   ins_pipe( pipe_slow );
18174 %}
18175 
18176 instruct onspinwait() %{
18177   match(OnSpinWait);
18178   ins_cost(200);
18179 
18180   format %{
18181     $$template
18182     $$emit$$"pause\t! membar_onspinwait"
18183   %}
18184   ins_encode %{
18185     __ pause();
18186   %}
18187   ins_pipe(pipe_slow);
18188 %}
18189 
18190 // a * b + c
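// FMA computes a * b + c in a single rounding step; the node is only
// generated when UseFMA is enabled, hence the assert in the encoding.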
18191 instruct fmaD_reg(regD a, regD b, regD c) %{
18192   match(Set c (FmaD  c (Binary a b)));
18193   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18194   ins_cost(150);
18195   ins_encode %{
18196     assert(UseFMA, "Needs FMA instructions support.");
18197     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18198   %}
18199   ins_pipe( pipe_slow );
18200 %}
18201 
18202 // a * b + c
18203 instruct fmaF_reg(regF a, regF b, regF c) %{
18204   match(Set c (FmaF  c (Binary a b)));
18205   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18206   ins_cost(150);
18207   ins_encode %{
18208     assert(UseFMA, "Needs FMA instructions support.");
18209     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18210   %}
18211   ins_pipe( pipe_slow );
18212 %}
18213 
18214 // ====================VECTOR INSTRUCTIONS=====================================
18215 
18216 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18217 instruct MoveVec2Leg(legVec dst, vec src) %{
18218   match(Set dst src);
18219   format %{ "" %}
18220   ins_encode %{
18221     ShouldNotReachHere();
18222   %}
18223   ins_pipe( fpu_reg_reg );
18224 %}
18225 
18226 instruct MoveLeg2Vec(vec dst, legVec src) %{
18227   match(Set dst src);
18228   format %{ "" %}
18229   ins_encode %{
18230     ShouldNotReachHere();
18231   %}
18232   ins_pipe( fpu_reg_reg );
18233 %}
18234 
18235 // ============================================================================
18236 
18237 // Load vectors generic operand pattern
18238 instruct loadV(vec dst, memory mem) %{
18239   match(Set dst (LoadVector mem));
18240   ins_cost(125);
18241   format %{ "load_vector $dst,$mem" %}
18242   ins_encode %{
18243     BasicType bt = Matcher::vector_element_basic_type(this);
18244     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18245   %}
18246   ins_pipe( pipe_slow );
18247 %}
18248 
18249 // Store vectors generic operand pattern.
18250 instruct storeV(memory mem, vec src) %{
18251   match(Set mem (StoreVector mem src));
18252   ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18254   ins_encode %{
18255     switch (Matcher::vector_length_in_bytes(this, $src)) {
18256       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18257       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18258       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18259       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18260       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18261       default: ShouldNotReachHere();
18262     }
18263   %}
18264   ins_pipe( pipe_slow );
18265 %}
18266 
18267 // ---------------------------------------- Gather ------------------------------------
18268 
18269 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
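//
// dst[i] = *(base + idx[i] * scale) for each lane. AVX2 gathers predicate
// on a vector mask, AVX-512 gathers on a k-mask; both clear mask lanes as
// elements complete, which is why the masks are (re)materialized before
// each gather below.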
18270 
18271 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18272   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18273             Matcher::vector_length_in_bytes(n) <= 32);
18274   match(Set dst (LoadVectorGather mem idx));
18275   effect(TEMP dst, TEMP tmp, TEMP mask);
18276   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18277   ins_encode %{
18278     int vlen_enc = vector_length_encoding(this);
18279     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18280     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18281     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18282     __ lea($tmp$$Register, $mem$$Address);
18283     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18284   %}
18285   ins_pipe( pipe_slow );
18286 %}
18287 
18288 
18289 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18290   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18291             !is_subword_type(Matcher::vector_element_basic_type(n)));
18292   match(Set dst (LoadVectorGather mem idx));
18293   effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18295   ins_encode %{
18296     int vlen_enc = vector_length_encoding(this);
18297     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18298     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18299     __ lea($tmp$$Register, $mem$$Address);
18300     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18301   %}
18302   ins_pipe( pipe_slow );
18303 %}
18304 
18305 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18306   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18307             !is_subword_type(Matcher::vector_element_basic_type(n)));
18308   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18309   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18311   ins_encode %{
18312     assert(UseAVX > 2, "sanity");
18313     int vlen_enc = vector_length_encoding(this);
18314     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18315     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is first copied to a temporary.
18318     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18319     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18320     __ lea($tmp$$Register, $mem$$Address);
18321     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18322   %}
18323   ins_pipe( pipe_slow );
18324 %}
18325 
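// x86 has no byte/short gather instruction, so subword gathers are
// assembled from scalar loads by the vgather8b / vgather_subword macro
// assembler helpers.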
18326 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18327   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18328   match(Set dst (LoadVectorGather mem idx_base));
18329   effect(TEMP tmp, TEMP rtmp);
18330   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18331   ins_encode %{
18332     int vlen_enc = vector_length_encoding(this);
18333     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18334     __ lea($tmp$$Register, $mem$$Address);
18335     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18336   %}
18337   ins_pipe( pipe_slow );
18338 %}
18339 
18340 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18341                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18342   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18343   match(Set dst (LoadVectorGather mem idx_base));
18344   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18345   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18346   ins_encode %{
18347     int vlen_enc = vector_length_encoding(this);
18348     int vector_len = Matcher::vector_length(this);
18349     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18350     __ lea($tmp$$Register, $mem$$Address);
18351     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18352     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18353                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18354   %}
18355   ins_pipe( pipe_slow );
18356 %}
18357 
18358 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18359   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18360   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18361   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18362   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18363   ins_encode %{
18364     int vlen_enc = vector_length_encoding(this);
18365     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18366     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18367     __ lea($tmp$$Register, $mem$$Address);
18368     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18369     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18370   %}
18371   ins_pipe( pipe_slow );
18372 %}
18373 
18374 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18375                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18376   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18377   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18378   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18379   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18380   ins_encode %{
18381     int vlen_enc = vector_length_encoding(this);
18382     int vector_len = Matcher::vector_length(this);
18383     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18384     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18385     __ lea($tmp$$Register, $mem$$Address);
18386     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18387     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18388     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18389                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18390   %}
18391   ins_pipe( pipe_slow );
18392 %}
18393 
18394 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18395   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18396   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18397   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18398   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18399   ins_encode %{
18400     int vlen_enc = vector_length_encoding(this);
18401     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18402     __ lea($tmp$$Register, $mem$$Address);
18403     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18404     if (elem_bt == T_SHORT) {
18405       __ movl($mask_idx$$Register, 0x55555555);
18406       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18407     }
18408     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18409     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18410   %}
18411   ins_pipe( pipe_slow );
18412 %}
18413 
18414 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18415                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18416   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18417   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18418   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18419   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18420   ins_encode %{
18421     int vlen_enc = vector_length_encoding(this);
18422     int vector_len = Matcher::vector_length(this);
18423     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18424     __ lea($tmp$$Register, $mem$$Address);
18425     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18426     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18427     if (elem_bt == T_SHORT) {
18428       __ movl($mask_idx$$Register, 0x55555555);
18429       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18430     }
18431     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18432     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18433                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18434   %}
18435   ins_pipe( pipe_slow );
18436 %}
18437 
18438 // ====================Scatter=======================================
18439 
18440 // Scatter INT, LONG, FLOAT, DOUBLE
18441 
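// Scatter is the store-side dual of gather; as a scalar sketch:
//
//   for (int i = 0; i < vlen; i++) {
//     base[idx[i]] = src[i];
//   }
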
18442 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18443   predicate(UseAVX > 2);
18444   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18445   effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18447   ins_encode %{
18448     int vlen_enc = vector_length_encoding(this, $src);
18449     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18450 
18451     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18452     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18453 
18454     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18455     __ lea($tmp$$Register, $mem$$Address);
18456     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18457   %}
18458   ins_pipe( pipe_slow );
18459 %}
18460 
18461 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18462   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18463   effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $tmp and $ktmp as TEMP" %}
18465   ins_encode %{
18466     int vlen_enc = vector_length_encoding(this, $src);
18467     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18468     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18469     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is moved to a temporary first.
18472     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18473     __ lea($tmp$$Register, $mem$$Address);
18474     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18475   %}
18476   ins_pipe( pipe_slow );
18477 %}
18478 
18479 // ====================REPLICATE=======================================
18480 
18481 // Replicate byte scalar to be vector
18482 instruct vReplB_reg(vec dst, rRegI src) %{
18483   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18484   match(Set dst (Replicate src));
18485   format %{ "replicateB $dst,$src" %}
18486   ins_encode %{
18487     uint vlen = Matcher::vector_length(this);
18488     if (UseAVX >= 2) {
18489       int vlen_enc = vector_length_encoding(this);
18490       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18491         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18492         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18493       } else {
18494         __ movdl($dst$$XMMRegister, $src$$Register);
18495         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18496       }
18497     } else {
      assert(UseAVX < 2, "");
18499       __ movdl($dst$$XMMRegister, $src$$Register);
18500       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18501       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18502       if (vlen >= 16) {
18503         assert(vlen == 16, "");
18504         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18505       }
18506     }
18507   %}
18508   ins_pipe( pipe_slow );
18509 %}
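
// The pre-AVX2 byte-splat expansion above, for a byte value b (sketch):
//
//   movdl      dst, src         // b in byte 0
//   punpcklbw  dst, dst         // b in bytes 0..1
//   pshuflw    dst, dst, 0x00   // b in bytes 0..7
//   punpcklqdq dst, dst         // b in bytes 0..15 (16-byte vectors only)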
18510 
18511 instruct ReplB_mem(vec dst, memory mem) %{
18512   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18513   match(Set dst (Replicate (LoadB mem)));
18514   format %{ "replicateB $dst,$mem" %}
18515   ins_encode %{
18516     int vlen_enc = vector_length_encoding(this);
18517     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18518   %}
18519   ins_pipe( pipe_slow );
18520 %}
18521 
18522 // ====================ReplicateS=======================================
18523 
18524 instruct vReplS_reg(vec dst, rRegI src) %{
18525   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18526   match(Set dst (Replicate src));
18527   format %{ "replicateS $dst,$src" %}
18528   ins_encode %{
18529     uint vlen = Matcher::vector_length(this);
18530     int vlen_enc = vector_length_encoding(this);
18531     if (UseAVX >= 2) {
18532       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18533         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18534         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18535       } else {
18536         __ movdl($dst$$XMMRegister, $src$$Register);
18537         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18538       }
18539     } else {
18540       assert(UseAVX < 2, "");
18541       __ movdl($dst$$XMMRegister, $src$$Register);
18542       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18543       if (vlen >= 8) {
18544         assert(vlen == 8, "");
18545         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18546       }
18547     }
18548   %}
18549   ins_pipe( pipe_slow );
18550 %}
18551 
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate con));
18554   effect(TEMP rtmp);
18555   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18556   ins_encode %{
18557     int vlen_enc = vector_length_encoding(this);
18558     BasicType bt = Matcher::vector_element_basic_type(this);
18559     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18560     __ movl($rtmp$$Register, $con$$constant);
18561     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18562   %}
18563   ins_pipe( pipe_slow );
18564 %}
18565 
18566 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18567   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18568   match(Set dst (Replicate src));
18569   effect(TEMP rtmp);
18570   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18571   ins_encode %{
18572     int vlen_enc = vector_length_encoding(this);
18573     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18574     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18575   %}
18576   ins_pipe( pipe_slow );
18577 %}
18578 
18579 instruct ReplS_mem(vec dst, memory mem) %{
18580   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18581   match(Set dst (Replicate (LoadS mem)));
18582   format %{ "replicateS $dst,$mem" %}
18583   ins_encode %{
18584     int vlen_enc = vector_length_encoding(this);
18585     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18586   %}
18587   ins_pipe( pipe_slow );
18588 %}
18589 
18590 // ====================ReplicateI=======================================
18591 
18592 instruct ReplI_reg(vec dst, rRegI src) %{
18593   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18594   match(Set dst (Replicate src));
18595   format %{ "replicateI $dst,$src" %}
18596   ins_encode %{
18597     uint vlen = Matcher::vector_length(this);
18598     int vlen_enc = vector_length_encoding(this);
18599     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18600       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18601     } else if (VM_Version::supports_avx2()) {
18602       __ movdl($dst$$XMMRegister, $src$$Register);
18603       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18604     } else {
18605       __ movdl($dst$$XMMRegister, $src$$Register);
18606       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18607     }
18608   %}
18609   ins_pipe( pipe_slow );
18610 %}
18611 
18612 instruct ReplI_mem(vec dst, memory mem) %{
18613   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18614   match(Set dst (Replicate (LoadI mem)));
18615   format %{ "replicateI $dst,$mem" %}
18616   ins_encode %{
18617     int vlen_enc = vector_length_encoding(this);
18618     if (VM_Version::supports_avx2()) {
18619       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18620     } else if (VM_Version::supports_avx()) {
18621       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18622     } else {
18623       __ movdl($dst$$XMMRegister, $mem$$Address);
18624       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18625     }
18626   %}
18627   ins_pipe( pipe_slow );
18628 %}
18629 
18630 instruct ReplI_imm(vec dst, immI con) %{
18631   predicate(Matcher::is_non_long_integral_vector(n));
18632   match(Set dst (Replicate con));
18633   format %{ "replicateI $dst,$con" %}
18634   ins_encode %{
18635     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18636                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18637                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18638     BasicType bt = Matcher::vector_element_basic_type(this);
18639     int vlen = Matcher::vector_length_in_bytes(this);
18640     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18641   %}
18642   ins_pipe( pipe_slow );
18643 %}
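
// The divisor in ReplI_imm sizes the constant-table entry to the smallest
// granule the available broadcast can load: 4 bytes with AVX, 8 with SSE3,
// 16 otherwise. Worked example (sketch): a T_SHORT splat with AVX stores the
// 2-byte constant 4/2 = 2 times into a 4-byte entry, which
// load_constant_vector() then broadcasts to the full vector length.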
18644 
18645 // Replicate scalar zero to be vector
18646 instruct ReplI_zero(vec dst, immI_0 zero) %{
18647   predicate(Matcher::is_non_long_integral_vector(n));
18648   match(Set dst (Replicate zero));
18649   format %{ "replicateI $dst,$zero" %}
18650   ins_encode %{
18651     int vlen_enc = vector_length_encoding(this);
18652     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18653       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18654     } else {
18655       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18656     }
18657   %}
18658   ins_pipe( fpu_reg_reg );
18659 %}
18660 
18661 instruct ReplI_M1(vec dst, immI_M1 con) %{
18662   predicate(Matcher::is_non_long_integral_vector(n));
18663   match(Set dst (Replicate con));
18664   format %{ "vallones $dst" %}
18665   ins_encode %{
18666     int vector_len = vector_length_encoding(this);
18667     __ vallones($dst$$XMMRegister, vector_len);
18668   %}
18669   ins_pipe( pipe_slow );
18670 %}
18671 
18672 // ====================ReplicateL=======================================
18673 
18674 // Replicate long (8 byte) scalar to be vector
18675 instruct ReplL_reg(vec dst, rRegL src) %{
18676   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18677   match(Set dst (Replicate src));
18678   format %{ "replicateL $dst,$src" %}
18679   ins_encode %{
18680     int vlen = Matcher::vector_length(this);
18681     int vlen_enc = vector_length_encoding(this);
18682     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18683       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18684     } else if (VM_Version::supports_avx2()) {
18685       __ movdq($dst$$XMMRegister, $src$$Register);
18686       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18687     } else {
18688       __ movdq($dst$$XMMRegister, $src$$Register);
18689       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18690     }
18691   %}
18692   ins_pipe( pipe_slow );
18693 %}
18694 
18695 instruct ReplL_mem(vec dst, memory mem) %{
18696   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18697   match(Set dst (Replicate (LoadL mem)));
18698   format %{ "replicateL $dst,$mem" %}
18699   ins_encode %{
18700     int vlen_enc = vector_length_encoding(this);
18701     if (VM_Version::supports_avx2()) {
18702       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18703     } else if (VM_Version::supports_sse3()) {
18704       __ movddup($dst$$XMMRegister, $mem$$Address);
18705     } else {
18706       __ movq($dst$$XMMRegister, $mem$$Address);
18707       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18708     }
18709   %}
18710   ins_pipe( pipe_slow );
18711 %}
18712 
18713 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18714 instruct ReplL_imm(vec dst, immL con) %{
18715   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18716   match(Set dst (Replicate con));
18717   format %{ "replicateL $dst,$con" %}
18718   ins_encode %{
18719     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18720     int vlen = Matcher::vector_length_in_bytes(this);
18721     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18722   %}
18723   ins_pipe( pipe_slow );
18724 %}
18725 
18726 instruct ReplL_zero(vec dst, immL0 zero) %{
18727   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18728   match(Set dst (Replicate zero));
18729   format %{ "replicateL $dst,$zero" %}
18730   ins_encode %{
18731     int vlen_enc = vector_length_encoding(this);
18732     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18733       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18734     } else {
18735       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18736     }
18737   %}
18738   ins_pipe( fpu_reg_reg );
18739 %}
18740 
18741 instruct ReplL_M1(vec dst, immL_M1 con) %{
18742   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18743   match(Set dst (Replicate con));
18744   format %{ "vallones $dst" %}
18745   ins_encode %{
18746     int vector_len = vector_length_encoding(this);
18747     __ vallones($dst$$XMMRegister, vector_len);
18748   %}
18749   ins_pipe( pipe_slow );
18750 %}
18751 
18752 // ====================ReplicateF=======================================
18753 
18754 instruct vReplF_reg(vec dst, vlRegF src) %{
18755   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18756   match(Set dst (Replicate src));
18757   format %{ "replicateF $dst,$src" %}
18758   ins_encode %{
18759     uint vlen = Matcher::vector_length(this);
18760     int vlen_enc = vector_length_encoding(this);
18761     if (vlen <= 4) {
18762       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18763     } else if (VM_Version::supports_avx2()) {
18764       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18765     } else {
18766       assert(vlen == 8, "sanity");
18767       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18768       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18769     }
18770   %}
18771   ins_pipe( pipe_slow );
18772 %}
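
// Without a reg-to-reg vbroadcastss (AVX1), the 8-float splat above is built
// in two steps: vpermilps 0x00 replicates element 0 across the low 128-bit
// lane, and vinsertf128_high duplicates that lane into the upper half.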
18773 
18774 instruct ReplF_reg(vec dst, vlRegF src) %{
18775   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18776   match(Set dst (Replicate src));
18777   format %{ "replicateF $dst,$src" %}
18778   ins_encode %{
18779     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18780   %}
18781   ins_pipe( pipe_slow );
18782 %}
18783 
18784 instruct ReplF_mem(vec dst, memory mem) %{
18785   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18786   match(Set dst (Replicate (LoadF mem)));
18787   format %{ "replicateF $dst,$mem" %}
18788   ins_encode %{
18789     int vlen_enc = vector_length_encoding(this);
18790     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18791   %}
18792   ins_pipe( pipe_slow );
18793 %}
18794 
18795 // Replicate float scalar immediate to be vector by loading from const table.
18796 instruct ReplF_imm(vec dst, immF con) %{
18797   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18798   match(Set dst (Replicate con));
18799   format %{ "replicateF $dst,$con" %}
18800   ins_encode %{
18801     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18802                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18803     int vlen = Matcher::vector_length_in_bytes(this);
18804     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18805   %}
18806   ins_pipe( pipe_slow );
18807 %}
18808 
18809 instruct ReplF_zero(vec dst, immF0 zero) %{
18810   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18811   match(Set dst (Replicate zero));
18812   format %{ "replicateF $dst,$zero" %}
18813   ins_encode %{
18814     int vlen_enc = vector_length_encoding(this);
18815     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18816       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18817     } else {
18818       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18819     }
18820   %}
18821   ins_pipe( fpu_reg_reg );
18822 %}
18823 
18824 // ====================ReplicateD=======================================
18825 
18826 // Replicate double (8 bytes) scalar to be vector
18827 instruct vReplD_reg(vec dst, vlRegD src) %{
18828   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18829   match(Set dst (Replicate src));
18830   format %{ "replicateD $dst,$src" %}
18831   ins_encode %{
18832     uint vlen = Matcher::vector_length(this);
18833     int vlen_enc = vector_length_encoding(this);
18834     if (vlen <= 2) {
18835       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18836     } else if (VM_Version::supports_avx2()) {
18837       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18838     } else {
18839       assert(vlen == 4, "sanity");
18840       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18841       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18842     }
18843   %}
18844   ins_pipe( pipe_slow );
18845 %}
18846 
18847 instruct ReplD_reg(vec dst, vlRegD src) %{
18848   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18849   match(Set dst (Replicate src));
18850   format %{ "replicateD $dst,$src" %}
18851   ins_encode %{
18852     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18853   %}
18854   ins_pipe( pipe_slow );
18855 %}
18856 
18857 instruct ReplD_mem(vec dst, memory mem) %{
18858   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18859   match(Set dst (Replicate (LoadD mem)));
18860   format %{ "replicateD $dst,$mem" %}
18861   ins_encode %{
18862     if (Matcher::vector_length(this) >= 4) {
18863       int vlen_enc = vector_length_encoding(this);
18864       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18865     } else {
18866       __ movddup($dst$$XMMRegister, $mem$$Address);
18867     }
18868   %}
18869   ins_pipe( pipe_slow );
18870 %}
18871 
18872 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
18873 instruct ReplD_imm(vec dst, immD con) %{
18874   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18875   match(Set dst (Replicate con));
18876   format %{ "replicateD $dst,$con" %}
18877   ins_encode %{
18878     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18879     int vlen = Matcher::vector_length_in_bytes(this);
18880     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
18881   %}
18882   ins_pipe( pipe_slow );
18883 %}
18884 
18885 instruct ReplD_zero(vec dst, immD0 zero) %{
18886   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18887   match(Set dst (Replicate zero));
18888   format %{ "replicateD $dst,$zero" %}
18889   ins_encode %{
18890     int vlen_enc = vector_length_encoding(this);
18891     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18892       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18893     } else {
18894       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18895     }
18896   %}
18897   ins_pipe( fpu_reg_reg );
18898 %}
18899 
18900 // ====================VECTOR INSERT=======================================
18901 
18902 instruct insert(vec dst, rRegI val, immU8 idx) %{
18903   predicate(Matcher::vector_length_in_bytes(n) < 32);
18904   match(Set dst (VectorInsert (Binary dst val) idx));
18905   format %{ "vector_insert $dst,$val,$idx" %}
18906   ins_encode %{
18907     assert(UseSSE >= 4, "required");
18908     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
18909 
18910     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18911 
18912     assert(is_integral_type(elem_bt), "");
18913     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18914 
18915     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
18916   %}
18917   ins_pipe( pipe_slow );
18918 %}
18919 
18920 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
18921   predicate(Matcher::vector_length_in_bytes(n) == 32);
18922   match(Set dst (VectorInsert (Binary src val) idx));
18923   effect(TEMP vtmp);
18924   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18925   ins_encode %{
18926     int vlen_enc = Assembler::AVX_256bit;
18927     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18928     int elem_per_lane = 16/type2aelembytes(elem_bt);
18929     int log2epr = log2(elem_per_lane);
18930 
18931     assert(is_integral_type(elem_bt), "sanity");
18932     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18933 
18934     uint x_idx = $idx$$constant & right_n_bits(log2epr);
18935     uint y_idx = ($idx$$constant >> log2epr) & 1;
18936     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18937     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18938     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18939   %}
18940   ins_pipe( pipe_slow );
18941 %}
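
// Worked example for the lane split above (sketch): a 256-bit T_INT vector
// has elem_per_lane = 16/4 = 4 and log2epr = 2, so for idx = 6:
//   x_idx = 6 & 3 = 2         (element within the 128-bit lane)
//   y_idx = (6 >> 2) & 1 = 1  (which 128-bit lane)
// i.e. element 2 of the upper lane is replaced.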
18942 
18943 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
18944   predicate(Matcher::vector_length_in_bytes(n) == 64);
18945   match(Set dst (VectorInsert (Binary src val) idx));
18946   effect(TEMP vtmp);
18947   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18948   ins_encode %{
18949     assert(UseAVX > 2, "sanity");
18950 
18951     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18952     int elem_per_lane = 16/type2aelembytes(elem_bt);
18953     int log2epr = log2(elem_per_lane);
18954 
18955     assert(is_integral_type(elem_bt), "");
18956     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18957 
18958     uint x_idx = $idx$$constant & right_n_bits(log2epr);
18959     uint y_idx = ($idx$$constant >> log2epr) & 3;
18960     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18961     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18962     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18963   %}
18964   ins_pipe( pipe_slow );
18965 %}
18966 
18967 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
18968   predicate(Matcher::vector_length(n) == 2);
18969   match(Set dst (VectorInsert (Binary dst val) idx));
18970   format %{ "vector_insert $dst,$val,$idx" %}
18971   ins_encode %{
18972     assert(UseSSE >= 4, "required");
18973     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
18974     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18975 
18976     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
18977   %}
18978   ins_pipe( pipe_slow );
18979 %}
18980 
18981 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
18982   predicate(Matcher::vector_length(n) == 4);
18983   match(Set dst (VectorInsert (Binary src val) idx));
18984   effect(TEMP vtmp);
18985   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18986   ins_encode %{
18987     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
18988     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18989 
18990     uint x_idx = $idx$$constant & right_n_bits(1);
18991     uint y_idx = ($idx$$constant >> 1) & 1;
18992     int vlen_enc = Assembler::AVX_256bit;
18993     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18994     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18995     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18996   %}
18997   ins_pipe( pipe_slow );
18998 %}
18999 
19000 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19001   predicate(Matcher::vector_length(n) == 8);
19002   match(Set dst (VectorInsert (Binary src val) idx));
19003   effect(TEMP vtmp);
19004   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19005   ins_encode %{
19006     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19007     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19008 
19009     uint x_idx = $idx$$constant & right_n_bits(1);
19010     uint y_idx = ($idx$$constant >> 1) & 3;
19011     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19012     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19013     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19014   %}
19015   ins_pipe( pipe_slow );
19016 %}
19017 
19018 instruct insertF(vec dst, regF val, immU8 idx) %{
19019   predicate(Matcher::vector_length(n) < 8);
19020   match(Set dst (VectorInsert (Binary dst val) idx));
19021   format %{ "vector_insert $dst,$val,$idx" %}
19022   ins_encode %{
19023     assert(UseSSE >= 4, "sanity");
19024 
19025     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19026     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19027 
19028     uint x_idx = $idx$$constant & right_n_bits(2);
19029     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19030   %}
19031   ins_pipe( pipe_slow );
19032 %}
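
// In the insertps immediate above, bits [5:4] (x_idx << 4) select the
// destination element; the source element field (bits [7:6]) stays 0 since
// $val holds the scalar in element 0, and the zero-mask (bits [3:0]) is
// left clear.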
19033 
19034 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19035   predicate(Matcher::vector_length(n) >= 8);
19036   match(Set dst (VectorInsert (Binary src val) idx));
19037   effect(TEMP vtmp);
19038   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19039   ins_encode %{
19040     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19041     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19042 
19043     int vlen = Matcher::vector_length(this);
19044     uint x_idx = $idx$$constant & right_n_bits(2);
19045     if (vlen == 8) {
19046       uint y_idx = ($idx$$constant >> 2) & 1;
19047       int vlen_enc = Assembler::AVX_256bit;
19048       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19049       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19050       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19051     } else {
19052       assert(vlen == 16, "sanity");
19053       uint y_idx = ($idx$$constant >> 2) & 3;
19054       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19055       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19056       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19057     }
19058   %}
19059   ins_pipe( pipe_slow );
19060 %}
19061 
19062 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19063   predicate(Matcher::vector_length(n) == 2);
19064   match(Set dst (VectorInsert (Binary dst val) idx));
19065   effect(TEMP tmp);
19066   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19067   ins_encode %{
19068     assert(UseSSE >= 4, "sanity");
19069     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19070     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19071 
19072     __ movq($tmp$$Register, $val$$XMMRegister);
19073     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19074   %}
19075   ins_pipe( pipe_slow );
19076 %}
19077 
19078 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19079   predicate(Matcher::vector_length(n) == 4);
19080   match(Set dst (VectorInsert (Binary src val) idx));
19081   effect(TEMP vtmp, TEMP tmp);
19082   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19083   ins_encode %{
19084     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19085     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19086 
19087     uint x_idx = $idx$$constant & right_n_bits(1);
19088     uint y_idx = ($idx$$constant >> 1) & 1;
19089     int vlen_enc = Assembler::AVX_256bit;
19090     __ movq($tmp$$Register, $val$$XMMRegister);
19091     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19092     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19093     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19094   %}
19095   ins_pipe( pipe_slow );
19096 %}
19097 
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19099   predicate(Matcher::vector_length(n) == 8);
19100   match(Set dst (VectorInsert (Binary src val) idx));
19101   effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19103   ins_encode %{
19104     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19105     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19106 
19107     uint x_idx = $idx$$constant & right_n_bits(1);
19108     uint y_idx = ($idx$$constant >> 1) & 3;
19109     __ movq($tmp$$Register, $val$$XMMRegister);
19110     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19111     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19112     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19113   %}
19114   ins_pipe( pipe_slow );
19115 %}
19116 
19117 // ====================REDUCTION ARITHMETIC=======================================
19118 
19119 // =======================Int Reduction==========================================
19120 
19121 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19122   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19123   match(Set dst (AddReductionVI src1 src2));
19124   match(Set dst (MulReductionVI src1 src2));
19125   match(Set dst (AndReductionV  src1 src2));
19126   match(Set dst ( OrReductionV  src1 src2));
19127   match(Set dst (XorReductionV  src1 src2));
19128   match(Set dst (MinReductionV  src1 src2));
19129   match(Set dst (MaxReductionV  src1 src2));
19130   effect(TEMP vtmp1, TEMP vtmp2);
19131   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19132   ins_encode %{
19133     int opcode = this->ideal_Opcode();
19134     int vlen = Matcher::vector_length(this, $src2);
19135     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19136   %}
19137   ins_pipe( pipe_slow );
19138 %}
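
// Scalar equivalence (sketch): dst = src1 OP src2[0] OP ... OP src2[vlen-1].
// reduceI() folds src2 pairwise (upper half into lower half, log2(vlen)
// steps) before combining the last element with the scalar src1; for
// integral types this reassociation is always legal.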
19139 
19140 // =======================Long Reduction==========================================
19141 
19142 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19143   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19144   match(Set dst (AddReductionVL src1 src2));
19145   match(Set dst (MulReductionVL src1 src2));
19146   match(Set dst (AndReductionV  src1 src2));
19147   match(Set dst ( OrReductionV  src1 src2));
19148   match(Set dst (XorReductionV  src1 src2));
19149   match(Set dst (MinReductionV  src1 src2));
19150   match(Set dst (MaxReductionV  src1 src2));
19151   effect(TEMP vtmp1, TEMP vtmp2);
19152   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19153   ins_encode %{
19154     int opcode = this->ideal_Opcode();
19155     int vlen = Matcher::vector_length(this, $src2);
19156     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19157   %}
19158   ins_pipe( pipe_slow );
19159 %}
19160 
19161 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19162   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19163   match(Set dst (AddReductionVL src1 src2));
19164   match(Set dst (MulReductionVL src1 src2));
19165   match(Set dst (AndReductionV  src1 src2));
19166   match(Set dst ( OrReductionV  src1 src2));
19167   match(Set dst (XorReductionV  src1 src2));
19168   match(Set dst (MinReductionV  src1 src2));
19169   match(Set dst (MaxReductionV  src1 src2));
19170   effect(TEMP vtmp1, TEMP vtmp2);
19171   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19172   ins_encode %{
19173     int opcode = this->ideal_Opcode();
19174     int vlen = Matcher::vector_length(this, $src2);
19175     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19176   %}
19177   ins_pipe( pipe_slow );
19178 %}
19179 
19180 // =======================Float Reduction==========================================
19181 
19182 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19183   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19184   match(Set dst (AddReductionVF dst src));
19185   match(Set dst (MulReductionVF dst src));
19186   effect(TEMP dst, TEMP vtmp);
19187   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19188   ins_encode %{
19189     int opcode = this->ideal_Opcode();
19190     int vlen = Matcher::vector_length(this, $src);
19191     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19192   %}
19193   ins_pipe( pipe_slow );
19194 %}
19195 
19196 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19197   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19198   match(Set dst (AddReductionVF dst src));
19199   match(Set dst (MulReductionVF dst src));
19200   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19201   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19202   ins_encode %{
19203     int opcode = this->ideal_Opcode();
19204     int vlen = Matcher::vector_length(this, $src);
19205     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19206   %}
19207   ins_pipe( pipe_slow );
19208 %}
19209 
19210 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19211   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19212   match(Set dst (AddReductionVF dst src));
19213   match(Set dst (MulReductionVF dst src));
19214   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19215   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19216   ins_encode %{
19217     int opcode = this->ideal_Opcode();
19218     int vlen = Matcher::vector_length(this, $src);
19219     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19220   %}
19221   ins_pipe( pipe_slow );
19222 %}
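
// The strictly ordered rules above accumulate into dst element by element,
// since FP add/mul are not associative and auto-vectorized reductions must
// reproduce the sequential result bit for bit. The unordered rules below
// are free to reassociate and can use a cheaper pairwise fold.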
19223 
19224 
19225 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19226   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19227   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19228   // src1 contains reduction identity
19229   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19230   match(Set dst (AddReductionVF src1 src2));
19231   match(Set dst (MulReductionVF src1 src2));
19232   effect(TEMP dst);
19233   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19234   ins_encode %{
19235     int opcode = this->ideal_Opcode();
19236     int vlen = Matcher::vector_length(this, $src2);
19237     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19238   %}
19239   ins_pipe( pipe_slow );
19240 %}
19241 
19242 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19243   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19244   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19245   // src1 contains reduction identity
19246   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19247   match(Set dst (AddReductionVF src1 src2));
19248   match(Set dst (MulReductionVF src1 src2));
19249   effect(TEMP dst, TEMP vtmp);
19250   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19251   ins_encode %{
19252     int opcode = this->ideal_Opcode();
19253     int vlen = Matcher::vector_length(this, $src2);
19254     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19255   %}
19256   ins_pipe( pipe_slow );
19257 %}
19258 
19259 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19260   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19261   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19262   // src1 contains reduction identity
19263   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19264   match(Set dst (AddReductionVF src1 src2));
19265   match(Set dst (MulReductionVF src1 src2));
19266   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19267   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19268   ins_encode %{
19269     int opcode = this->ideal_Opcode();
19270     int vlen = Matcher::vector_length(this, $src2);
19271     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19272   %}
19273   ins_pipe( pipe_slow );
19274 %}
19275 
19276 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19277   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19278   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19279   // src1 contains reduction identity
19280   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19281   match(Set dst (AddReductionVF src1 src2));
19282   match(Set dst (MulReductionVF src1 src2));
19283   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19284   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19285   ins_encode %{
19286     int opcode = this->ideal_Opcode();
19287     int vlen = Matcher::vector_length(this, $src2);
19288     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19289   %}
19290   ins_pipe( pipe_slow );
19291 %}
19292 
19293 // =======================Double Reduction==========================================
19294 
19295 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19296   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19297   match(Set dst (AddReductionVD dst src));
19298   match(Set dst (MulReductionVD dst src));
19299   effect(TEMP dst, TEMP vtmp);
19300   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19301   ins_encode %{
19302     int opcode = this->ideal_Opcode();
19303     int vlen = Matcher::vector_length(this, $src);
19304     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
19306   ins_pipe( pipe_slow );
19307 %}
19308 
19309 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19310   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19311   match(Set dst (AddReductionVD dst src));
19312   match(Set dst (MulReductionVD dst src));
19313   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19314   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19315   ins_encode %{
19316     int opcode = this->ideal_Opcode();
19317     int vlen = Matcher::vector_length(this, $src);
19318     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19319   %}
19320   ins_pipe( pipe_slow );
19321 %}
19322 
19323 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19324   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19325   match(Set dst (AddReductionVD dst src));
19326   match(Set dst (MulReductionVD dst src));
19327   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19328   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19329   ins_encode %{
19330     int opcode = this->ideal_Opcode();
19331     int vlen = Matcher::vector_length(this, $src);
19332     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19333   %}
19334   ins_pipe( pipe_slow );
19335 %}
19336 
19337 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19338   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19339   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19340   // src1 contains reduction identity
19341   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19342   match(Set dst (AddReductionVD src1 src2));
19343   match(Set dst (MulReductionVD src1 src2));
19344   effect(TEMP dst);
19345   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19346   ins_encode %{
19347     int opcode = this->ideal_Opcode();
19348     int vlen = Matcher::vector_length(this, $src2);
19349     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
19351   ins_pipe( pipe_slow );
19352 %}
19353 
19354 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19355   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19356   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19357   // src1 contains reduction identity
19358   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19359   match(Set dst (AddReductionVD src1 src2));
19360   match(Set dst (MulReductionVD src1 src2));
19361   effect(TEMP dst, TEMP vtmp);
19362   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19363   ins_encode %{
19364     int opcode = this->ideal_Opcode();
19365     int vlen = Matcher::vector_length(this, $src2);
19366     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19367   %}
19368   ins_pipe( pipe_slow );
19369 %}
19370 
19371 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19372   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19373   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19374   // src1 contains reduction identity
19375   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19376   match(Set dst (AddReductionVD src1 src2));
19377   match(Set dst (MulReductionVD src1 src2));
19378   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19379   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19380   ins_encode %{
19381     int opcode = this->ideal_Opcode();
19382     int vlen = Matcher::vector_length(this, $src2);
19383     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19384   %}
19385   ins_pipe( pipe_slow );
19386 %}
19387 
19388 // =======================Byte Reduction==========================================
19389 
19390 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19391   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19392   match(Set dst (AddReductionVI src1 src2));
19393   match(Set dst (AndReductionV  src1 src2));
19394   match(Set dst ( OrReductionV  src1 src2));
19395   match(Set dst (XorReductionV  src1 src2));
19396   match(Set dst (MinReductionV  src1 src2));
19397   match(Set dst (MaxReductionV  src1 src2));
19398   effect(TEMP vtmp1, TEMP vtmp2);
19399   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19400   ins_encode %{
19401     int opcode = this->ideal_Opcode();
19402     int vlen = Matcher::vector_length(this, $src2);
19403     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19404   %}
19405   ins_pipe( pipe_slow );
19406 %}
19407 
19408 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19409   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19410   match(Set dst (AddReductionVI src1 src2));
19411   match(Set dst (AndReductionV  src1 src2));
19412   match(Set dst ( OrReductionV  src1 src2));
19413   match(Set dst (XorReductionV  src1 src2));
19414   match(Set dst (MinReductionV  src1 src2));
19415   match(Set dst (MaxReductionV  src1 src2));
19416   effect(TEMP vtmp1, TEMP vtmp2);
19417   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19418   ins_encode %{
19419     int opcode = this->ideal_Opcode();
19420     int vlen = Matcher::vector_length(this, $src2);
19421     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19422   %}
19423   ins_pipe( pipe_slow );
19424 %}
19425 
19426 // =======================Short Reduction==========================================
19427 
19428 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19429   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19430   match(Set dst (AddReductionVI src1 src2));
19431   match(Set dst (MulReductionVI src1 src2));
19432   match(Set dst (AndReductionV  src1 src2));
19433   match(Set dst ( OrReductionV  src1 src2));
19434   match(Set dst (XorReductionV  src1 src2));
19435   match(Set dst (MinReductionV  src1 src2));
19436   match(Set dst (MaxReductionV  src1 src2));
19437   effect(TEMP vtmp1, TEMP vtmp2);
19438   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19439   ins_encode %{
19440     int opcode = this->ideal_Opcode();
19441     int vlen = Matcher::vector_length(this, $src2);
19442     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19443   %}
19444   ins_pipe( pipe_slow );
19445 %}
19446 
19447 // =======================Mul Reduction==========================================
19448 
19449 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19450   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19451             Matcher::vector_length(n->in(2)) <= 32); // src2
19452   match(Set dst (MulReductionVI src1 src2));
19453   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19454   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19455   ins_encode %{
19456     int opcode = this->ideal_Opcode();
19457     int vlen = Matcher::vector_length(this, $src2);
19458     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19459   %}
19460   ins_pipe( pipe_slow );
19461 %}
19462 
19463 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19464   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19465             Matcher::vector_length(n->in(2)) == 64); // src2
19466   match(Set dst (MulReductionVI src1 src2));
19467   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19468   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19469   ins_encode %{
19470     int opcode = this->ideal_Opcode();
19471     int vlen = Matcher::vector_length(this, $src2);
19472     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19473   %}
19474   ins_pipe( pipe_slow );
19475 %}
19476 
19477 //--------------------Min/Max Float Reduction --------------------
19478 // Float Min Reduction
19479 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19480                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19481   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19482             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19483              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19484             Matcher::vector_length(n->in(2)) == 2);
19485   match(Set dst (MinReductionV src1 src2));
19486   match(Set dst (MaxReductionV src1 src2));
19487   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19488   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19489   ins_encode %{
19490     assert(UseAVX > 0, "sanity");
19491 
19492     int opcode = this->ideal_Opcode();
19493     int vlen = Matcher::vector_length(this, $src2);
19494     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19495                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19496   %}
19497   ins_pipe( pipe_slow );
19498 %}
19499 
19500 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19501                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19502   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19503             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19504              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19505             Matcher::vector_length(n->in(2)) >= 4);
19506   match(Set dst (MinReductionV src1 src2));
19507   match(Set dst (MaxReductionV src1 src2));
19508   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19509   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19510   ins_encode %{
19511     assert(UseAVX > 0, "sanity");
19512 
19513     int opcode = this->ideal_Opcode();
19514     int vlen = Matcher::vector_length(this, $src2);
19515     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19516                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19517   %}
19518   ins_pipe( pipe_slow );
19519 %}
19520 
19521 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19522                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19523   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19524             Matcher::vector_length(n->in(2)) == 2);
19525   match(Set dst (MinReductionV dst src));
19526   match(Set dst (MaxReductionV dst src));
19527   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19528   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19529   ins_encode %{
19530     assert(UseAVX > 0, "sanity");
19531 
19532     int opcode = this->ideal_Opcode();
19533     int vlen = Matcher::vector_length(this, $src);
19534     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19535                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19536   %}
19537   ins_pipe( pipe_slow );
19538 %}
19539 
19540 
19541 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19542                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19543   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19544             Matcher::vector_length(n->in(2)) >= 4);
19545   match(Set dst (MinReductionV dst src));
19546   match(Set dst (MaxReductionV dst src));
19547   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19548   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19549   ins_encode %{
19550     assert(UseAVX > 0, "sanity");
19551 
19552     int opcode = this->ideal_Opcode();
19553     int vlen = Matcher::vector_length(this, $src);
19554     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19555                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19556   %}
19557   ins_pipe( pipe_slow );
19558 %}
19559 
19560 instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{
19561   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19562             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19563              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19564             Matcher::vector_length(n->in(2)) == 2);
19565   match(Set dst (MinReductionV src1 src2));
19566   match(Set dst (MaxReductionV src1 src2));
19567   effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19569   ins_encode %{
19570     int opcode = this->ideal_Opcode();
19571     int vlen = Matcher::vector_length(this, $src2);
19572     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19573                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19574   %}
19575   ins_pipe( pipe_slow );
19576 %}
19577 
19578 instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19579   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19580             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19581              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19582             Matcher::vector_length(n->in(2)) >= 4);
19583   match(Set dst (MinReductionV src1 src2));
19584   match(Set dst (MaxReductionV src1 src2));
19585   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19587   ins_encode %{
19588     int opcode = this->ideal_Opcode();
19589     int vlen = Matcher::vector_length(this, $src2);
19590     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19591                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19592   %}
19593   ins_pipe( pipe_slow );
19594 %}
19595 
19596 instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{
19597   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19598             Matcher::vector_length(n->in(2)) == 2);
19599   match(Set dst (MinReductionV dst src));
19600   match(Set dst (MaxReductionV dst src));
19601   effect(TEMP dst, TEMP xtmp1);
19602   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19603   ins_encode %{
19604     int opcode = this->ideal_Opcode();
19605     int vlen = Matcher::vector_length(this, $src);
19606     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19607                          $xtmp1$$XMMRegister);
19608   %}
19609   ins_pipe( pipe_slow );
19610 %}
19611 
19612 instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19613   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19614             Matcher::vector_length(n->in(2)) >= 4);
19615   match(Set dst (MinReductionV dst src));
19616   match(Set dst (MaxReductionV dst src));
19617   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19619   ins_encode %{
19620     int opcode = this->ideal_Opcode();
19621     int vlen = Matcher::vector_length(this, $src);
19622     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19623                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19624   %}
19625   ins_pipe( pipe_slow );
19626 %}
19627 
//--------------------Min/Max Double Reduction --------------------
19629 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19630                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19631   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19632             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19633              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19634             Matcher::vector_length(n->in(2)) == 2);
19635   match(Set dst (MinReductionV src1 src2));
19636   match(Set dst (MaxReductionV src1 src2));
19637   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19638   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19639   ins_encode %{
19640     assert(UseAVX > 0, "sanity");
19641 
19642     int opcode = this->ideal_Opcode();
19643     int vlen = Matcher::vector_length(this, $src2);
19644     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19645                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19646   %}
19647   ins_pipe( pipe_slow );
19648 %}
19649 
19650 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19651                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19652   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19653             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19654              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19655             Matcher::vector_length(n->in(2)) >= 4);
19656   match(Set dst (MinReductionV src1 src2));
19657   match(Set dst (MaxReductionV src1 src2));
19658   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19659   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19660   ins_encode %{
19661     assert(UseAVX > 0, "sanity");
19662 
19663     int opcode = this->ideal_Opcode();
19664     int vlen = Matcher::vector_length(this, $src2);
19665     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19666                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19667   %}
19668   ins_pipe( pipe_slow );
19669 %}
19670 
19671 
19672 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19673                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19674   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19675             Matcher::vector_length(n->in(2)) == 2);
19676   match(Set dst (MinReductionV dst src));
19677   match(Set dst (MaxReductionV dst src));
19678   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19679   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19680   ins_encode %{
19681     assert(UseAVX > 0, "sanity");
19682 
19683     int opcode = this->ideal_Opcode();
19684     int vlen = Matcher::vector_length(this, $src);
19685     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19686                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19687   %}
19688   ins_pipe( pipe_slow );
19689 %}
19690 
19691 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19692                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19693   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19694             Matcher::vector_length(n->in(2)) >= 4);
19695   match(Set dst (MinReductionV dst src));
19696   match(Set dst (MaxReductionV dst src));
19697   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19698   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19699   ins_encode %{
19700     assert(UseAVX > 0, "sanity");
19701 
19702     int opcode = this->ideal_Opcode();
19703     int vlen = Matcher::vector_length(this, $src);
19704     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19705                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19706   %}
19707   ins_pipe( pipe_slow );
19708 %}
19709 
19710 instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{
19711   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19712             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19713              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19714             Matcher::vector_length(n->in(2)) == 2);
19715   match(Set dst (MinReductionV src1 src2));
19716   match(Set dst (MaxReductionV src1 src2));
19717   effect(TEMP dst, TEMP xtmp1);
19718   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19719   ins_encode %{
19720     int opcode = this->ideal_Opcode();
19721     int vlen = Matcher::vector_length(this, $src2);
19722     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19723                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
19724   %}
19725   ins_pipe( pipe_slow );
19726 %}
19727 
19728 instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19729   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19730             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19731              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19732             Matcher::vector_length(n->in(2)) >= 4);
19733   match(Set dst (MinReductionV src1 src2));
19734   match(Set dst (MaxReductionV src1 src2));
19735   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19736   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19737   ins_encode %{
19738     int opcode = this->ideal_Opcode();
19739     int vlen = Matcher::vector_length(this, $src2);
19740     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19741                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19742   %}
19743   ins_pipe( pipe_slow );
19744 %}
19745 
19746 
19747 instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{
19748   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19749             Matcher::vector_length(n->in(2)) == 2);
19750   match(Set dst (MinReductionV dst src));
19751   match(Set dst (MaxReductionV dst src));
19752   effect(TEMP dst, TEMP xtmp1);
19753   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19754   ins_encode %{
19755     int opcode = this->ideal_Opcode();
19756     int vlen = Matcher::vector_length(this, $src);
19757     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19758                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19759   %}
19760   ins_pipe( pipe_slow );
19761 %}
19762 
19763 instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19764   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19765             Matcher::vector_length(n->in(2)) >= 4);
19766   match(Set dst (MinReductionV dst src));
19767   match(Set dst (MaxReductionV dst src));
19768   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19769   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19770   ins_encode %{
19771     int opcode = this->ideal_Opcode();
19772     int vlen = Matcher::vector_length(this, $src);
19773     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19774                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19775   %}
19776   ins_pipe( pipe_slow );
19777 %}
19778 
19779 // ====================VECTOR ARITHMETIC=======================================
19780 
19781 // --------------------------------- ADD --------------------------------------
19782 
19783 // Bytes vector add
19784 instruct vaddB(vec dst, vec src) %{
19785   predicate(UseAVX == 0);
19786   match(Set dst (AddVB dst src));
19787   format %{ "paddb   $dst,$src\t! add packedB" %}
19788   ins_encode %{
19789     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19790   %}
19791   ins_pipe( pipe_slow );
19792 %}
19793 
19794 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19795   predicate(UseAVX > 0);
19796   match(Set dst (AddVB src1 src2));
19797   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
19798   ins_encode %{
19799     int vlen_enc = vector_length_encoding(this);
19800     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19801   %}
19802   ins_pipe( pipe_slow );
19803 %}
19804 
19805 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19806   predicate((UseAVX > 0) &&
19807             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19808   match(Set dst (AddVB src (LoadVector mem)));
19809   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
19810   ins_encode %{
19811     int vlen_enc = vector_length_encoding(this);
19812     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19813   %}
19814   ins_pipe( pipe_slow );
19815 %}
19816 
19817 // Shorts/Chars vector add
19818 instruct vaddS(vec dst, vec src) %{
19819   predicate(UseAVX == 0);
19820   match(Set dst (AddVS dst src));
19821   format %{ "paddw   $dst,$src\t! add packedS" %}
19822   ins_encode %{
19823     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19824   %}
19825   ins_pipe( pipe_slow );
19826 %}
19827 
19828 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19829   predicate(UseAVX > 0);
19830   match(Set dst (AddVS src1 src2));
19831   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
19832   ins_encode %{
19833     int vlen_enc = vector_length_encoding(this);
19834     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19835   %}
19836   ins_pipe( pipe_slow );
19837 %}
19838 
19839 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19840   predicate((UseAVX > 0) &&
19841             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19842   match(Set dst (AddVS src (LoadVector mem)));
19843   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
19844   ins_encode %{
19845     int vlen_enc = vector_length_encoding(this);
19846     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19847   %}
19848   ins_pipe( pipe_slow );
19849 %}
19850 
19851 // Integers vector add
19852 instruct vaddI(vec dst, vec src) %{
19853   predicate(UseAVX == 0);
19854   match(Set dst (AddVI dst src));
19855   format %{ "paddd   $dst,$src\t! add packedI" %}
19856   ins_encode %{
19857     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19858   %}
19859   ins_pipe( pipe_slow );
19860 %}
19861 
19862 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19863   predicate(UseAVX > 0);
19864   match(Set dst (AddVI src1 src2));
19865   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
19866   ins_encode %{
19867     int vlen_enc = vector_length_encoding(this);
19868     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19869   %}
19870   ins_pipe( pipe_slow );
19871 %}
19872 
19873 
19874 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19875   predicate((UseAVX > 0) &&
19876             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19877   match(Set dst (AddVI src (LoadVector mem)));
19878   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
19879   ins_encode %{
19880     int vlen_enc = vector_length_encoding(this);
19881     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19882   %}
19883   ins_pipe( pipe_slow );
19884 %}
19885 
19886 // Longs vector add
19887 instruct vaddL(vec dst, vec src) %{
19888   predicate(UseAVX == 0);
19889   match(Set dst (AddVL dst src));
19890   format %{ "paddq   $dst,$src\t! add packedL" %}
19891   ins_encode %{
19892     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19893   %}
19894   ins_pipe( pipe_slow );
19895 %}
19896 
19897 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
19898   predicate(UseAVX > 0);
19899   match(Set dst (AddVL src1 src2));
19900   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
19901   ins_encode %{
19902     int vlen_enc = vector_length_encoding(this);
19903     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19904   %}
19905   ins_pipe( pipe_slow );
19906 %}
19907 
19908 instruct vaddL_mem(vec dst, vec src, memory mem) %{
19909   predicate((UseAVX > 0) &&
19910             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19911   match(Set dst (AddVL src (LoadVector mem)));
19912   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
19913   ins_encode %{
19914     int vlen_enc = vector_length_encoding(this);
19915     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19916   %}
19917   ins_pipe( pipe_slow );
19918 %}
19919 
19920 // Floats vector add
19921 instruct vaddF(vec dst, vec src) %{
19922   predicate(UseAVX == 0);
19923   match(Set dst (AddVF dst src));
19924   format %{ "addps   $dst,$src\t! add packedF" %}
19925   ins_encode %{
19926     __ addps($dst$$XMMRegister, $src$$XMMRegister);
19927   %}
19928   ins_pipe( pipe_slow );
19929 %}
19930 
19931 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
19932   predicate(UseAVX > 0);
19933   match(Set dst (AddVF src1 src2));
19934   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
19935   ins_encode %{
19936     int vlen_enc = vector_length_encoding(this);
19937     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19938   %}
19939   ins_pipe( pipe_slow );
19940 %}
19941 
19942 instruct vaddF_mem(vec dst, vec src, memory mem) %{
19943   predicate((UseAVX > 0) &&
19944             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19945   match(Set dst (AddVF src (LoadVector mem)));
19946   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
19947   ins_encode %{
19948     int vlen_enc = vector_length_encoding(this);
19949     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19950   %}
19951   ins_pipe( pipe_slow );
19952 %}
19953 
19954 // Doubles vector add
19955 instruct vaddD(vec dst, vec src) %{
19956   predicate(UseAVX == 0);
19957   match(Set dst (AddVD dst src));
19958   format %{ "addpd   $dst,$src\t! add packedD" %}
19959   ins_encode %{
19960     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
19961   %}
19962   ins_pipe( pipe_slow );
19963 %}
19964 
19965 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
19966   predicate(UseAVX > 0);
19967   match(Set dst (AddVD src1 src2));
19968   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
19969   ins_encode %{
19970     int vlen_enc = vector_length_encoding(this);
19971     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19972   %}
19973   ins_pipe( pipe_slow );
19974 %}
19975 
19976 instruct vaddD_mem(vec dst, vec src, memory mem) %{
19977   predicate((UseAVX > 0) &&
19978             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19979   match(Set dst (AddVD src (LoadVector mem)));
19980   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
19981   ins_encode %{
19982     int vlen_enc = vector_length_encoding(this);
19983     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19984   %}
19985   ins_pipe( pipe_slow );
19986 %}
19987 
19988 // --------------------------------- SUB --------------------------------------
19989 
19990 // Bytes vector sub
19991 instruct vsubB(vec dst, vec src) %{
19992   predicate(UseAVX == 0);
19993   match(Set dst (SubVB dst src));
19994   format %{ "psubb   $dst,$src\t! sub packedB" %}
19995   ins_encode %{
19996     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
19997   %}
19998   ins_pipe( pipe_slow );
19999 %}
20000 
20001 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20002   predicate(UseAVX > 0);
20003   match(Set dst (SubVB src1 src2));
20004   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20005   ins_encode %{
20006     int vlen_enc = vector_length_encoding(this);
20007     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20008   %}
20009   ins_pipe( pipe_slow );
20010 %}
20011 
20012 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20013   predicate((UseAVX > 0) &&
20014             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20015   match(Set dst (SubVB src (LoadVector mem)));
20016   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20017   ins_encode %{
20018     int vlen_enc = vector_length_encoding(this);
20019     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20020   %}
20021   ins_pipe( pipe_slow );
20022 %}
20023 
20024 // Shorts/Chars vector sub
20025 instruct vsubS(vec dst, vec src) %{
20026   predicate(UseAVX == 0);
20027   match(Set dst (SubVS dst src));
20028   format %{ "psubw   $dst,$src\t! sub packedS" %}
20029   ins_encode %{
20030     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20031   %}
20032   ins_pipe( pipe_slow );
20033 %}
20034 
20035 
20036 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20037   predicate(UseAVX > 0);
20038   match(Set dst (SubVS src1 src2));
20039   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20040   ins_encode %{
20041     int vlen_enc = vector_length_encoding(this);
20042     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20043   %}
20044   ins_pipe( pipe_slow );
20045 %}
20046 
20047 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20048   predicate((UseAVX > 0) &&
20049             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20050   match(Set dst (SubVS src (LoadVector mem)));
20051   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20052   ins_encode %{
20053     int vlen_enc = vector_length_encoding(this);
20054     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20055   %}
20056   ins_pipe( pipe_slow );
20057 %}
20058 
20059 // Integers vector sub
20060 instruct vsubI(vec dst, vec src) %{
20061   predicate(UseAVX == 0);
20062   match(Set dst (SubVI dst src));
20063   format %{ "psubd   $dst,$src\t! sub packedI" %}
20064   ins_encode %{
20065     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20066   %}
20067   ins_pipe( pipe_slow );
20068 %}
20069 
20070 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20071   predicate(UseAVX > 0);
20072   match(Set dst (SubVI src1 src2));
20073   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20074   ins_encode %{
20075     int vlen_enc = vector_length_encoding(this);
20076     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20077   %}
20078   ins_pipe( pipe_slow );
20079 %}
20080 
20081 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20082   predicate((UseAVX > 0) &&
20083             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20084   match(Set dst (SubVI src (LoadVector mem)));
20085   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20086   ins_encode %{
20087     int vlen_enc = vector_length_encoding(this);
20088     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20089   %}
20090   ins_pipe( pipe_slow );
20091 %}
20092 
20093 // Longs vector sub
20094 instruct vsubL(vec dst, vec src) %{
20095   predicate(UseAVX == 0);
20096   match(Set dst (SubVL dst src));
20097   format %{ "psubq   $dst,$src\t! sub packedL" %}
20098   ins_encode %{
20099     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20100   %}
20101   ins_pipe( pipe_slow );
20102 %}
20103 
20104 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20105   predicate(UseAVX > 0);
20106   match(Set dst (SubVL src1 src2));
20107   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20108   ins_encode %{
20109     int vlen_enc = vector_length_encoding(this);
20110     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20111   %}
20112   ins_pipe( pipe_slow );
20113 %}
20114 
20115 
20116 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20117   predicate((UseAVX > 0) &&
20118             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20119   match(Set dst (SubVL src (LoadVector mem)));
20120   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20121   ins_encode %{
20122     int vlen_enc = vector_length_encoding(this);
20123     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20124   %}
20125   ins_pipe( pipe_slow );
20126 %}
20127 
20128 // Floats vector sub
20129 instruct vsubF(vec dst, vec src) %{
20130   predicate(UseAVX == 0);
20131   match(Set dst (SubVF dst src));
20132   format %{ "subps   $dst,$src\t! sub packedF" %}
20133   ins_encode %{
20134     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20135   %}
20136   ins_pipe( pipe_slow );
20137 %}
20138 
20139 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20140   predicate(UseAVX > 0);
20141   match(Set dst (SubVF src1 src2));
20142   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20143   ins_encode %{
20144     int vlen_enc = vector_length_encoding(this);
20145     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20146   %}
20147   ins_pipe( pipe_slow );
20148 %}
20149 
20150 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20151   predicate((UseAVX > 0) &&
20152             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20153   match(Set dst (SubVF src (LoadVector mem)));
20154   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20155   ins_encode %{
20156     int vlen_enc = vector_length_encoding(this);
20157     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20158   %}
20159   ins_pipe( pipe_slow );
20160 %}
20161 
20162 // Doubles vector sub
20163 instruct vsubD(vec dst, vec src) %{
20164   predicate(UseAVX == 0);
20165   match(Set dst (SubVD dst src));
20166   format %{ "subpd   $dst,$src\t! sub packedD" %}
20167   ins_encode %{
20168     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20169   %}
20170   ins_pipe( pipe_slow );
20171 %}
20172 
20173 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20174   predicate(UseAVX > 0);
20175   match(Set dst (SubVD src1 src2));
20176   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20177   ins_encode %{
20178     int vlen_enc = vector_length_encoding(this);
20179     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20180   %}
20181   ins_pipe( pipe_slow );
20182 %}
20183 
20184 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20185   predicate((UseAVX > 0) &&
20186             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20187   match(Set dst (SubVD src (LoadVector mem)));
20188   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20189   ins_encode %{
20190     int vlen_enc = vector_length_encoding(this);
20191     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20192   %}
20193   ins_pipe( pipe_slow );
20194 %}
20195 
20196 // --------------------------------- MUL --------------------------------------
20197 
20198 // Byte vector mul
20199 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20200   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20201   match(Set dst (MulVB src1 src2));
20202   effect(TEMP dst, TEMP xtmp);
20203   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20204   ins_encode %{
20205     assert(UseSSE > 3, "required");
20206     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20207     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20208     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20209     __ psllw($dst$$XMMRegister, 8);
20210     __ psrlw($dst$$XMMRegister, 8);
20211     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20212   %}
20213   ins_pipe( pipe_slow );
20214 %}
20215 
20216 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20217   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20218   match(Set dst (MulVB src1 src2));
20219   effect(TEMP dst, TEMP xtmp);
20220   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20221   ins_encode %{
20222     assert(UseSSE > 3, "required");
20223     // Odd-index elements
20224     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20225     __ psrlw($dst$$XMMRegister, 8);
20226     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20227     __ psrlw($xtmp$$XMMRegister, 8);
20228     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20229     __ psllw($dst$$XMMRegister, 8);
20230     // Even-index elements
20231     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20232     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20233     __ psllw($xtmp$$XMMRegister, 8);
20234     __ psrlw($xtmp$$XMMRegister, 8);
20235     // Combine
20236     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20237   %}
20238   ins_pipe( pipe_slow );
20239 %}
20240 
20241 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20242   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20243   match(Set dst (MulVB src1 src2));
20244   effect(TEMP xtmp1, TEMP xtmp2);
20245   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20246   ins_encode %{
20247     int vlen_enc = vector_length_encoding(this);
20248     // Odd-index elements
20249     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20250     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20251     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20252     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20253     // Even-index elements
20254     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20255     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20256     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20257     // Combine
20258     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20259   %}
20260   ins_pipe( pipe_slow );
20261 %}
20262 
20263 // Shorts/Chars vector mul
20264 instruct vmulS(vec dst, vec src) %{
20265   predicate(UseAVX == 0);
20266   match(Set dst (MulVS dst src));
20267   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20268   ins_encode %{
20269     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20270   %}
20271   ins_pipe( pipe_slow );
20272 %}
20273 
20274 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20275   predicate(UseAVX > 0);
20276   match(Set dst (MulVS src1 src2));
20277   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20278   ins_encode %{
20279     int vlen_enc = vector_length_encoding(this);
20280     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20281   %}
20282   ins_pipe( pipe_slow );
20283 %}
20284 
20285 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20286   predicate((UseAVX > 0) &&
20287             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20288   match(Set dst (MulVS src (LoadVector mem)));
20289   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20290   ins_encode %{
20291     int vlen_enc = vector_length_encoding(this);
20292     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20293   %}
20294   ins_pipe( pipe_slow );
20295 %}
20296 
20297 // Integers vector mul
20298 instruct vmulI(vec dst, vec src) %{
20299   predicate(UseAVX == 0);
20300   match(Set dst (MulVI dst src));
20301   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20302   ins_encode %{
20303     assert(UseSSE > 3, "required");
20304     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20305   %}
20306   ins_pipe( pipe_slow );
20307 %}
20308 
20309 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20310   predicate(UseAVX > 0);
20311   match(Set dst (MulVI src1 src2));
20312   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20313   ins_encode %{
20314     int vlen_enc = vector_length_encoding(this);
20315     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20316   %}
20317   ins_pipe( pipe_slow );
20318 %}
20319 
20320 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20321   predicate((UseAVX > 0) &&
20322             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20323   match(Set dst (MulVI src (LoadVector mem)));
20324   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20325   ins_encode %{
20326     int vlen_enc = vector_length_encoding(this);
20327     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20328   %}
20329   ins_pipe( pipe_slow );
20330 %}
20331 
20332 // Longs vector mul
20333 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20334   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20335              VM_Version::supports_avx512dq()) ||
20336             VM_Version::supports_avx512vldq());
20337   match(Set dst (MulVL src1 src2));
20338   ins_cost(500);
20339   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20340   ins_encode %{
20341     assert(UseAVX > 2, "required");
20342     int vlen_enc = vector_length_encoding(this);
20343     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20344   %}
20345   ins_pipe( pipe_slow );
20346 %}
20347 
20348 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20349   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20350              VM_Version::supports_avx512dq()) ||
20351             (Matcher::vector_length_in_bytes(n) > 8 &&
20352              VM_Version::supports_avx512vldq()));
20353   match(Set dst (MulVL src (LoadVector mem)));
20354   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20355   ins_cost(500);
20356   ins_encode %{
20357     assert(UseAVX > 2, "required");
20358     int vlen_enc = vector_length_encoding(this);
20359     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20360   %}
20361   ins_pipe( pipe_slow );
20362 %}
20363 
20364 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20365   predicate(UseAVX == 0);
20366   match(Set dst (MulVL src1 src2));
20367   ins_cost(500);
20368   effect(TEMP dst, TEMP xtmp);
20369   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20370   ins_encode %{
20371     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi products; only the low 32 bits of each product are needed
20373     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20374     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20375     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20376     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20377     __ psllq($dst$$XMMRegister, 32);
20378     // Get the lo-lo products
20379     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20380     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20381     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20382   %}
20383   ins_pipe( pipe_slow );
20384 %}
20385 
20386 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20387   predicate(UseAVX > 0 &&
20388             ((Matcher::vector_length_in_bytes(n) == 64 &&
20389               !VM_Version::supports_avx512dq()) ||
20390              (Matcher::vector_length_in_bytes(n) < 64 &&
20391               !VM_Version::supports_avx512vldq())));
20392   match(Set dst (MulVL src1 src2));
20393   effect(TEMP xtmp1, TEMP xtmp2);
20394   ins_cost(500);
20395   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20396   ins_encode %{
20397     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only the low 32 bits of each product are needed
20399     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20400     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20401     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20402     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20403     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20404     // Get the lo-lo products
20405     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20406     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20407   %}
20408   ins_pipe( pipe_slow );
20409 %}
20410 
20411 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20412   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20413   match(Set dst (MulVL src1 src2));
20414   ins_cost(100);
20415   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20416   ins_encode %{
20417     int vlen_enc = vector_length_encoding(this);
20418     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20419   %}
20420   ins_pipe( pipe_slow );
20421 %}
20422 
20423 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20424   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20425   match(Set dst (MulVL src1 src2));
20426   ins_cost(100);
20427   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20428   ins_encode %{
20429     int vlen_enc = vector_length_encoding(this);
20430     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20431   %}
20432   ins_pipe( pipe_slow );
20433 %}
20434 
20435 // Floats vector mul
20436 instruct vmulF(vec dst, vec src) %{
20437   predicate(UseAVX == 0);
20438   match(Set dst (MulVF dst src));
20439   format %{ "mulps   $dst,$src\t! mul packedF" %}
20440   ins_encode %{
20441     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20442   %}
20443   ins_pipe( pipe_slow );
20444 %}
20445 
20446 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20447   predicate(UseAVX > 0);
20448   match(Set dst (MulVF src1 src2));
20449   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20450   ins_encode %{
20451     int vlen_enc = vector_length_encoding(this);
20452     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20453   %}
20454   ins_pipe( pipe_slow );
20455 %}
20456 
20457 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20458   predicate((UseAVX > 0) &&
20459             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20460   match(Set dst (MulVF src (LoadVector mem)));
20461   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20462   ins_encode %{
20463     int vlen_enc = vector_length_encoding(this);
20464     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20465   %}
20466   ins_pipe( pipe_slow );
20467 %}
20468 
20469 // Doubles vector mul
20470 instruct vmulD(vec dst, vec src) %{
20471   predicate(UseAVX == 0);
20472   match(Set dst (MulVD dst src));
20473   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20474   ins_encode %{
20475     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20476   %}
20477   ins_pipe( pipe_slow );
20478 %}
20479 
20480 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20481   predicate(UseAVX > 0);
20482   match(Set dst (MulVD src1 src2));
20483   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20484   ins_encode %{
20485     int vlen_enc = vector_length_encoding(this);
20486     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20487   %}
20488   ins_pipe( pipe_slow );
20489 %}
20490 
20491 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20492   predicate((UseAVX > 0) &&
20493             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20494   match(Set dst (MulVD src (LoadVector mem)));
20495   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20496   ins_encode %{
20497     int vlen_enc = vector_length_encoding(this);
20498     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20499   %}
20500   ins_pipe( pipe_slow );
20501 %}
20502 
20503 // --------------------------------- DIV --------------------------------------
20504 
20505 // Floats vector div
20506 instruct vdivF(vec dst, vec src) %{
20507   predicate(UseAVX == 0);
20508   match(Set dst (DivVF dst src));
20509   format %{ "divps   $dst,$src\t! div packedF" %}
20510   ins_encode %{
20511     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20512   %}
20513   ins_pipe( pipe_slow );
20514 %}
20515 
20516 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20517   predicate(UseAVX > 0);
20518   match(Set dst (DivVF src1 src2));
20519   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20520   ins_encode %{
20521     int vlen_enc = vector_length_encoding(this);
20522     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20523   %}
20524   ins_pipe( pipe_slow );
20525 %}
20526 
20527 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20528   predicate((UseAVX > 0) &&
20529             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20530   match(Set dst (DivVF src (LoadVector mem)));
20531   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20532   ins_encode %{
20533     int vlen_enc = vector_length_encoding(this);
20534     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20535   %}
20536   ins_pipe( pipe_slow );
20537 %}
20538 
20539 // Doubles vector div
20540 instruct vdivD(vec dst, vec src) %{
20541   predicate(UseAVX == 0);
20542   match(Set dst (DivVD dst src));
20543   format %{ "divpd   $dst,$src\t! div packedD" %}
20544   ins_encode %{
20545     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20546   %}
20547   ins_pipe( pipe_slow );
20548 %}
20549 
20550 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20551   predicate(UseAVX > 0);
20552   match(Set dst (DivVD src1 src2));
20553   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20554   ins_encode %{
20555     int vlen_enc = vector_length_encoding(this);
20556     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20557   %}
20558   ins_pipe( pipe_slow );
20559 %}
20560 
20561 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20562   predicate((UseAVX > 0) &&
20563             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20564   match(Set dst (DivVD src (LoadVector mem)));
20565   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20566   ins_encode %{
20567     int vlen_enc = vector_length_encoding(this);
20568     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20569   %}
20570   ins_pipe( pipe_slow );
20571 %}
20572 
20573 // ------------------------------ MinMax ---------------------------------------
20574 
20575 // Byte, Short, Int vector Min/Max
20576 instruct minmax_reg_sse(vec dst, vec src) %{
20577   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20578             UseAVX == 0);
20579   match(Set dst (MinV dst src));
20580   match(Set dst (MaxV dst src));
20581   format %{ "vector_minmax  $dst,$src\t!  " %}
20582   ins_encode %{
20583     assert(UseSSE >= 4, "required");
20584 
20585     int opcode = this->ideal_Opcode();
20586     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20587     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20588   %}
20589   ins_pipe( pipe_slow );
20590 %}
20591 
20592 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20593   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20594             UseAVX > 0);
20595   match(Set dst (MinV src1 src2));
20596   match(Set dst (MaxV src1 src2));
20597   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20598   ins_encode %{
20599     int opcode = this->ideal_Opcode();
20600     int vlen_enc = vector_length_encoding(this);
20601     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20602 
20603     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20604   %}
20605   ins_pipe( pipe_slow );
20606 %}
20607 
20608 // Long vector Min/Max
20609 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20610   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20611             UseAVX == 0);
20612   match(Set dst (MinV dst src));
20613   match(Set dst (MaxV src dst));
20614   effect(TEMP dst, TEMP tmp);
20615   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20616   ins_encode %{
20617     assert(UseSSE >= 4, "required");
20618 
20619     int opcode = this->ideal_Opcode();
20620     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20621     assert(elem_bt == T_LONG, "sanity");
20622 
20623     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20624   %}
20625   ins_pipe( pipe_slow );
20626 %}
20627 
20628 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20629   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20630             UseAVX > 0 && !VM_Version::supports_avx512vl());
20631   match(Set dst (MinV src1 src2));
20632   match(Set dst (MaxV src1 src2));
20633   effect(TEMP dst);
20634   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20635   ins_encode %{
20636     int vlen_enc = vector_length_encoding(this);
20637     int opcode = this->ideal_Opcode();
20638     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20639     assert(elem_bt == T_LONG, "sanity");
20640 
20641     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20642   %}
20643   ins_pipe( pipe_slow );
20644 %}
20645 
20646 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20647   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20648             Matcher::vector_element_basic_type(n) == T_LONG);
20649   match(Set dst (MinV src1 src2));
20650   match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20652   ins_encode %{
20653     assert(UseAVX > 2, "required");
20654 
20655     int vlen_enc = vector_length_encoding(this);
20656     int opcode = this->ideal_Opcode();
20657     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20658     assert(elem_bt == T_LONG, "sanity");
20659 
20660     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20661   %}
20662   ins_pipe( pipe_slow );
20663 %}
20664 
20665 // Float/Double vector Min/Max
20666 instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{
20667   predicate(VM_Version::supports_avx10_2() &&
20668             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20669   match(Set dst (MinV a b));
20670   match(Set dst (MaxV a b));
20671   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20672   ins_encode %{
20673     int vlen_enc = vector_length_encoding(this);
20674     int opcode = this->ideal_Opcode();
20675     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20676     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20677   %}
20678   ins_pipe( pipe_slow );
20679 %}
20680 
20681 // Float/Double vector Min/Max
20682 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20683   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20684             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20685             UseAVX > 0);
20686   match(Set dst (MinV a b));
20687   match(Set dst (MaxV a b));
20688   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20689   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20690   ins_encode %{
20691     assert(UseAVX > 0, "required");
20692 
20693     int opcode = this->ideal_Opcode();
20694     int vlen_enc = vector_length_encoding(this);
20695     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20696 
20697     __ vminmax_fp(opcode, elem_bt,
20698                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20699                   $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20700   %}
20701   ins_pipe( pipe_slow );
20702 %}
20703 
20704 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20705   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20706             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20707   match(Set dst (MinV a b));
20708   match(Set dst (MaxV a b));
20709   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20710   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20711   ins_encode %{
20712     assert(UseAVX > 2, "required");
20713 
20714     int opcode = this->ideal_Opcode();
20715     int vlen_enc = vector_length_encoding(this);
20716     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20717 
20718     __ evminmax_fp(opcode, elem_bt,
20719                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20720                    $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20721   %}
20722   ins_pipe( pipe_slow );
20723 %}
20724 
20725 // ------------------------------ Unsigned vector Min/Max ----------------------
20726 
20727 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20728   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20729   match(Set dst (UMinV a b));
20730   match(Set dst (UMaxV a b));
20731   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20732   ins_encode %{
20733     int opcode = this->ideal_Opcode();
20734     int vlen_enc = vector_length_encoding(this);
20735     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20736     assert(is_integral_type(elem_bt), "");
20737     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20738   %}
20739   ins_pipe( pipe_slow );
20740 %}
20741 
20742 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20743   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20744   match(Set dst (UMinV a (LoadVector b)));
20745   match(Set dst (UMaxV a (LoadVector b)));
20746   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20747   ins_encode %{
20748     int opcode = this->ideal_Opcode();
20749     int vlen_enc = vector_length_encoding(this);
20750     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20751     assert(is_integral_type(elem_bt), "");
20752     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20753   %}
20754   ins_pipe( pipe_slow );
20755 %}
20756 
20757 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20758   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20759   match(Set dst (UMinV a b));
20760   match(Set dst (UMaxV a b));
20761   effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
20763   ins_encode %{
20764     int opcode = this->ideal_Opcode();
20765     int vlen_enc = vector_length_encoding(this);
20766     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20767   %}
20768   ins_pipe( pipe_slow );
20769 %}
20770 
20771 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20772   match(Set dst (UMinV (Binary dst src2) mask));
20773   match(Set dst (UMaxV (Binary dst src2) mask));
20774   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20775   ins_encode %{
20776     int vlen_enc = vector_length_encoding(this);
20777     BasicType bt = Matcher::vector_element_basic_type(this);
20778     int opc = this->ideal_Opcode();
20779     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20780                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20781   %}
20782   ins_pipe( pipe_slow );
20783 %}
20784 
20785 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20786   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20787   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20788   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20789   ins_encode %{
20790     int vlen_enc = vector_length_encoding(this);
20791     BasicType bt = Matcher::vector_element_basic_type(this);
20792     int opc = this->ideal_Opcode();
20793     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20794                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20795   %}
20796   ins_pipe( pipe_slow );
20797 %}
20798 
20799 // --------------------------------- Signum/CopySign ---------------------------
20800 
20801 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20802   match(Set dst (SignumF dst (Binary zero one)));
20803   effect(KILL cr);
20804   format %{ "signumF $dst, $dst" %}
20805   ins_encode %{
20806     int opcode = this->ideal_Opcode();
20807     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20808   %}
20809   ins_pipe( pipe_slow );
20810 %}
20811 
20812 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20813   match(Set dst (SignumD dst (Binary zero one)));
20814   effect(KILL cr);
20815   format %{ "signumD $dst, $dst" %}
20816   ins_encode %{
20817     int opcode = this->ideal_Opcode();
20818     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20819   %}
20820   ins_pipe( pipe_slow );
20821 %}
20822 
20823 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20824   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20825   match(Set dst (SignumVF src (Binary zero one)));
20826   match(Set dst (SignumVD src (Binary zero one)));
20827   effect(TEMP dst, TEMP xtmp1);
20828   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20829   ins_encode %{
20830     int opcode = this->ideal_Opcode();
20831     int vec_enc = vector_length_encoding(this);
20832     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20833                          $xtmp1$$XMMRegister, vec_enc);
20834   %}
20835   ins_pipe( pipe_slow );
20836 %}
20837 
20838 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20839   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20840   match(Set dst (SignumVF src (Binary zero one)));
20841   match(Set dst (SignumVD src (Binary zero one)));
20842   effect(TEMP dst, TEMP ktmp1);
20843   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20844   ins_encode %{
20845     int opcode = this->ideal_Opcode();
20846     int vec_enc = vector_length_encoding(this);
20847     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20848                           $ktmp1$$KRegister, vec_enc);
20849   %}
20850   ins_pipe( pipe_slow );
20851 %}
20852 
20853 // ---------------------------------------
20854 // For copySign use 0xE4 as the truth-table immediate for vpternlog
20855 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20856 // C (xmm2) is set to 0x7FFFFFFF
20857 // Wherever xmm2 is 0, we want to pick from B (sign)
20858 // Wherever xmm2 is 1, we want to pick from A (src)
20859 //
20860 // A B C Result
20861 // 0 0 0 0
20862 // 0 0 1 0
20863 // 0 1 0 1
20864 // 0 1 1 0
20865 // 1 0 0 0
20866 // 1 0 1 1
20867 // 1 1 0 1
20868 // 1 1 1 1
20869 //
20870 // Result going from high bit to low bit is 0b11100100 = 0xE4
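      //
      // Sanity sketch (illustration only, not generated code): the imm8 is an
      // 8-entry lookup table indexed by the three source bits:
      //   int idx = (a << 2) | (b << 1) | c;   // a, b, c are single bits
      //   int res = (0xE4 >> idx) & 1;         // reproduces the table above
      // e.g. a=1, b=1, c=0 gives idx = 6 and res = 1, matching b, since c = 0
      // selects the sign operand.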
20871 // ---------------------------------------
20872 
20873 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20874   match(Set dst (CopySignF dst src));
20875   effect(TEMP tmp1, TEMP tmp2);
20876   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20877   ins_encode %{
20878     __ movl($tmp2$$Register, 0x7FFFFFFF);
20879     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20880     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20881   %}
20882   ins_pipe( pipe_slow );
20883 %}
20884 
20885 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20886   match(Set dst (CopySignD dst (Binary src zero)));
20887   ins_cost(100);
20888   effect(TEMP tmp1, TEMP tmp2);
20889   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20890   ins_encode %{
20891     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20892     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20893     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20894   %}
20895   ins_pipe( pipe_slow );
20896 %}
20897 
20898 //----------------------------- CompressBits/ExpandBits ------------------------
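      //
      // CompressBits maps to BMI2 pext, ExpandBits to pdep. A scalar sketch of
      // the semantics (illustration only, not generated code):
      //   pext: walk mask from the LSB; for each set mask bit, copy the src bit
      //         at that position into the next-lowest bit of the result.
      //   pdep: the inverse - scatter the low-order bits of src into the set
      //         bit positions of mask.
      // e.g. pext(src = 0b101100, mask = 0b011010) gathers src bits 1, 3 and 4,
      //      producing 0b010.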
20899 
20900 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20901   predicate(n->bottom_type()->isa_int());
20902   match(Set dst (CompressBits src mask));
20903   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20904   ins_encode %{
20905     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
20906   %}
20907   ins_pipe( pipe_slow );
20908 %}
20909 
20910 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20911   predicate(n->bottom_type()->isa_int());
20912   match(Set dst (ExpandBits src mask));
20913   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
20914   ins_encode %{
20915     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
20916   %}
20917   ins_pipe( pipe_slow );
20918 %}
20919 
20920 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20921   predicate(n->bottom_type()->isa_int());
20922   match(Set dst (CompressBits src (LoadI mask)));
20923   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20924   ins_encode %{
20925     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
20926   %}
20927   ins_pipe( pipe_slow );
20928 %}
20929 
20930 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20931   predicate(n->bottom_type()->isa_int());
20932   match(Set dst (ExpandBits src (LoadI mask)));
20933   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
20934   ins_encode %{
20935     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
20936   %}
20937   ins_pipe( pipe_slow );
20938 %}
20939 
20940 // --------------------------------- Sqrt --------------------------------------
20941 
20942 instruct vsqrtF_reg(vec dst, vec src) %{
20943   match(Set dst (SqrtVF src));
20944   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
20945   ins_encode %{
20946     assert(UseAVX > 0, "required");
20947     int vlen_enc = vector_length_encoding(this);
20948     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20949   %}
20950   ins_pipe( pipe_slow );
20951 %}
20952 
20953 instruct vsqrtF_mem(vec dst, memory mem) %{
20954   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20955   match(Set dst (SqrtVF (LoadVector mem)));
20956   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
20957   ins_encode %{
20958     assert(UseAVX > 0, "required");
20959     int vlen_enc = vector_length_encoding(this);
20960     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
20961   %}
20962   ins_pipe( pipe_slow );
20963 %}
20964 
20965 // Double-precision vector sqrt
20966 instruct vsqrtD_reg(vec dst, vec src) %{
20967   match(Set dst (SqrtVD src));
20968   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
20969   ins_encode %{
20970     assert(UseAVX > 0, "required");
20971     int vlen_enc = vector_length_encoding(this);
20972     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20973   %}
20974   ins_pipe( pipe_slow );
20975 %}
20976 
20977 instruct vsqrtD_mem(vec dst, memory mem) %{
20978   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20979   match(Set dst (SqrtVD (LoadVector mem)));
20980   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
20981   ins_encode %{
20982     assert(UseAVX > 0, "required");
20983     int vlen_enc = vector_length_encoding(this);
20984     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
20985   %}
20986   ins_pipe( pipe_slow );
20987 %}
20988 
20989 // ------------------------------ Shift ---------------------------------------
20990 
20991 // Left and right shift count vectors are the same on x86
20992 // (the shift count is taken from the low 64 bits of the xmm register).
20993 instruct vshiftcnt(vec dst, rRegI cnt) %{
20994   match(Set dst (LShiftCntV cnt));
20995   match(Set dst (RShiftCntV cnt));
20996   format %{ "movdl    $dst,$cnt\t! load shift count" %}
20997   ins_encode %{
20998     __ movdl($dst$$XMMRegister, $cnt$$Register);
20999   %}
21000   ins_pipe( pipe_slow );
21001 %}
21002 
21003 // Byte vector shift
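      // x86 has no 8-bit lane shifts, so the rules below widen bytes to 16-bit
      // lanes, shift there, mask off each word's high byte, and re-pack.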
21004 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21005   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21006   match(Set dst ( LShiftVB src shift));
21007   match(Set dst ( RShiftVB src shift));
21008   match(Set dst (URShiftVB src shift));
21009   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21010   format %{ "vector_byte_shift $dst,$src,$shift" %}
21011   ins_encode %{
21012     assert(UseSSE > 3, "required");
21013     int opcode = this->ideal_Opcode();
21014     bool sign = (opcode != Op_URShiftVB);
21015     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21016     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21017     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21018     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21019     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21020   %}
21021   ins_pipe( pipe_slow );
21022 %}
21023 
21024 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21025   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21026             UseAVX <= 1);
21027   match(Set dst ( LShiftVB src shift));
21028   match(Set dst ( RShiftVB src shift));
21029   match(Set dst (URShiftVB src shift));
21030   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21031   format %{ "vector_byte_shift $dst,$src,$shift" %}
21032   ins_encode %{
21033     assert(UseSSE > 3, "required");
21034     int opcode = this->ideal_Opcode();
21035     bool sign = (opcode != Op_URShiftVB);
21036     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21037     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21038     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21039     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21040     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21041     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21042     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21043     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21044     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21045   %}
21046   ins_pipe( pipe_slow );
21047 %}
21048 
21049 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21050   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21051             UseAVX > 1);
21052   match(Set dst ( LShiftVB src shift));
21053   match(Set dst ( RShiftVB src shift));
21054   match(Set dst (URShiftVB src shift));
21055   effect(TEMP dst, TEMP tmp);
21056   format %{ "vector_byte_shift $dst,$src,$shift" %}
21057   ins_encode %{
21058     int opcode = this->ideal_Opcode();
21059     bool sign = (opcode != Op_URShiftVB);
21060     int vlen_enc = Assembler::AVX_256bit;
21061     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21062     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21063     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21064     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21065     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21066   %}
21067   ins_pipe( pipe_slow );
21068 %}
21069 
21070 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21071   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21072   match(Set dst ( LShiftVB src shift));
21073   match(Set dst ( RShiftVB src shift));
21074   match(Set dst (URShiftVB src shift));
21075   effect(TEMP dst, TEMP tmp);
21076   format %{ "vector_byte_shift $dst,$src,$shift" %}
21077   ins_encode %{
21078     assert(UseAVX > 1, "required");
21079     int opcode = this->ideal_Opcode();
21080     bool sign = (opcode != Op_URShiftVB);
21081     int vlen_enc = Assembler::AVX_256bit;
21082     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21083     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21084     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21085     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21086     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21087     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21088     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21089     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
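          // 256-bit vpackuswb packs within each 128-bit lane, leaving the
          // qwords in order 0,2,1,3; vpermq with 0xD8 (0b11011000) restores
          // the linear order 0,1,2,3.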
21090     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21091   %}
21092   ins_pipe( pipe_slow );
21093 %}
21094 
21095 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21096   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21097   match(Set dst ( LShiftVB src shift));
21098   match(Set dst  (RShiftVB src shift));
21099   match(Set dst (URShiftVB src shift));
21100   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21101   format %{ "vector_byte_shift $dst,$src,$shift" %}
21102   ins_encode %{
21103     assert(UseAVX > 2, "required");
21104     int opcode = this->ideal_Opcode();
21105     bool sign = (opcode != Op_URShiftVB);
21106     int vlen_enc = Assembler::AVX_512bit;
21107     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21108     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21109     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21110     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21111     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21112     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21113     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21114     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21115     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21116     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21117     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21118     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21119   %}
21120   ins_pipe( pipe_slow );
21121 %}
21122 
21123 // A logical right shift on a shorts vector produces an incorrect Java
21124 // result for negative data, because Java converts the short value to an
21125 // int with sign extension before shifting. Char vectors are fine, since
21126 // chars are unsigned values.
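      // For example, with s = (short)-2 (0xFFFE) and a shift of 1, Java widens
      // s to 0xFFFFFFFE, so (s >>> 1) is 0x7FFFFFFF and the narrowed short
      // result is 0xFFFF (-1), whereas a 16-bit lane shift would give 0x7FFF.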
21127 // Shorts/Chars vector shift
21128 instruct vshiftS(vec dst, vec src, vec shift) %{
21129   predicate(!n->as_ShiftV()->is_var_shift());
21130   match(Set dst ( LShiftVS src shift));
21131   match(Set dst ( RShiftVS src shift));
21132   match(Set dst (URShiftVS src shift));
21133   effect(TEMP dst, USE src, USE shift);
21134   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21135   ins_encode %{
21136     int opcode = this->ideal_Opcode();
21137     if (UseAVX > 0) {
21138       int vlen_enc = vector_length_encoding(this);
21139       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21140     } else {
21141       int vlen = Matcher::vector_length(this);
21142       if (vlen == 2) {
21143         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21144         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21145       } else if (vlen == 4) {
21146         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21147         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21148       } else {
21149         assert(vlen == 8, "sanity");
21150         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21151         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21152       }
21153     }
21154   %}
21155   ins_pipe( pipe_slow );
21156 %}
21157 
21158 // Integers vector shift
21159 instruct vshiftI(vec dst, vec src, vec shift) %{
21160   predicate(!n->as_ShiftV()->is_var_shift());
21161   match(Set dst ( LShiftVI src shift));
21162   match(Set dst ( RShiftVI src shift));
21163   match(Set dst (URShiftVI src shift));
21164   effect(TEMP dst, USE src, USE shift);
21165   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21166   ins_encode %{
21167     int opcode = this->ideal_Opcode();
21168     if (UseAVX > 0) {
21169       int vlen_enc = vector_length_encoding(this);
21170       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21171     } else {
21172       int vlen = Matcher::vector_length(this);
21173       if (vlen == 2) {
21174         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21175         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21176       } else {
21177         assert(vlen == 4, "sanity");
21178         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21179         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21180       }
21181     }
21182   %}
21183   ins_pipe( pipe_slow );
21184 %}
21185 
21186 // Integers vector constant shift
21187 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21188   match(Set dst (LShiftVI src (LShiftCntV shift)));
21189   match(Set dst (RShiftVI src (RShiftCntV shift)));
21190   match(Set dst (URShiftVI src (RShiftCntV shift)));
21191   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21192   ins_encode %{
21193     int opcode = this->ideal_Opcode();
21194     if (UseAVX > 0) {
21195       int vector_len = vector_length_encoding(this);
21196       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21197     } else {
21198       int vlen = Matcher::vector_length(this);
21199       if (vlen == 2) {
21200         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21201         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21202       } else {
21203         assert(vlen == 4, "sanity");
21204         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21205         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21206       }
21207     }
21208   %}
21209   ins_pipe( pipe_slow );
21210 %}
21211 
21212 // Longs vector shift
21213 instruct vshiftL(vec dst, vec src, vec shift) %{
21214   predicate(!n->as_ShiftV()->is_var_shift());
21215   match(Set dst ( LShiftVL src shift));
21216   match(Set dst (URShiftVL src shift));
21217   effect(TEMP dst, USE src, USE shift);
21218   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21219   ins_encode %{
21220     int opcode = this->ideal_Opcode();
21221     if (UseAVX > 0) {
21222       int vlen_enc = vector_length_encoding(this);
21223       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21224     } else {
21225       assert(Matcher::vector_length(this) == 2, "");
21226       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21227       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21228     }
21229   %}
21230   ins_pipe( pipe_slow );
21231 %}
21232 
21233 // Longs vector constant shift
21234 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21235   match(Set dst (LShiftVL src (LShiftCntV shift)));
21236   match(Set dst (URShiftVL src (RShiftCntV shift)));
21237   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21238   ins_encode %{
21239     int opcode = this->ideal_Opcode();
21240     if (UseAVX > 0) {
21241       int vector_len = vector_length_encoding(this);
21242       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21243     } else {
21244       assert(Matcher::vector_length(this) == 2, "");
21245       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21246       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21247     }
21248   %}
21249   ins_pipe( pipe_slow );
21250 %}
21251 
21252 // -------------------ArithmeticRightShift -----------------------------------
21253 // Long vector arithmetic right shift
21254 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21255   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21256   match(Set dst (RShiftVL src shift));
21257   effect(TEMP dst, TEMP tmp);
21258   format %{ "vshiftq $dst,$src,$shift" %}
21259   ins_encode %{
21260     uint vlen = Matcher::vector_length(this);
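          // There is no packed 64-bit arithmetic right shift before AVX-512,
          // so synthesize it as  sra(x, n) == ((x >>> n) ^ m) - m,  where
          // m = sign_mask >>> n; the xor/sub pair re-extends the sign bits
          // that the logical shift zeroed.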
21261     if (vlen == 2) {
21262       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21263       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21264       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21265       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21266       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21267       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21268     } else {
21269       assert(vlen == 4, "sanity");
21270       assert(UseAVX > 1, "required");
21271       int vlen_enc = Assembler::AVX_256bit;
21272       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21273       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21274       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21275       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21276       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21277     }
21278   %}
21279   ins_pipe( pipe_slow );
21280 %}
21281 
21282 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21283   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21284   match(Set dst (RShiftVL src shift));
21285   format %{ "vshiftq $dst,$src,$shift" %}
21286   ins_encode %{
21287     int vlen_enc = vector_length_encoding(this);
21288     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21289   %}
21290   ins_pipe( pipe_slow );
21291 %}
21292 
21293 // ------------------- Variable Shift -----------------------------
21294 // Byte variable shift
21295 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21296   predicate(Matcher::vector_length(n) <= 8 &&
21297             n->as_ShiftV()->is_var_shift() &&
21298             !VM_Version::supports_avx512bw());
21299   match(Set dst ( LShiftVB src shift));
21300   match(Set dst ( RShiftVB src shift));
21301   match(Set dst (URShiftVB src shift));
21302   effect(TEMP dst, TEMP vtmp);
21303   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21304   ins_encode %{
21305     assert(UseAVX >= 2, "required");
21306 
21307     int opcode = this->ideal_Opcode();
21308     int vlen_enc = Assembler::AVX_128bit;
21309     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21310     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21311   %}
21312   ins_pipe( pipe_slow );
21313 %}
21314 
21315 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21316   predicate(Matcher::vector_length(n) == 16 &&
21317             n->as_ShiftV()->is_var_shift() &&
21318             !VM_Version::supports_avx512bw());
21319   match(Set dst ( LShiftVB src shift));
21320   match(Set dst ( RShiftVB src shift));
21321   match(Set dst (URShiftVB src shift));
21322   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21323   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21324   ins_encode %{
21325     assert(UseAVX >= 2, "required");
21326 
21327     int opcode = this->ideal_Opcode();
21328     int vlen_enc = Assembler::AVX_128bit;
21329     // Shift lower half and get word result in dst
21330     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21331 
21332     // Shift upper half and get word result in vtmp1
21333     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21334     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21335     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21336 
21337     // Merge and down convert the two word results to byte in dst
21338     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21339   %}
21340   ins_pipe( pipe_slow );
21341 %}
21342 
21343 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21344   predicate(Matcher::vector_length(n) == 32 &&
21345             n->as_ShiftV()->is_var_shift() &&
21346             !VM_Version::supports_avx512bw());
21347   match(Set dst ( LShiftVB src shift));
21348   match(Set dst ( RShiftVB src shift));
21349   match(Set dst (URShiftVB src shift));
21350   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21351   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21352   ins_encode %{
21353     assert(UseAVX >= 2, "required");
21354 
21355     int opcode = this->ideal_Opcode();
21356     int vlen_enc = Assembler::AVX_128bit;
21357     // Process lower 128 bits and get result in dst
21358     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21359     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21360     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21361     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21362     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21363 
21364     // Process higher 128 bits and get result in vtmp3
21365     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21366     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21367     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21368     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21369     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21370     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21371     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21372 
21373     // Merge the two results in dst
21374     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21375   %}
21376   ins_pipe( pipe_slow );
21377 %}
21378 
21379 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21380   predicate(Matcher::vector_length(n) <= 32 &&
21381             n->as_ShiftV()->is_var_shift() &&
21382             VM_Version::supports_avx512bw());
21383   match(Set dst ( LShiftVB src shift));
21384   match(Set dst ( RShiftVB src shift));
21385   match(Set dst (URShiftVB src shift));
21386   effect(TEMP dst, TEMP vtmp);
21387   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21388   ins_encode %{
21389     assert(UseAVX > 2, "required");
21390 
21391     int opcode = this->ideal_Opcode();
21392     int vlen_enc = vector_length_encoding(this);
21393     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21394   %}
21395   ins_pipe( pipe_slow );
21396 %}
21397 
21398 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21399   predicate(Matcher::vector_length(n) == 64 &&
21400             n->as_ShiftV()->is_var_shift() &&
21401             VM_Version::supports_avx512bw());
21402   match(Set dst ( LShiftVB src shift));
21403   match(Set dst ( RShiftVB src shift));
21404   match(Set dst (URShiftVB src shift));
21405   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21406   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21407   ins_encode %{
21408     assert(UseAVX > 2, "required");
21409 
21410     int opcode = this->ideal_Opcode();
21411     int vlen_enc = Assembler::AVX_256bit;
21412     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21413     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21414     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21415     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21416     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21417   %}
21418   ins_pipe( pipe_slow );
21419 %}
21420 
21421 // Short variable shift
21422 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21423   predicate(Matcher::vector_length(n) <= 8 &&
21424             n->as_ShiftV()->is_var_shift() &&
21425             !VM_Version::supports_avx512bw());
21426   match(Set dst ( LShiftVS src shift));
21427   match(Set dst ( RShiftVS src shift));
21428   match(Set dst (URShiftVS src shift));
21429   effect(TEMP dst, TEMP vtmp);
21430   format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21431   ins_encode %{
21432     assert(UseAVX >= 2, "required");
21433 
21434     int opcode = this->ideal_Opcode();
21435     bool sign = (opcode != Op_URShiftVS);
21436     int vlen_enc = Assembler::AVX_256bit;
21437     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21438     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21439     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21440     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21441     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21442     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21443   %}
21444   ins_pipe( pipe_slow );
21445 %}
21446 
21447 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21448   predicate(Matcher::vector_length(n) == 16 &&
21449             n->as_ShiftV()->is_var_shift() &&
21450             !VM_Version::supports_avx512bw());
21451   match(Set dst ( LShiftVS src shift));
21452   match(Set dst ( RShiftVS src shift));
21453   match(Set dst (URShiftVS src shift));
21454   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21455   format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21456   ins_encode %{
21457     assert(UseAVX >= 2, "required");
21458 
21459     int opcode = this->ideal_Opcode();
21460     bool sign = (opcode != Op_URShiftVS);
21461     int vlen_enc = Assembler::AVX_256bit;
21462     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21463     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21464     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21465     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21466     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21467 
21468     // Shift upper half, with result in dst using vtmp1 as TEMP
21469     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21470     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21471     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21472     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21473     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21474     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21475 
21476     // Merge lower and upper half result into dst
21477     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21478     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21479   %}
21480   ins_pipe( pipe_slow );
21481 %}
21482 
21483 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21484   predicate(n->as_ShiftV()->is_var_shift() &&
21485             VM_Version::supports_avx512bw());
21486   match(Set dst ( LShiftVS src shift));
21487   match(Set dst ( RShiftVS src shift));
21488   match(Set dst (URShiftVS src shift));
21489   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21490   ins_encode %{
21491     assert(UseAVX > 2, "required");
21492 
21493     int opcode = this->ideal_Opcode();
21494     int vlen_enc = vector_length_encoding(this);
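          // AVX512BW without AVX512VL only provides the 512-bit form of the
          // variable word shifts, so widen the encoding; the upper lanes are
          // don't-care.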
21495     if (!VM_Version::supports_avx512vl()) {
21496       vlen_enc = Assembler::AVX_512bit;
21497     }
21498     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21499   %}
21500   ins_pipe( pipe_slow );
21501 %}
21502 
21503 // Integer variable shift
21504 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21505   predicate(n->as_ShiftV()->is_var_shift());
21506   match(Set dst ( LShiftVI src shift));
21507   match(Set dst ( RShiftVI src shift));
21508   match(Set dst (URShiftVI src shift));
21509   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21510   ins_encode %{
21511     assert(UseAVX >= 2, "required");
21512 
21513     int opcode = this->ideal_Opcode();
21514     int vlen_enc = vector_length_encoding(this);
21515     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21516   %}
21517   ins_pipe( pipe_slow );
21518 %}
21519 
21520 // Long variable shift
21521 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21522   predicate(n->as_ShiftV()->is_var_shift());
21523   match(Set dst ( LShiftVL src shift));
21524   match(Set dst (URShiftVL src shift));
21525   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21526   ins_encode %{
21527     assert(UseAVX >= 2, "required");
21528 
21529     int opcode = this->ideal_Opcode();
21530     int vlen_enc = vector_length_encoding(this);
21531     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21532   %}
21533   ins_pipe( pipe_slow );
21534 %}
21535 
21536 // Long variable arithmetic right shift
21537 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21538   predicate(Matcher::vector_length(n) <= 4 &&
21539             n->as_ShiftV()->is_var_shift() &&
21540             UseAVX == 2);
21541   match(Set dst (RShiftVL src shift));
21542   effect(TEMP dst, TEMP vtmp);
21543   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21544   ins_encode %{
21545     int opcode = this->ideal_Opcode();
21546     int vlen_enc = vector_length_encoding(this);
21547     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21548                  $vtmp$$XMMRegister);
21549   %}
21550   ins_pipe( pipe_slow );
21551 %}
21552 
21553 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21554   predicate(n->as_ShiftV()->is_var_shift() &&
21555             UseAVX > 2);
21556   match(Set dst (RShiftVL src shift));
21557   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21558   ins_encode %{
21559     int opcode = this->ideal_Opcode();
21560     int vlen_enc = vector_length_encoding(this);
21561     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21562   %}
21563   ins_pipe( pipe_slow );
21564 %}
21565 
21566 // --------------------------------- AND --------------------------------------
21567 
21568 instruct vand(vec dst, vec src) %{
21569   predicate(UseAVX == 0);
21570   match(Set dst (AndV dst src));
21571   format %{ "pand    $dst,$src\t! and vectors" %}
21572   ins_encode %{
21573     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21574   %}
21575   ins_pipe( pipe_slow );
21576 %}
21577 
21578 instruct vand_reg(vec dst, vec src1, vec src2) %{
21579   predicate(UseAVX > 0);
21580   match(Set dst (AndV src1 src2));
21581   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21582   ins_encode %{
21583     int vlen_enc = vector_length_encoding(this);
21584     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21585   %}
21586   ins_pipe( pipe_slow );
21587 %}
21588 
21589 instruct vand_mem(vec dst, vec src, memory mem) %{
21590   predicate((UseAVX > 0) &&
21591             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21592   match(Set dst (AndV src (LoadVector mem)));
21593   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21594   ins_encode %{
21595     int vlen_enc = vector_length_encoding(this);
21596     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21597   %}
21598   ins_pipe( pipe_slow );
21599 %}
21600 
21601 // --------------------------------- OR ---------------------------------------
21602 
21603 instruct vor(vec dst, vec src) %{
21604   predicate(UseAVX == 0);
21605   match(Set dst (OrV dst src));
21606   format %{ "por     $dst,$src\t! or vectors" %}
21607   ins_encode %{
21608     __ por($dst$$XMMRegister, $src$$XMMRegister);
21609   %}
21610   ins_pipe( pipe_slow );
21611 %}
21612 
21613 instruct vor_reg(vec dst, vec src1, vec src2) %{
21614   predicate(UseAVX > 0);
21615   match(Set dst (OrV src1 src2));
21616   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21617   ins_encode %{
21618     int vlen_enc = vector_length_encoding(this);
21619     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21620   %}
21621   ins_pipe( pipe_slow );
21622 %}
21623 
21624 instruct vor_mem(vec dst, vec src, memory mem) %{
21625   predicate((UseAVX > 0) &&
21626             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21627   match(Set dst (OrV src (LoadVector mem)));
21628   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21629   ins_encode %{
21630     int vlen_enc = vector_length_encoding(this);
21631     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21632   %}
21633   ins_pipe( pipe_slow );
21634 %}
21635 
21636 // --------------------------------- XOR --------------------------------------
21637 
21638 instruct vxor(vec dst, vec src) %{
21639   predicate(UseAVX == 0);
21640   match(Set dst (XorV dst src));
21641   format %{ "pxor    $dst,$src\t! xor vectors" %}
21642   ins_encode %{
21643     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21644   %}
21645   ins_pipe( pipe_slow );
21646 %}
21647 
21648 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21649   predicate(UseAVX > 0);
21650   match(Set dst (XorV src1 src2));
21651   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21652   ins_encode %{
21653     int vlen_enc = vector_length_encoding(this);
21654     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21655   %}
21656   ins_pipe( pipe_slow );
21657 %}
21658 
21659 instruct vxor_mem(vec dst, vec src, memory mem) %{
21660   predicate((UseAVX > 0) &&
21661             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21662   match(Set dst (XorV src (LoadVector mem)));
21663   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21664   ins_encode %{
21665     int vlen_enc = vector_length_encoding(this);
21666     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21667   %}
21668   ins_pipe( pipe_slow );
21669 %}
21670 
21671 // --------------------------------- VectorCast --------------------------------------
21672 
21673 instruct vcastBtoX(vec dst, vec src) %{
21674   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21675   match(Set dst (VectorCastB2X src));
21676   format %{ "vector_cast_b2x $dst,$src\t!" %}
21677   ins_encode %{
21678     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21679     int vlen_enc = vector_length_encoding(this);
21680     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21681   %}
21682   ins_pipe( pipe_slow );
21683 %}
21684 
21685 instruct vcastBtoD(legVec dst, legVec src) %{
21686   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21687   match(Set dst (VectorCastB2X src));
21688   format %{ "vector_cast_b2x $dst,$src\t!" %}
21689   ins_encode %{
21690     int vlen_enc = vector_length_encoding(this);
21691     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21692   %}
21693   ins_pipe( pipe_slow );
21694 %}
21695 
21696 instruct castStoX(vec dst, vec src) %{
21697   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21698             Matcher::vector_length(n->in(1)) <= 8 && // src
21699             Matcher::vector_element_basic_type(n) == T_BYTE);
21700   match(Set dst (VectorCastS2X src));
21701   format %{ "vector_cast_s2x $dst,$src" %}
21702   ins_encode %{
21703     assert(UseAVX > 0, "required");
21704 
21705     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21706     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21707   %}
21708   ins_pipe( pipe_slow );
21709 %}
21710 
21711 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21712   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21713             Matcher::vector_length(n->in(1)) == 16 && // src
21714             Matcher::vector_element_basic_type(n) == T_BYTE);
21715   effect(TEMP dst, TEMP vtmp);
21716   match(Set dst (VectorCastS2X src));
21717   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21718   ins_encode %{
21719     assert(UseAVX > 0, "required");
21720 
21721     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21722     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21723     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21724     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21725   %}
21726   ins_pipe( pipe_slow );
21727 %}
21728 
21729 instruct vcastStoX_evex(vec dst, vec src) %{
21730   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21731             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21732   match(Set dst (VectorCastS2X src));
21733   format %{ "vector_cast_s2x $dst,$src\t!" %}
21734   ins_encode %{
21735     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21736     int src_vlen_enc = vector_length_encoding(this, $src);
21737     int vlen_enc = vector_length_encoding(this);
21738     switch (to_elem_bt) {
21739       case T_BYTE:
21740         if (!VM_Version::supports_avx512vl()) {
21741           vlen_enc = Assembler::AVX_512bit;
21742         }
21743         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21744         break;
21745       case T_INT:
21746         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21747         break;
21748       case T_FLOAT:
21749         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21750         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21751         break;
21752       case T_LONG:
21753         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21754         break;
21755       case T_DOUBLE: {
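              // Doubles are twice as wide as ints, so the intermediate int
              // vector uses one width class below the destination encoding.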
21756         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21757         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21758         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21759         break;
21760       }
21761       default:
21762         ShouldNotReachHere();
21763     }
21764   %}
21765   ins_pipe( pipe_slow );
21766 %}
21767 
21768 instruct castItoX(vec dst, vec src) %{
21769   predicate(UseAVX <= 2 &&
21770             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21771             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21772   match(Set dst (VectorCastI2X src));
21773   format %{ "vector_cast_i2x $dst,$src" %}
21774   ins_encode %{
21775     assert(UseAVX > 0, "required");
21776 
21777     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21778     int vlen_enc = vector_length_encoding(this, $src);
21779 
21780     if (to_elem_bt == T_BYTE) {
21781       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21782       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21783       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21784     } else {
21785       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21786       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21787       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21788     }
21789   %}
21790   ins_pipe( pipe_slow );
21791 %}
21792 
21793 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21794   predicate(UseAVX <= 2 &&
21795             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21796             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21797   match(Set dst (VectorCastI2X src));
21798   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21799   effect(TEMP dst, TEMP vtmp);
21800   ins_encode %{
21801     assert(UseAVX > 0, "required");
21802 
21803     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21804     int vlen_enc = vector_length_encoding(this, $src);
21805 
21806     if (to_elem_bt == T_BYTE) {
21807       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21808       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21809       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21810       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21811     } else {
21812       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21813       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21814       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21815       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21816     }
21817   %}
21818   ins_pipe( pipe_slow );
21819 %}
21820 
21821 instruct vcastItoX_evex(vec dst, vec src) %{
21822   predicate(UseAVX > 2 ||
21823             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21824   match(Set dst (VectorCastI2X src));
21825   format %{ "vector_cast_i2x $dst,$src\t!" %}
21826   ins_encode %{
21827     assert(UseAVX > 0, "required");
21828 
21829     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21830     int src_vlen_enc = vector_length_encoding(this, $src);
21831     int dst_vlen_enc = vector_length_encoding(this);
21832     switch (dst_elem_bt) {
21833       case T_BYTE:
21834         if (!VM_Version::supports_avx512vl()) {
21835           src_vlen_enc = Assembler::AVX_512bit;
21836         }
21837         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21838         break;
21839       case T_SHORT:
21840         if (!VM_Version::supports_avx512vl()) {
21841           src_vlen_enc = Assembler::AVX_512bit;
21842         }
21843         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21844         break;
21845       case T_FLOAT:
21846         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21847         break;
21848       case T_LONG:
21849         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21850         break;
21851       case T_DOUBLE:
21852         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21853         break;
21854       default:
21855         ShouldNotReachHere();
21856     }
21857   %}
21858   ins_pipe( pipe_slow );
21859 %}
21860 
21861 instruct vcastLtoBS(vec dst, vec src) %{
21862   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21863             UseAVX <= 2);
21864   match(Set dst (VectorCastL2X src));
21865   format %{ "vector_cast_l2x  $dst,$src" %}
21866   ins_encode %{
21867     assert(UseAVX > 0, "required");
21868 
21869     int vlen = Matcher::vector_length_in_bytes(this, $src);
21870     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
21871     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21872                                                       : ExternalAddress(vector_int_to_short_mask());
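          // imm8 == 8 (0b00001000) selects dwords 0 and 2, i.e. the low dword
          // of each long, gathering them to the bottom before masking and
          // packing.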
21873     if (vlen <= 16) {
21874       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21875       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21876       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21877     } else {
21878       assert(vlen <= 32, "required");
21879       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21880       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21881       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21882       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21883     }
21884     if (to_elem_bt == T_BYTE) {
21885       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21886     }
21887   %}
21888   ins_pipe( pipe_slow );
21889 %}
21890 
21891 instruct vcastLtoX_evex(vec dst, vec src) %{
21892   predicate(UseAVX > 2 ||
21893             (Matcher::vector_element_basic_type(n) == T_INT ||
21894              Matcher::vector_element_basic_type(n) == T_FLOAT ||
21895              Matcher::vector_element_basic_type(n) == T_DOUBLE));
21896   match(Set dst (VectorCastL2X src));
21897   format %{ "vector_cast_l2x  $dst,$src\t!" %}
21898   ins_encode %{
21899     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21900     int vlen = Matcher::vector_length_in_bytes(this, $src);
21901     int vlen_enc = vector_length_encoding(this, $src);
21902     switch (to_elem_bt) {
21903       case T_BYTE:
21904         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21905           vlen_enc = Assembler::AVX_512bit;
21906         }
21907         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21908         break;
21909       case T_SHORT:
21910         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21911           vlen_enc = Assembler::AVX_512bit;
21912         }
21913         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21914         break;
21915       case T_INT:
21916         if (vlen == 8) {
21917           if ($dst$$XMMRegister != $src$$XMMRegister) {
21918             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21919           }
21920         } else if (vlen == 16) {
21921           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21922         } else if (vlen == 32) {
21923           if (UseAVX > 2) {
21924             if (!VM_Version::supports_avx512vl()) {
21925               vlen_enc = Assembler::AVX_512bit;
21926             }
21927             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21928           } else {
21929             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
21930             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
21931           }
21932         } else { // vlen == 64
21933           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21934         }
21935         break;
21936       case T_FLOAT:
21937         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21938         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21939         break;
21940       case T_DOUBLE:
21941         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21942         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21943         break;
21944 
21945       default: assert(false, "%s", type2name(to_elem_bt));
21946     }
21947   %}
21948   ins_pipe( pipe_slow );
21949 %}
21950 
21951 instruct vcastFtoD_reg(vec dst, vec src) %{
21952   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
21953   match(Set dst (VectorCastF2X src));
21954   format %{ "vector_cast_f2d  $dst,$src\t!" %}
21955   ins_encode %{
21956     int vlen_enc = vector_length_encoding(this);
21957     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21958   %}
21959   ins_pipe( pipe_slow );
21960 %}
21961 
21963 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
21964   predicate(!VM_Version::supports_avx10_2() &&
21965             !VM_Version::supports_avx512vl() &&
21966             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21967             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
21968             is_integral_type(Matcher::vector_element_basic_type(n)));
21969   match(Set dst (VectorCastF2X src));
21970   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
21971   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
21972   ins_encode %{
21973     int vlen_enc = vector_length_encoding(this, $src);
21974     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21975     // JDK-8292878 removed the need for an explicit scratch register to load addresses
21976     // wider than 32 bits in register-indirect addressing mode: stub constants live in
21977     // the code cache, and ReservedCodeCacheSize is currently capped at 2G.
21978     // Targets are free to raise that limit, but a code cache larger than 2G seems
21979     // unreasonable in practice. On the flip side, given the cap we save a temporary
21980     // register allocation, which in the limiting case can prevent spilling in blocks
21981     // with high register pressure.
21982     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21983                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
21984                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21985   %}
21986   ins_pipe( pipe_slow );
21987 %}
21988 
21989 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
21990   predicate(!VM_Version::supports_avx10_2() &&
21991             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
21992             is_integral_type(Matcher::vector_element_basic_type(n)));
21993   match(Set dst (VectorCastF2X src));
21994   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21995   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
21996   ins_encode %{
21997     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
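    // F2L widens 4-byte lanes to 8-byte lanes, so the (wider) long destination
    // fixes the instruction width; the other integral targets are the same width
    // or narrower and take the encoding from the float source instead.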
21998     if (to_elem_bt == T_LONG) {
21999       int vlen_enc = vector_length_encoding(this);
22000       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22001                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22002                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22003     } else {
22004       int vlen_enc = vector_length_encoding(this, $src);
22005       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22006                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22007                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22008     }
22009   %}
22010   ins_pipe( pipe_slow );
22011 %}
22012 
22013 instruct castFtoX_reg_avx10(vec dst, vec src) %{
22014   predicate(VM_Version::supports_avx10_2() &&
22015             is_integral_type(Matcher::vector_element_basic_type(n)));
22016   match(Set dst (VectorCastF2X src));
22017   format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22018   ins_encode %{
22019     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
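    // As in the EVEX rule above: only F2L widens the lanes, so only T_LONG takes
    // the encoding from the destination vector.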
22020     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22021     __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22022   %}
22023   ins_pipe( pipe_slow );
22024 %}
22025 
22026 instruct castFtoX_mem_avx10(vec dst, memory src) %{
22027   predicate(VM_Version::supports_avx10_2() &&
22028             is_integral_type(Matcher::vector_element_basic_type(n)));
22029   match(Set dst (VectorCastF2X (LoadVector src)));
22030   format %{ "vector_cast_f2x_avx10 $dst, $src\t!" %}
22031   ins_encode %{
22032     int vlen = Matcher::vector_length(this);
22033     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22034     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22035     __ vector_castF2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22036   %}
22037   ins_pipe( pipe_slow );
22038 %}
22039 
22040 instruct vcastDtoF_reg(vec dst, vec src) %{
22041   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22042   match(Set dst (VectorCastD2X src));
22043   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22044   ins_encode %{
22045     int vlen_enc = vector_length_encoding(this, $src);
22046     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22047   %}
22048   ins_pipe( pipe_slow );
22049 %}
22050 
22051 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22052   predicate(!VM_Version::supports_avx10_2() &&
22053             !VM_Version::supports_avx512vl() &&
22054             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22055             is_integral_type(Matcher::vector_element_basic_type(n)));
22056   match(Set dst (VectorCastD2X src));
22057   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22058   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22059   ins_encode %{
22060     int vlen_enc = vector_length_encoding(this, $src);
22061     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22062     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22063                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22064                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22065   %}
22066   ins_pipe( pipe_slow );
22067 %}
22068 
22069 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22070   predicate(!VM_Version::supports_avx10_2() &&
22071             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22072             is_integral_type(Matcher::vector_element_basic_type(n)));
22073   match(Set dst (VectorCastD2X src));
22074   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22075   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22076   ins_encode %{
22077     int vlen_enc = vector_length_encoding(this, $src);
22078     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22079     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22080                               ExternalAddress(vector_float_signflip());
22081     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22082                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22083   %}
22084   ins_pipe( pipe_slow );
22085 %}
22086 
22087 instruct castDtoX_reg_avx10(vec dst, vec src) %{
22088   predicate(VM_Version::supports_avx10_2() &&
22089             is_integral_type(Matcher::vector_element_basic_type(n)));
22090   match(Set dst (VectorCastD2X src));
22091   format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22092   ins_encode %{
22093     int vlen_enc = vector_length_encoding(this, $src);
22094     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22095     __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22096   %}
22097   ins_pipe( pipe_slow );
22098 %}
22099 
22100 instruct castDtoX_mem_avx10(vec dst, memory src) %{
22101   predicate(VM_Version::supports_avx10_2() &&
22102             is_integral_type(Matcher::vector_element_basic_type(n)));
22103   match(Set dst (VectorCastD2X (LoadVector src)));
22104   format %{ "vector_cast_d2x_avx10 $dst, $src\t!" %}
22105   ins_encode %{
22106     int vlen = Matcher::vector_length(this);
22107     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22108     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22109     __ vector_castD2X_avx10(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22110   %}
22111   ins_pipe( pipe_slow );
22112 %}
22113 
22114 instruct vucast(vec dst, vec src) %{
22115   match(Set dst (VectorUCastB2X src));
22116   match(Set dst (VectorUCastS2X src));
22117   match(Set dst (VectorUCastI2X src));
22118   format %{ "vector_ucast $dst,$src\t!" %}
22119   ins_encode %{
22120     assert(UseAVX > 0, "required");
22121 
22122     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22123     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22124     int vlen_enc = vector_length_encoding(this);
22125     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22126   %}
22127   ins_pipe( pipe_slow );
22128 %}
22129 
22130 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22131   predicate(!VM_Version::supports_avx512vl() &&
22132             Matcher::vector_length_in_bytes(n) < 64 &&
22133             Matcher::vector_element_basic_type(n) == T_INT);
22134   match(Set dst (RoundVF src));
22135   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22136   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22137   ins_encode %{
22138     int vlen_enc = vector_length_encoding(this);
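    // MXCSR image with RC = round-down (0x3F80); the E-core tuned value 0x3FBF
    // additionally pre-sets the six exception-flag bits.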
22139     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22140     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22141                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22142                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22143   %}
22144   ins_pipe( pipe_slow );
22145 %}
22146 
22147 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22148   predicate((VM_Version::supports_avx512vl() ||
22149              Matcher::vector_length_in_bytes(n) == 64) &&
22150              Matcher::vector_element_basic_type(n) == T_INT);
22151   match(Set dst (RoundVF src));
22152   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22153   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22154   ins_encode %{
22155     int vlen_enc = vector_length_encoding(this);
22156     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22157     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22158                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22159                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22160   %}
22161   ins_pipe( pipe_slow );
22162 %}
22163 
22164 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22165   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22166   match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22168   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22169   ins_encode %{
22170     int vlen_enc = vector_length_encoding(this);
22171     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22172     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22173                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22174                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22175   %}
22176   ins_pipe( pipe_slow );
22177 %}
22178 
22179 // --------------------------------- VectorMaskCmp --------------------------------------
22180 
22181 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22182   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22183             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22184             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22185             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22186   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22187   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22188   ins_encode %{
22189     int vlen_enc = vector_length_encoding(this, $src1);
22190     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22191     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22192       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22193     } else {
22194       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22195     }
22196   %}
22197   ins_pipe( pipe_slow );
22198 %}
22199 
22200 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22201   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22202             n->bottom_type()->isa_vectmask() == nullptr &&
22203             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22204   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22205   effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22207   ins_encode %{
22208     int vlen_enc = Assembler::AVX_512bit;
22209     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22210     KRegister mask = k0; // The comparison itself is not being masked.
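    // Compare into the temporary mask register, then expand it into a boolean
    // vector: lanes selected by $ktmp load all-ones (zero-masking clears the rest).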
22211     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22212       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22213       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22214     } else {
22215       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22216       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22217     }
22218   %}
22219   ins_pipe( pipe_slow );
22220 %}
22221 
22222 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22223   predicate(n->bottom_type()->isa_vectmask() &&
22224             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22225   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22226   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22227   ins_encode %{
22228     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22229     int vlen_enc = vector_length_encoding(this, $src1);
22230     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22231     KRegister mask = k0; // The comparison itself is not being masked.
22232     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22233       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22234     } else {
22235       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22236     }
22237   %}
22238   ins_pipe( pipe_slow );
22239 %}
22240 
22241 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22242   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22243             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22244             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22245             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22246             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22247             (n->in(2)->get_int() == BoolTest::eq ||
22248              n->in(2)->get_int() == BoolTest::lt ||
22249              n->in(2)->get_int() == BoolTest::gt)); // cond
22250   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22251   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22252   ins_encode %{
22253     int vlen_enc = vector_length_encoding(this, $src1);
22254     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22255     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22256     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22257   %}
22258   ins_pipe( pipe_slow );
22259 %}
22260 
22261 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22262   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22263             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22264             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22265             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22266             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22267             (n->in(2)->get_int() == BoolTest::ne ||
22268              n->in(2)->get_int() == BoolTest::le ||
22269              n->in(2)->get_int() == BoolTest::ge)); // cond
22270   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22271   effect(TEMP dst, TEMP xtmp);
22272   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22273   ins_encode %{
22274     int vlen_enc = vector_length_encoding(this, $src1);
22275     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22276     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22277     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22278   %}
22279   ins_pipe( pipe_slow );
22280 %}
22281 
22282 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22283   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22284             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22285             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22286             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22287             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22288   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22289   effect(TEMP dst, TEMP xtmp);
22290   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22291   ins_encode %{
22292     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22293     int vlen_enc = vector_length_encoding(this, $src1);
22294     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22295     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22296 
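    // There is no unsigned integer compare below AVX-512: flip the sign bit of
    // both operands (XOR with a per-element 0x80... pattern) so that unsigned
    // order maps onto signed order, then use the signed compare. vbroadcastsd
    // has no 128-bit form, so 128-bit vectors use movddup instead.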
22297     if (vlen_enc == Assembler::AVX_128bit) {
22298       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22299     } else {
22300       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22301     }
22302     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22303     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22304     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22305   %}
22306   ins_pipe( pipe_slow );
22307 %}
22308 
22309 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22310   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22311              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22312              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22313   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22314   effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22316   ins_encode %{
22317     assert(UseAVX > 2, "required");
22318 
22319     int vlen_enc = vector_length_encoding(this, $src1);
22320     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22321     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22322     KRegister mask = k0; // The comparison itself is not being masked.
22323     bool merge = false;
22324     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22325 
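    // Compare into $ktmp, then materialize the boolean vector by loading
    // all-ones into the lanes selected by the mask; the remaining lanes are zeroed.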
22326     switch (src1_elem_bt) {
22327       case T_INT: {
22328         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22329         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22330         break;
22331       }
22332       case T_LONG: {
22333         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22334         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22335         break;
22336       }
22337       default: assert(false, "%s", type2name(src1_elem_bt));
22338     }
22339   %}
22340   ins_pipe( pipe_slow );
22341 %}
22342 
22344 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22345   predicate(n->bottom_type()->isa_vectmask() &&
22346             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22347   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22348   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22349   ins_encode %{
22350     assert(UseAVX > 2, "required");
22351     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22352 
22353     int vlen_enc = vector_length_encoding(this, $src1);
22354     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22355     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22356     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22357 
    // Perform the comparison based on the source element type.
22359     switch (src1_elem_bt) {
22360       case T_BYTE: {
22361         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22362         break;
22363       }
22364       case T_SHORT: {
22365         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22366         break;
22367       }
22368       case T_INT: {
22369         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22370         break;
22371       }
22372       case T_LONG: {
22373         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22374         break;
22375       }
22376       default: assert(false, "%s", type2name(src1_elem_bt));
22377     }
22378   %}
22379   ins_pipe( pipe_slow );
22380 %}
22381 
22382 // Extract
22383 
22384 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22385   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22386   match(Set dst (ExtractI src idx));
22387   match(Set dst (ExtractS src idx));
22388   match(Set dst (ExtractB src idx));
22389   format %{ "extractI $dst,$src,$idx\t!" %}
22390   ins_encode %{
22391     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22392 
22393     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22394     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22395   %}
22396   ins_pipe( pipe_slow );
22397 %}
22398 
22399 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22400   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22401             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22402   match(Set dst (ExtractI src idx));
22403   match(Set dst (ExtractS src idx));
22404   match(Set dst (ExtractB src idx));
22405   effect(TEMP vtmp);
22406   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22407   ins_encode %{
22408     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22409 
22410     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22411     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22412     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22413   %}
22414   ins_pipe( pipe_slow );
22415 %}
22416 
22417 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22418   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22419   match(Set dst (ExtractL src idx));
22420   format %{ "extractL $dst,$src,$idx\t!" %}
22421   ins_encode %{
22422     assert(UseSSE >= 4, "required");
22423     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22424 
22425     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22426   %}
22427   ins_pipe( pipe_slow );
22428 %}
22429 
22430 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22431   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22432             Matcher::vector_length(n->in(1)) == 8);  // src
22433   match(Set dst (ExtractL src idx));
22434   effect(TEMP vtmp);
22435   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22436   ins_encode %{
22437     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22438 
22439     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22440     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22441   %}
22442   ins_pipe( pipe_slow );
22443 %}
22444 
22445 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22446   predicate(Matcher::vector_length(n->in(1)) <= 4);
22447   match(Set dst (ExtractF src idx));
22448   effect(TEMP dst, TEMP vtmp);
22449   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22450   ins_encode %{
22451     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22452 
22453     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22454   %}
22455   ins_pipe( pipe_slow );
22456 %}
22457 
22458 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22459   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22460             Matcher::vector_length(n->in(1)/*src*/) == 16);
22461   match(Set dst (ExtractF src idx));
22462   effect(TEMP vtmp);
22463   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22464   ins_encode %{
22465     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22466 
22467     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22468     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22469   %}
22470   ins_pipe( pipe_slow );
22471 %}
22472 
22473 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22474   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22475   match(Set dst (ExtractD src idx));
22476   format %{ "extractD $dst,$src,$idx\t!" %}
22477   ins_encode %{
22478     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22479 
22480     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22481   %}
22482   ins_pipe( pipe_slow );
22483 %}
22484 
22485 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22486   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22487             Matcher::vector_length(n->in(1)) == 8);  // src
22488   match(Set dst (ExtractD src idx));
22489   effect(TEMP vtmp);
22490   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22491   ins_encode %{
22492     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22493 
22494     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22495     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22496   %}
22497   ins_pipe( pipe_slow );
22498 %}
22499 
22500 // --------------------------------- Vector Blend --------------------------------------
22501 
22502 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22503   predicate(UseAVX == 0);
22504   match(Set dst (VectorBlend (Binary dst src) mask));
22505   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22506   effect(TEMP tmp);
22507   ins_encode %{
22508     assert(UseSSE >= 4, "required");
22509 
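    // SSE4.1 pblendvb implicitly reads its mask from xmm0; $tmp is pinned to
    // xmm0, so copy the mask there unless the allocator already placed it in xmm0.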
22510     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22511       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22512     }
22513     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22514   %}
22515   ins_pipe( pipe_slow );
22516 %}
22517 
22518 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22519   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22520             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22521             Matcher::vector_length_in_bytes(n) <= 32 &&
22522             is_integral_type(Matcher::vector_element_basic_type(n)));
22523   match(Set dst (VectorBlend (Binary src1 src2) mask));
22524   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22525   ins_encode %{
22526     int vlen_enc = vector_length_encoding(this);
22527     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22528   %}
22529   ins_pipe( pipe_slow );
22530 %}
22531 
22532 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22533   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22534             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22535             Matcher::vector_length_in_bytes(n) <= 32 &&
22536             !is_integral_type(Matcher::vector_element_basic_type(n)));
22537   match(Set dst (VectorBlend (Binary src1 src2) mask));
22538   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22539   ins_encode %{
22540     int vlen_enc = vector_length_encoding(this);
22541     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22542   %}
22543   ins_pipe( pipe_slow );
22544 %}
22545 
22546 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22547   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22548             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22549             Matcher::vector_length_in_bytes(n) <= 32);
22550   match(Set dst (VectorBlend (Binary src1 src2) mask));
22551   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22552   effect(TEMP vtmp, TEMP dst);
22553   ins_encode %{
22554     int vlen_enc = vector_length_encoding(this);
22555     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22556     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22557     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22558   %}
22559   ins_pipe( pipe_slow );
22560 %}
22561 
22562 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22563   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22564             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22565   match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22567   effect(TEMP ktmp);
22568   ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
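    // Convert the boolean vector into a k-mask by comparing it against all-ones,
    // then blend src1/src2 under that mask.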
22571     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22572     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22573   %}
22574   ins_pipe( pipe_slow );
22575 %}
22576 
22578 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22579   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22580             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22581              VM_Version::supports_avx512bw()));
22582   match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22584   ins_encode %{
22585     int vlen_enc = vector_length_encoding(this);
22586     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22587     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22588   %}
22589   ins_pipe( pipe_slow );
22590 %}
22591 
22592 // --------------------------------- ABS --------------------------------------
22593 // a = |a|
22594 instruct vabsB_reg(vec dst, vec src) %{
22595   match(Set dst (AbsVB  src));
22596   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22597   ins_encode %{
22598     uint vlen = Matcher::vector_length(this);
22599     if (vlen <= 16) {
22600       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22601     } else {
22602       int vlen_enc = vector_length_encoding(this);
22603       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22604     }
22605   %}
22606   ins_pipe( pipe_slow );
22607 %}
22608 
22609 instruct vabsS_reg(vec dst, vec src) %{
22610   match(Set dst (AbsVS  src));
22611   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22612   ins_encode %{
22613     uint vlen = Matcher::vector_length(this);
22614     if (vlen <= 8) {
22615       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22616     } else {
22617       int vlen_enc = vector_length_encoding(this);
22618       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22619     }
22620   %}
22621   ins_pipe( pipe_slow );
22622 %}
22623 
22624 instruct vabsI_reg(vec dst, vec src) %{
22625   match(Set dst (AbsVI  src));
22626   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22627   ins_encode %{
22628     uint vlen = Matcher::vector_length(this);
22629     if (vlen <= 4) {
22630       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22631     } else {
22632       int vlen_enc = vector_length_encoding(this);
22633       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22634     }
22635   %}
22636   ins_pipe( pipe_slow );
22637 %}
22638 
22639 instruct vabsL_reg(vec dst, vec src) %{
22640   match(Set dst (AbsVL  src));
22641   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22642   ins_encode %{
22643     assert(UseAVX > 2, "required");
22644     int vlen_enc = vector_length_encoding(this);
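    // evpabsq needs AVX512VL for the 128/256-bit encodings; without it, fall
    // back to the 512-bit encoding (writing the unused upper lanes is harmless).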
22645     if (!VM_Version::supports_avx512vl()) {
22646       vlen_enc = Assembler::AVX_512bit;
22647     }
22648     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22649   %}
22650   ins_pipe( pipe_slow );
22651 %}
22652 
22653 // --------------------------------- ABSNEG --------------------------------------
22654 
22655 instruct vabsnegF(vec dst, vec src) %{
22656   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22657   match(Set dst (AbsVF src));
22658   match(Set dst (NegVF src));
22659   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22660   ins_cost(150);
22661   ins_encode %{
22662     int opcode = this->ideal_Opcode();
22663     int vlen = Matcher::vector_length(this);
22664     if (vlen == 2) {
22665       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22666     } else {
22667       assert(vlen == 8 || vlen == 16, "required");
22668       int vlen_enc = vector_length_encoding(this);
22669       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22670     }
22671   %}
22672   ins_pipe( pipe_slow );
22673 %}
22674 
22675 instruct vabsneg4F(vec dst) %{
22676   predicate(Matcher::vector_length(n) == 4);
22677   match(Set dst (AbsVF dst));
22678   match(Set dst (NegVF dst));
22679   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22680   ins_cost(150);
22681   ins_encode %{
22682     int opcode = this->ideal_Opcode();
22683     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22684   %}
22685   ins_pipe( pipe_slow );
22686 %}
22687 
22688 instruct vabsnegD(vec dst, vec src) %{
22689   match(Set dst (AbsVD  src));
22690   match(Set dst (NegVD  src));
22691   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22692   ins_encode %{
22693     int opcode = this->ideal_Opcode();
22694     uint vlen = Matcher::vector_length(this);
22695     if (vlen == 2) {
22696       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22697     } else {
22698       int vlen_enc = vector_length_encoding(this);
22699       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22700     }
22701   %}
22702   ins_pipe( pipe_slow );
22703 %}
22704 
22705 //------------------------------------- VectorTest --------------------------------------------
22706 
22707 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22708   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22709   match(Set cr (VectorTest src1 src2));
22710   effect(TEMP vtmp);
22711   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22712   ins_encode %{
22713     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22714     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22715     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22716   %}
22717   ins_pipe( pipe_slow );
22718 %}
22719 
22720 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22721   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22722   match(Set cr (VectorTest src1 src2));
22723   format %{ "vptest_ge16  $src1, $src2\n\t" %}
22724   ins_encode %{
22725     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22726     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22727     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22728   %}
22729   ins_pipe( pipe_slow );
22730 %}
22731 
22732 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22733   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22734              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22735             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22736   match(Set cr (VectorTest src1 src2));
22737   effect(TEMP tmp);
22738   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22739   ins_encode %{
22740     uint masklen = Matcher::vector_length(this, $src1);
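    // Clear the bits beyond masklen, then compare against an all-ones mask of
    // masklen bits; equality (ZF set) means every lane of the vector mask is set.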
22741     __ kmovwl($tmp$$Register, $src1$$KRegister);
22742     __ andl($tmp$$Register, (1 << masklen) - 1);
22743     __ cmpl($tmp$$Register, (1 << masklen) - 1);
22744   %}
22745   ins_pipe( pipe_slow );
22746 %}
22747 
22748 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22749   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22750              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22751             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22752   match(Set cr (VectorTest src1 src2));
22753   effect(TEMP tmp);
22754   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22755   ins_encode %{
22756     uint masklen = Matcher::vector_length(this, $src1);
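    // ANDing with the masklen-bit all-ones mask sets ZF only when no lane is
    // set, so ZF == 0 answers the "any true" (BoolTest::ne) test.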
22757     __ kmovwl($tmp$$Register, $src1$$KRegister);
22758     __ andl($tmp$$Register, (1 << masklen) - 1);
22759   %}
22760   ins_pipe( pipe_slow );
22761 %}
22762 
22763 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
22764   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22765             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22766   match(Set cr (VectorTest src1 src2));
22767   format %{ "ktest_ge8  $src1, $src2\n\t" %}
22768   ins_encode %{
22769     uint masklen = Matcher::vector_length(this, $src1);
22770     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22771   %}
22772   ins_pipe( pipe_slow );
22773 %}
22774 
22775 //------------------------------------- LoadMask --------------------------------------------
22776 
22777 instruct loadMask(legVec dst, legVec src) %{
22778   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22779   match(Set dst (VectorLoadMask src));
22780   effect(TEMP dst);
22781   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
22782   ins_encode %{
22783     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22784     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22785     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22786   %}
22787   ins_pipe( pipe_slow );
22788 %}
22789 
22790 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
22791   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22792   match(Set dst (VectorLoadMask src));
22793   effect(TEMP xtmp);
22794   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22795   ins_encode %{
22796     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22797                         true, Assembler::AVX_512bit);
22798   %}
22799   ins_pipe( pipe_slow );
22800 %}
22801 
22802 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
22803   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22804   match(Set dst (VectorLoadMask src));
22805   effect(TEMP xtmp);
22806   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22807   ins_encode %{
22808     int vlen_enc = vector_length_encoding(in(1));
22809     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22810                         false, vlen_enc);
22811   %}
22812   ins_pipe( pipe_slow );
22813 %}
22814 
22815 //------------------------------------- StoreMask --------------------------------------------
22816 
22817 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
22818   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22819   match(Set dst (VectorStoreMask src size));
22820   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22821   ins_encode %{
22822     int vlen = Matcher::vector_length(this);
22823     if (vlen <= 16 && UseAVX <= 2) {
22824       assert(UseSSE >= 3, "required");
22825       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22826     } else {
22827       assert(UseAVX > 0, "required");
22828       int src_vlen_enc = vector_length_encoding(this, $src);
22829       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22830     }
22831   %}
22832   ins_pipe( pipe_slow );
22833 %}
22834 
22835 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
22836   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22837   match(Set dst (VectorStoreMask src size));
22838   effect(TEMP_DEF dst, TEMP xtmp);
22839   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22840   ins_encode %{
22841     int vlen_enc = Assembler::AVX_128bit;
22842     int vlen = Matcher::vector_length(this);
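    // Narrow the 0/-1 short mask to 0/1 bytes: abs turns -1 into 1, and the
    // pack instructions halve the lane width.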
22843     if (vlen <= 8) {
22844       assert(UseSSE >= 3, "required");
22845       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22846       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22847       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22848     } else {
22849       assert(UseAVX > 0, "required");
22850       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22851       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22852       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22853     }
22854   %}
22855   ins_pipe( pipe_slow );
22856 %}
22857 
22858 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
22859   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22860   match(Set dst (VectorStoreMask src size));
22861   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22862   effect(TEMP_DEF dst, TEMP xtmp);
22863   ins_encode %{
22864     int vlen_enc = Assembler::AVX_128bit;
22865     int vlen = Matcher::vector_length(this);
22866     if (vlen <= 4) {
22867       assert(UseSSE >= 3, "required");
22868       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22869       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22870       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22871       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22872     } else {
22873       assert(UseAVX > 0, "required");
22874       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22875       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22876       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22877       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22878       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22879     }
22880   %}
22881   ins_pipe( pipe_slow );
22882 %}
22883 
22884 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
22885   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
22886   match(Set dst (VectorStoreMask src size));
22887   effect(TEMP_DEF dst, TEMP xtmp);
22888   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22889   ins_encode %{
22890     assert(UseSSE >= 3, "required");
22891     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
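    // pshufd with 0x8 selects dwords {0, 2, 0, 0}, gathering the low dwords of
    // both qword lanes into the bottom 64 bits; abs plus the saturating packs
    // then narrow the 0/-1 dwords to 0/1 bytes.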
22892     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
22893     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
22894     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22895     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22896   %}
22897   ins_pipe( pipe_slow );
22898 %}
22899 
22900 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
22901   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
22902   match(Set dst (VectorStoreMask src size));
22903   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
22904   effect(TEMP_DEF dst, TEMP vtmp);
22905   ins_encode %{
22906     int vlen_enc = Assembler::AVX_128bit;
22907     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
22908     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22909     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
22910     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22911     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22912     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22913     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22914   %}
22915   ins_pipe( pipe_slow );
22916 %}
22917 
22918 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
22919   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22920   match(Set dst (VectorStoreMask src size));
22921   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22922   ins_encode %{
22923     int src_vlen_enc = vector_length_encoding(this, $src);
22924     int dst_vlen_enc = vector_length_encoding(this);
22925     if (!VM_Version::supports_avx512vl()) {
22926       src_vlen_enc = Assembler::AVX_512bit;
22927     }
22928     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22929     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22930   %}
22931   ins_pipe( pipe_slow );
22932 %}
22933 
22934 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
22935   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22936   match(Set dst (VectorStoreMask src size));
22937   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22938   ins_encode %{
22939     int src_vlen_enc = vector_length_encoding(this, $src);
22940     int dst_vlen_enc = vector_length_encoding(this);
22941     if (!VM_Version::supports_avx512vl()) {
22942       src_vlen_enc = Assembler::AVX_512bit;
22943     }
22944     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22945     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22946   %}
22947   ins_pipe( pipe_slow );
22948 %}
22949 
22950 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
22951   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22952   match(Set dst (VectorStoreMask mask size));
22953   effect(TEMP_DEF dst);
22954   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22955   ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "mask vector must be 512-bit");
22957     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
22958                  false, Assembler::AVX_512bit, noreg);
22959     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
22960   %}
22961   ins_pipe( pipe_slow );
22962 %}
22963 
22964 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
22965   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22966   match(Set dst (VectorStoreMask mask size));
22967   effect(TEMP_DEF dst);
22968   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22969   ins_encode %{
22970     int dst_vlen_enc = vector_length_encoding(this);
22971     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
22972     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22973   %}
22974   ins_pipe( pipe_slow );
22975 %}
22976 
22977 instruct vmaskcast_evex(kReg dst) %{
22978   match(Set dst (VectorMaskCast dst));
22979   ins_cost(0);
22980   format %{ "vector_mask_cast $dst" %}
22981   ins_encode %{
22982     // empty
22983   %}
22984   ins_pipe(empty);
22985 %}
22986 
22987 instruct vmaskcast(vec dst) %{
22988   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
22989   match(Set dst (VectorMaskCast dst));
22990   ins_cost(0);
22991   format %{ "vector_mask_cast $dst" %}
22992   ins_encode %{
22993     // empty
22994   %}
22995   ins_pipe(empty);
22996 %}
22997 
22998 instruct vmaskcast_avx(vec dst, vec src) %{
22999   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23000   match(Set dst (VectorMaskCast src));
23001   format %{ "vector_mask_cast $dst, $src" %}
23002   ins_encode %{
23003     int vlen = Matcher::vector_length(this);
23004     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23005     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23006     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23007   %}
23008   ins_pipe(pipe_slow);
23009 %}
23010 
23011 //-------------------------------- Load Iota Indices ----------------------------------
23012 
23013 instruct loadIotaIndices(vec dst, immI_0 src) %{
23014   match(Set dst (VectorLoadConst src));
23015   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23016   ins_encode %{
23017      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23018      BasicType bt = Matcher::vector_element_basic_type(this);
23019      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23020   %}
23021   ins_pipe( pipe_slow );
23022 %}
23023 
23024 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23025   match(Set dst (PopulateIndex src1 src2));
23026   effect(TEMP dst, TEMP vtmp);
23027   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23028   ins_encode %{
23029      assert($src2$$constant == 1, "required");
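     // dst[i] = src1 + i: broadcast the start index, load the iota constant
     // {0, 1, 2, ...}, and add the two vectors.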
23030      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23031      int vlen_enc = vector_length_encoding(this);
23032      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23033      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23034      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23035      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23036   %}
23037   ins_pipe( pipe_slow );
23038 %}
23039 
23040 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23041   match(Set dst (PopulateIndex src1 src2));
23042   effect(TEMP dst, TEMP vtmp);
23043   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23044   ins_encode %{
23045      assert($src2$$constant == 1, "required");
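     // dst[i] = src1 + i: broadcast the start index, load the iota constant
     // {0, 1, 2, ...}, and add the two vectors.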
23046      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23047      int vlen_enc = vector_length_encoding(this);
23048      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23049      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23050      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23051      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23052   %}
23053   ins_pipe( pipe_slow );
23054 %}
23055 
23056 //-------------------------------- Rearrange ----------------------------------
23057 
23058 // LoadShuffle/Rearrange for Byte
23059 instruct rearrangeB(vec dst, vec shuffle) %{
23060   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23061             Matcher::vector_length(n) < 32);
23062   match(Set dst (VectorRearrange dst shuffle));
23063   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23064   ins_encode %{
23065     assert(UseSSE >= 4, "required");
23066     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23067   %}
23068   ins_pipe( pipe_slow );
23069 %}
23070 
23071 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23072   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23073             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23074   match(Set dst (VectorRearrange src shuffle));
23075   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23076   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23077   ins_encode %{
23078     assert(UseAVX >= 2, "required");
23079     // Swap src into vtmp1
23080     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to get entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to get entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit of entries whose shuffle index selects the other lane
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23087     // Perform the blend
23088     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23089   %}
23090   ins_pipe( pipe_slow );
23091 %}
23092 
23094 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23095   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23096             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23097   match(Set dst (VectorRearrange src shuffle));
23098   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23099   format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23100   ins_encode %{
23101     int vlen_enc = vector_length_encoding(this);
23102     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23103                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23104                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23105   %}
23106   ins_pipe( pipe_slow );
23107 %}
23108 
23109 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23110   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23111             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23112   match(Set dst (VectorRearrange src shuffle));
23113   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23114   ins_encode %{
23115     int vlen_enc = vector_length_encoding(this);
23116     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23117   %}
23118   ins_pipe( pipe_slow );
23119 %}
23120 
23121 // LoadShuffle/Rearrange for Short
23122 
23123 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23124   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23125             !VM_Version::supports_avx512bw());
23126   match(Set dst (VectorLoadShuffle src));
23127   effect(TEMP dst, TEMP vtmp);
23128   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23129   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask,
    // since only a byte shuffle instruction is available on these platforms.
23132     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23133     if (UseAVX == 0) {
23134       assert(vlen_in_bytes <= 16, "required");
23135       // Multiply each shuffle by two to get byte index
23136       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23137       __ psllw($vtmp$$XMMRegister, 1);
23138 
23139       // Duplicate to create 2 copies of byte index
23140       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23141       __ psllw($dst$$XMMRegister, 8);
23142       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23143 
23144       // Add one to get alternate byte index
23145       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23146       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23147     } else {
23148       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23149       int vlen_enc = vector_length_encoding(this);
23150       // Multiply each shuffle by two to get byte index
23151       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23152 
23153       // Duplicate to create 2 copies of byte index
23154       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23155       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23156 
23157       // Add one to get alternate byte index
23158       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23159     }
23160   %}
23161   ins_pipe( pipe_slow );
23162 %}
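
// Illustrative sketch, not part of the emitted code: the expansion above turns
// a shuffle of shorts into the equivalent byte shuffle, e.g. for 8 shorts:
//   for (int i = 0; i < 8; i++) {
//     int b = 2 * short_shuffle[i];     // shift left by 1: byte index
//     byte_shuffle[2*i]     = b;        // low byte of the selected short
//     byte_shuffle[2*i + 1] = b + 1;    // high byte, added by the paddb step
//   }
// so a short shuffle {3, 0, ...} becomes the byte shuffle {6, 7, 0, 1, ...}.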
23163 
23164 instruct rearrangeS(vec dst, vec shuffle) %{
23165   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23166             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23167   match(Set dst (VectorRearrange dst shuffle));
23168   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23169   ins_encode %{
23170     assert(UseSSE >= 4, "required");
23171     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23172   %}
23173   ins_pipe( pipe_slow );
23174 %}
23175 
23176 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23177   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23178             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23179   match(Set dst (VectorRearrange src shuffle));
23180   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23181   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23182   ins_encode %{
23183     assert(UseAVX >= 2, "required");
23184     // Swap src into vtmp1
23185     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23186     // Shuffle swapped src to get entries from the other 128-bit lane
23187     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23188     // Shuffle original src to get entries from its own 128-bit lane
23189     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23190     // Create a blend mask by setting the high bit for entries coming from the other lane
23191     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23192     // Perform the blend
23193     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23194   %}
23195   ins_pipe( pipe_slow );
23196 %}
23197 
23198 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23199   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23200             VM_Version::supports_avx512bw());
23201   match(Set dst (VectorRearrange src shuffle));
23202   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23203   ins_encode %{
23204     int vlen_enc = vector_length_encoding(this);
23205     if (!VM_Version::supports_avx512vl()) {
23206       vlen_enc = Assembler::AVX_512bit;
23207     }
23208     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23209   %}
23210   ins_pipe( pipe_slow );
23211 %}
23212 
23213 // LoadShuffle/Rearrange for Integer and Float
23214 
23215 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23216   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23217             Matcher::vector_length(n) == 4 && UseAVX == 0);
23218   match(Set dst (VectorLoadShuffle src));
23219   effect(TEMP dst, TEMP vtmp);
23220   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23221   ins_encode %{
23222     assert(UseSSE >= 4, "required");
23223 
23224     // Create a byte shuffle mask from the int shuffle mask, since only a
23225     // byte shuffle instruction is available on these platforms
23226 
23227     // Duplicate and multiply each shuffle by 4
23228     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23229     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23230     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23231     __ psllw($vtmp$$XMMRegister, 2);
23232 
23233     // Duplicate again to create 4 copies of byte index
23234     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23235     __ psllw($dst$$XMMRegister, 8);
23236     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23237 
23238     // Add 3,2,1,0 to get alternate byte index
23239     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23240     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23241   %}
23242   ins_pipe( pipe_slow );
23243 %}
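
// Illustrative sketch: the same expansion for 32-bit elements, producing four
// byte indices per int (duplicate, multiply by 4, then add 0..3 per group):
//   for (int i = 0; i < 4; i++) {
//     for (int j = 0; j < 4; j++) {
//       byte_shuffle[4*i + j] = 4 * int_shuffle[i] + j;
//     }
//   }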
23244 
23245 instruct rearrangeI(vec dst, vec shuffle) %{
23246   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23247             UseAVX == 0);
23248   match(Set dst (VectorRearrange dst shuffle));
23249   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23250   ins_encode %{
23251     assert(UseSSE >= 4, "required");
23252     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23253   %}
23254   ins_pipe( pipe_slow );
23255 %}
23256 
23257 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23258   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23259             UseAVX > 0);
23260   match(Set dst (VectorRearrange src shuffle));
23261   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23262   ins_encode %{
23263     int vlen_enc = vector_length_encoding(this);
23264     BasicType bt = Matcher::vector_element_basic_type(this);
23265     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23266   %}
23267   ins_pipe( pipe_slow );
23268 %}
23269 
23270 // LoadShuffle/Rearrange for Long and Double
23271 
23272 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23273   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23274             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23275   match(Set dst (VectorLoadShuffle src));
23276   effect(TEMP dst, TEMP vtmp);
23277   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23278   ins_encode %{
23279     assert(UseAVX >= 2, "required");
23280 
23281     int vlen_enc = vector_length_encoding(this);
23282     // Create a double word shuffle mask from the long shuffle mask, since only
23283     // a double word shuffle instruction is available on these platforms
23284 
23285     // Multiply each shuffle by two to get double word index
23286     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23287 
23288     // Duplicate each double word shuffle
23289     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23290     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23291 
23292     // Add one to get alternate double word index
23293     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23294   %}
23295   ins_pipe( pipe_slow );
23296 %}
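
// Illustrative sketch: a shuffle of longs becomes a double word shuffle that
// vpermd can consume, two dword indices per 64-bit element:
//   for (int i = 0; i < n_longs; i++) {
//     dword_shuffle[2*i]     = 2 * long_shuffle[i];       // low dword
//     dword_shuffle[2*i + 1] = 2 * long_shuffle[i] + 1;   // high dword
//   }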
23297 
23298 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23299   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23300             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23301   match(Set dst (VectorRearrange src shuffle));
23302   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23303   ins_encode %{
23304     assert(UseAVX >= 2, "required");
23305 
23306     int vlen_enc = vector_length_encoding(this);
23307     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23308   %}
23309   ins_pipe( pipe_slow );
23310 %}
23311 
23312 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23313   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23314             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23315   match(Set dst (VectorRearrange src shuffle));
23316   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23317   ins_encode %{
23318     assert(UseAVX > 2, "required");
23319 
23320     int vlen_enc = vector_length_encoding(this);
23321     if (vlen_enc == Assembler::AVX_128bit) {
23322       vlen_enc = Assembler::AVX_256bit;
23323     }
23324     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23325   %}
23326   ins_pipe( pipe_slow );
23327 %}
23328 
23329 // --------------------------------- FMA --------------------------------------
23330 // a * b + c
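//
// Note: an fma rounds only once, i.e. per lane c[i] = round(a[i] * b[i] + c[i])
// with no intermediate rounding of the product. That is not equivalent to a
// separate vector multiply and add, which is why the rules below require
// UseFMA instead of falling back to a two-instruction sequence.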
23331 
23332 instruct vfmaF_reg(vec a, vec b, vec c) %{
23333   match(Set c (FmaVF  c (Binary a b)));
23334   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23335   ins_cost(150);
23336   ins_encode %{
23337     assert(UseFMA, "not enabled");
23338     int vlen_enc = vector_length_encoding(this);
23339     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23340   %}
23341   ins_pipe( pipe_slow );
23342 %}
23343 
23344 instruct vfmaF_mem(vec a, memory b, vec c) %{
23345   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23346   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23347   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23348   ins_cost(150);
23349   ins_encode %{
23350     assert(UseFMA, "not enabled");
23351     int vlen_enc = vector_length_encoding(this);
23352     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23353   %}
23354   ins_pipe( pipe_slow );
23355 %}
23356 
23357 instruct vfmaD_reg(vec a, vec b, vec c) %{
23358   match(Set c (FmaVD  c (Binary a b)));
23359   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23360   ins_cost(150);
23361   ins_encode %{
23362     assert(UseFMA, "not enabled");
23363     int vlen_enc = vector_length_encoding(this);
23364     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23365   %}
23366   ins_pipe( pipe_slow );
23367 %}
23368 
23369 instruct vfmaD_mem(vec a, memory b, vec c) %{
23370   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23371   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23372   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23373   ins_cost(150);
23374   ins_encode %{
23375     assert(UseFMA, "not enabled");
23376     int vlen_enc = vector_length_encoding(this);
23377     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23378   %}
23379   ins_pipe( pipe_slow );
23380 %}
23381 
23382 // --------------------------------- Vector Multiply Add --------------------------------------
23383 
23384 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23385   predicate(UseAVX == 0);
23386   match(Set dst (MulAddVS2VI dst src1));
23387   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23388   ins_encode %{
23389     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23390   %}
23391   ins_pipe( pipe_slow );
23392 %}
23393 
23394 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23395   predicate(UseAVX > 0);
23396   match(Set dst (MulAddVS2VI src1 src2));
23397   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23398   ins_encode %{
23399     int vlen_enc = vector_length_encoding(this);
23400     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23401   %}
23402   ins_pipe( pipe_slow );
23403 %}
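
// Illustrative sketch: pmaddwd/vpmaddwd multiplies adjacent signed 16-bit
// pairs and sums each pair of products into one 32-bit lane:
//   for (int i = 0; i < n_ints; i++) {
//     dst_i32[i] = (int)src1_i16[2*i]     * (int)src2_i16[2*i]
//                + (int)src1_i16[2*i + 1] * (int)src2_i16[2*i + 1];
//   }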
23404 
23405 // --------------------------------- Vector Multiply Add Add ----------------------------------
23406 
23407 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23408   predicate(VM_Version::supports_avx512_vnni());
23409   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23410   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23411   ins_encode %{
23412     assert(UseAVX > 2, "required");
23413     int vlen_enc = vector_length_encoding(this);
23414     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23415   %}
23416   ins_pipe( pipe_slow );
23417   ins_cost(10);
23418 %}
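
// Illustrative note: evpdpwssd (AVX512_VNNI) fuses the pmaddwd above with the
// following vector add, accumulating in place without saturation:
//   dst_i32[i] += (int)s1_i16[2*i]     * (int)s2_i16[2*i]
//               + (int)s1_i16[2*i + 1] * (int)s2_i16[2*i + 1];
// hence the match on (AddVI (MulAddVS2VI src1 src2) dst) and the low ins_cost,
// which makes the matcher prefer it over the two-instruction sequence.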
23419 
23420 // --------------------------------- PopCount --------------------------------------
23421 
23422 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23423   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23424   match(Set dst (PopCountVI src));
23425   match(Set dst (PopCountVL src));
23426   format %{ "vector_popcount_integral $dst, $src" %}
23427   ins_encode %{
23429     int vlen_enc = vector_length_encoding(this, $src);
23430     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23431     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23432   %}
23433   ins_pipe( pipe_slow );
23434 %}
23435 
23436 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23437   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23438   match(Set dst (PopCountVI src mask));
23439   match(Set dst (PopCountVL src mask));
23440   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23441   ins_encode %{
23442     int vlen_enc = vector_length_encoding(this, $src);
23443     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23444     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23445     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23446   %}
23447   ins_pipe( pipe_slow );
23448 %}
23449 
23450 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23451   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23452   match(Set dst (PopCountVI src));
23453   match(Set dst (PopCountVL src));
23454   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23455   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23456   ins_encode %{
23458     int vlen_enc = vector_length_encoding(this, $src);
23459     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23460     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23461                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23462   %}
23463   ins_pipe( pipe_slow );
23464 %}
23465 
23466 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23467 
23468 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23469   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23470                                               Matcher::vector_length_in_bytes(n->in(1))));
23471   match(Set dst (CountTrailingZerosV src));
23472   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23473   ins_cost(400);
23474   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23475   ins_encode %{
23476     int vlen_enc = vector_length_encoding(this, $src);
23477     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23478     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23479                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23480   %}
23481   ins_pipe( pipe_slow );
23482 %}
23483 
23484 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23485   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23486             VM_Version::supports_avx512cd() &&
23487             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23488   match(Set dst (CountTrailingZerosV src));
23489   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23490   ins_cost(400);
23491   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23492   ins_encode %{
23493     int vlen_enc = vector_length_encoding(this, $src);
23494     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23495     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23496                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23497   %}
23498   ins_pipe( pipe_slow );
23499 %}
23500 
23501 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23502   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23503   match(Set dst (CountTrailingZerosV src));
23504   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23505   ins_cost(400);
23506   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23507   ins_encode %{
23508     int vlen_enc = vector_length_encoding(this, $src);
23509     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23510     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23511                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23512                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23513   %}
23514   ins_pipe( pipe_slow );
23515 %}
23516 
23517 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23518   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23519   match(Set dst (CountTrailingZerosV src));
23520   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23521   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23522   ins_encode %{
23523     int vlen_enc = vector_length_encoding(this, $src);
23524     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23525     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23526                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23527   %}
23528   ins_pipe( pipe_slow );
23529 %}
23530 
23531 
23532 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23533 
23534 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23535   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23536   effect(TEMP dst);
23537   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23538   ins_encode %{
23539     int vector_len = vector_length_encoding(this);
23540     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23541   %}
23542   ins_pipe( pipe_slow );
23543 %}
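
// Illustrative note: the 8-bit immediate $func is a truth table indexed by the
// three input bits; bit (a << 2 | b << 1 | c) of $func, with a from $dst,
// b from $src2 and c from $src3, gives the result bit. For example, 0x96
// encodes a ^ b ^ c, and 0xE8 encodes the majority function
// (a & b) | (a & c) | (b & c).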
23544 
23545 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23546   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23547   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23548   effect(TEMP dst);
23549   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23550   ins_encode %{
23551     int vector_len = vector_length_encoding(this);
23552     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23553   %}
23554   ins_pipe( pipe_slow );
23555 %}
23556 
23557 // --------------------------------- Rotation Operations ----------------------------------
23558 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23559   match(Set dst (RotateLeftV src shift));
23560   match(Set dst (RotateRightV src shift));
23561   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23562   ins_encode %{
23563     int opcode      = this->ideal_Opcode();
23564     int vector_len  = vector_length_encoding(this);
23565     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23566     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23567   %}
23568   ins_pipe( pipe_slow );
23569 %}
23570 
23571 instruct vprotate(vec dst, vec src, vec shift) %{
23572   match(Set dst (RotateLeftV src shift));
23573   match(Set dst (RotateRightV src shift));
23574   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23575   ins_encode %{
23576     int opcode      = this->ideal_Opcode();
23577     int vector_len  = vector_length_encoding(this);
23578     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23579     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23580   %}
23581   ins_pipe( pipe_slow );
23582 %}
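
// Illustrative note: per element of width w bits, a rotate left by s
// (0 < s < w) computes (x << s) | (x >>> (w - s)), and rotate right is the
// symmetric form. The vprotate_imm/vprotate_var helpers emit an EVEX rotate
// instruction where the target supports it and otherwise fall back to a
// shift/shift/or sequence.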
23583 
23584 // ---------------------------------- Masked Operations ------------------------------------
23585 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23586   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23587   match(Set dst (LoadVectorMasked mem mask));
23588   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
23589   ins_encode %{
23590     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23591     int vlen_enc = vector_length_encoding(this);
23592     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23593   %}
23594   ins_pipe( pipe_slow );
23595 %}
23596 
23597 
23598 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23599   predicate(n->in(3)->bottom_type()->isa_vectmask());
23600   match(Set dst (LoadVectorMasked mem mask));
23601   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
23602   ins_encode %{
23603     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
23604     int vector_len = vector_length_encoding(this);
23605     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23606   %}
23607   ins_pipe( pipe_slow );
23608 %}
23609 
23610 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23611   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23612   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23613   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23614   ins_encode %{
23615     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23616     int vlen_enc = vector_length_encoding(src_node);
23617     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23618     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23619   %}
23620   ins_pipe( pipe_slow );
23621 %}
23622 
23623 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23624   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23625   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23626   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23627   ins_encode %{
23628     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23629     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23630     int vlen_enc = vector_length_encoding(src_node);
23631     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23632   %}
23633   ins_pipe( pipe_slow );
23634 %}
23635 
23636 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23637   match(Set addr (VerifyVectorAlignment addr mask));
23638   effect(KILL cr);
23639   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23640   ins_encode %{
23641     Label Lskip;
23642     // check if masked bits of addr are zero
23643     __ testq($addr$$Register, $mask$$constant);
23644     __ jccb(Assembler::equal, Lskip);
23645     __ stop("verify_vector_alignment found a misaligned vector memory access");
23646     __ bind(Lskip);
23647   %}
23648   ins_pipe(pipe_slow);
23649 %}
23650 
23651 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23652   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23653   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23654   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23655   ins_encode %{
23656     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23657     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23658 
23659     Label DONE;
23660     int vlen_enc = vector_length_encoding(this, $src1);
23661     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23662 
23663     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23664     __ mov64($dst$$Register, -1L);
23665     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23666     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23667     __ jccb(Assembler::carrySet, DONE);
23668     __ kmovql($dst$$Register, $ktmp1$$KRegister);
23669     __ notq($dst$$Register);
23670     __ tzcntq($dst$$Register, $dst$$Register);
23671     __ bind(DONE);
23672   %}
23673   ins_pipe( pipe_slow );
23674 %}
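
// Illustrative summary of the sequence above: ktmp1 holds the lanes inside
// $mask that compared equal, ktmp2 holds ~$mask. kortest sets the carry flag
// iff (ktmp1 | ktmp2) is all ones, i.e. every masked lane matched, so $dst
// keeps the preloaded -1. Otherwise tzcnt(~ktmp1) yields the lowest lane that
// is masked-in but unequal (with the prefix masks produced by VectorMaskGen,
// the index of the first mismatch).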
23675 
23676 
23677 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23678   match(Set dst (VectorMaskGen len));
23679   effect(TEMP temp, KILL cr);
23680   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23681   ins_encode %{
23682     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23683   %}
23684   ins_pipe( pipe_slow );
23685 %}
23686 
23687 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23688   match(Set dst (VectorMaskGen len));
23689   format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23690   effect(TEMP temp);
23691   ins_encode %{
23692     __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23693     __ kmovql($dst$$KRegister, $temp$$Register);
23694   %}
23695   ins_pipe( pipe_slow );
23696 %}
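
// Illustrative example: for $len = 5 the computed constant is
//   0xFFFFFFFFFFFFFFFF >> (64 - 5) == 0x1F
// i.e. the lowest five mask bits set, which kmovql then loads into $dst.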
23697 
23698 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23699   predicate(n->in(1)->bottom_type()->isa_vectmask());
23700   match(Set dst (VectorMaskToLong mask));
23701   effect(TEMP dst, KILL cr);
23702   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23703   ins_encode %{
23704     int opcode = this->ideal_Opcode();
23705     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23706     int mask_len = Matcher::vector_length(this, $mask);
23707     int mask_size = mask_len * type2aelembytes(mbt);
23708     int vlen_enc = vector_length_encoding(this, $mask);
23709     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23710                              $dst$$Register, mask_len, mask_size, vlen_enc);
23711   %}
23712   ins_pipe( pipe_slow );
23713 %}
23714 
23715 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23716   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23717   match(Set dst (VectorMaskToLong mask));
23718   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23719   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23720   ins_encode %{
23721     int opcode = this->ideal_Opcode();
23722     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23723     int mask_len = Matcher::vector_length(this, $mask);
23724     int vlen_enc = vector_length_encoding(this, $mask);
23725     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23726                              $dst$$Register, mask_len, mbt, vlen_enc);
23727   %}
23728   ins_pipe( pipe_slow );
23729 %}
23730 
23731 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23732   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23733   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23734   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23735   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23736   ins_encode %{
23737     int opcode = this->ideal_Opcode();
23738     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23739     int mask_len = Matcher::vector_length(this, $mask);
23740     int vlen_enc = vector_length_encoding(this, $mask);
23741     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23742                              $dst$$Register, mask_len, mbt, vlen_enc);
23743   %}
23744   ins_pipe( pipe_slow );
23745 %}
23746 
23747 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23748   predicate(n->in(1)->bottom_type()->isa_vectmask());
23749   match(Set dst (VectorMaskTrueCount mask));
23750   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23751   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23752   ins_encode %{
23753     int opcode = this->ideal_Opcode();
23754     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23755     int mask_len = Matcher::vector_length(this, $mask);
23756     int mask_size = mask_len * type2aelembytes(mbt);
23757     int vlen_enc = vector_length_encoding(this, $mask);
23758     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23759                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23760   %}
23761   ins_pipe( pipe_slow );
23762 %}
23763 
23764 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23765   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23766   match(Set dst (VectorMaskTrueCount mask));
23767   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23768   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23769   ins_encode %{
23770     int opcode = this->ideal_Opcode();
23771     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23772     int mask_len = Matcher::vector_length(this, $mask);
23773     int vlen_enc = vector_length_encoding(this, $mask);
23774     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23775                              $tmp$$Register, mask_len, mbt, vlen_enc);
23776   %}
23777   ins_pipe( pipe_slow );
23778 %}
23779 
23780 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23781   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23782   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23783   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23784   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23785   ins_encode %{
23786     int opcode = this->ideal_Opcode();
23787     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23788     int mask_len = Matcher::vector_length(this, $mask);
23789     int vlen_enc = vector_length_encoding(this, $mask);
23790     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23791                              $tmp$$Register, mask_len, mbt, vlen_enc);
23792   %}
23793   ins_pipe( pipe_slow );
23794 %}
23795 
23796 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23797   predicate(n->in(1)->bottom_type()->isa_vectmask());
23798   match(Set dst (VectorMaskFirstTrue mask));
23799   match(Set dst (VectorMaskLastTrue mask));
23800   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23801   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23802   ins_encode %{
23803     int opcode = this->ideal_Opcode();
23804     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23805     int mask_len = Matcher::vector_length(this, $mask);
23806     int mask_size = mask_len * type2aelembytes(mbt);
23807     int vlen_enc = vector_length_encoding(this, $mask);
23808     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23809                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23810   %}
23811   ins_pipe( pipe_slow );
23812 %}
23813 
23814 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23815   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23816   match(Set dst (VectorMaskFirstTrue mask));
23817   match(Set dst (VectorMaskLastTrue mask));
23818   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23819   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23820   ins_encode %{
23821     int opcode = this->ideal_Opcode();
23822     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23823     int mask_len = Matcher::vector_length(this, $mask);
23824     int vlen_enc = vector_length_encoding(this, $mask);
23825     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23826                              $tmp$$Register, mask_len, mbt, vlen_enc);
23827   %}
23828   ins_pipe( pipe_slow );
23829 %}
23830 
23831 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23832   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23833   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23834   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23835   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23836   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23837   ins_encode %{
23838     int opcode = this->ideal_Opcode();
23839     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23840     int mask_len = Matcher::vector_length(this, $mask);
23841     int vlen_enc = vector_length_encoding(this, $mask);
23842     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23843                              $tmp$$Register, mask_len, mbt, vlen_enc);
23844   %}
23845   ins_pipe( pipe_slow );
23846 %}
23847 
23848 // --------------------------------- Compress/Expand Operations ---------------------------
23849 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23850   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23851   match(Set dst (CompressV src mask));
23852   match(Set dst (ExpandV src mask));
23853   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
23854   format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23855   ins_encode %{
23856     int opcode = this->ideal_Opcode();
23857     int vlen_enc = vector_length_encoding(this);
23858     BasicType bt  = Matcher::vector_element_basic_type(this);
23859     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23860                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23861   %}
23862   ins_pipe( pipe_slow );
23863 %}
23864 
23865 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23866   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23867   match(Set dst (CompressV src mask));
23868   match(Set dst (ExpandV src mask));
23869   format %{ "vector_compress_expand $dst, $src, $mask" %}
23870   ins_encode %{
23871     int opcode = this->ideal_Opcode();
23872     int vector_len = vector_length_encoding(this);
23873     BasicType bt  = Matcher::vector_element_basic_type(this);
23874     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
23875   %}
23876   ins_pipe( pipe_slow );
23877 %}
23878 
23879 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
23880   match(Set dst (CompressM mask));
23881   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
23882   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
23883   ins_encode %{
23884     assert(this->in(1)->bottom_type()->isa_vectmask(), "expected a vector mask");
23885     int mask_len = Matcher::vector_length(this);
23886     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
23887   %}
23888   ins_pipe( pipe_slow );
23889 %}
23890 
23891 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23892 
23893 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23894   predicate(!VM_Version::supports_gfni());
23895   match(Set dst (ReverseV src));
23896   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23897   format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23898   ins_encode %{
23899     int vec_enc = vector_length_encoding(this);
23900     BasicType bt = Matcher::vector_element_basic_type(this);
23901     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23902                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23903   %}
23904   ins_pipe( pipe_slow );
23905 %}
23906 
23907 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
23908   predicate(VM_Version::supports_gfni());
23909   match(Set dst (ReverseV src));
23910   effect(TEMP dst, TEMP xtmp);
23911   format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
23912   ins_encode %{
23913     int vec_enc = vector_length_encoding(this);
23914     BasicType bt  = Matcher::vector_element_basic_type(this);
23915     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
23916     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
23917                                $xtmp$$XMMRegister);
23918   %}
23919   ins_pipe( pipe_slow );
23920 %}
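
// Illustrative note: vgf2p8affineqb treats the 64-bit constant as an 8x8 bit
// matrix and multiplies every source byte by it over GF(2). The matrix
// 0x8040201008040201 is the flipped identity, so a single instruction reverses
// the bit order within each byte; for elements wider than a byte the helper
// then also reverses the byte order to complete the bit reversal.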
23921 
23922 instruct vreverse_byte_reg(vec dst, vec src) %{
23923   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
23924   match(Set dst (ReverseBytesV src));
23925   effect(TEMP dst);
23926   format %{ "vector_reverse_byte $dst, $src" %}
23927   ins_encode %{
23928     int vec_enc = vector_length_encoding(this);
23929     BasicType bt = Matcher::vector_element_basic_type(this);
23930     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
23931   %}
23932   ins_pipe( pipe_slow );
23933 %}
23934 
23935 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23936   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
23937   match(Set dst (ReverseBytesV src));
23938   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23939   format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
23940   ins_encode %{
23941     int vec_enc = vector_length_encoding(this);
23942     BasicType bt = Matcher::vector_element_basic_type(this);
23943     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23944                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
23945   %}
23946   ins_pipe( pipe_slow );
23947 %}
23948 
23949 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
23950 
23951 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
23952   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23953                                               Matcher::vector_length_in_bytes(n->in(1))));
23954   match(Set dst (CountLeadingZerosV src));
23955   format %{ "vector_count_leading_zeros $dst, $src" %}
23956   ins_encode %{
23957      int vlen_enc = vector_length_encoding(this, $src);
23958      BasicType bt = Matcher::vector_element_basic_type(this, $src);
23959      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23960                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
23961   %}
23962   ins_pipe( pipe_slow );
23963 %}
23964 
23965 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
23966   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23967                                               Matcher::vector_length_in_bytes(n->in(1))));
23968   match(Set dst (CountLeadingZerosV src mask));
23969   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
23970   ins_encode %{
23971     int vlen_enc = vector_length_encoding(this, $src);
23972     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23973     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23974     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
23975                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
23976   %}
23977   ins_pipe( pipe_slow );
23978 %}
23979 
23980 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
23981   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23982             VM_Version::supports_avx512cd() &&
23983             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23984   match(Set dst (CountLeadingZerosV src));
23985   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
23986   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
23987   ins_encode %{
23988     int vlen_enc = vector_length_encoding(this, $src);
23989     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23990     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23991                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
23992   %}
23993   ins_pipe( pipe_slow );
23994 %}
23995 
23996 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
23997   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23998   match(Set dst (CountLeadingZerosV src));
23999   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24000   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24001   ins_encode %{
24002     int vlen_enc = vector_length_encoding(this, $src);
24003     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24004     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24005                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24006                                        $rtmp$$Register, true, vlen_enc);
24007   %}
24008   ins_pipe( pipe_slow );
24009 %}
24010 
24011 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24012   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24013             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24014   match(Set dst (CountLeadingZerosV src));
24015   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24016   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24017   ins_encode %{
24018     int vlen_enc = vector_length_encoding(this, $src);
24019     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24020     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24021                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24022   %}
24023   ins_pipe( pipe_slow );
24024 %}
24025 
24026 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24027   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24028             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24029   match(Set dst (CountLeadingZerosV src));
24030   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24031   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24032   ins_encode %{
24033     int vlen_enc = vector_length_encoding(this, $src);
24034     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24035     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24036                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24037   %}
24038   ins_pipe( pipe_slow );
24039 %}
24040 
24041 // ---------------------------------- Vector Masked Operations ------------------------------------
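//
// Note: the *_masked rules in this section all use merge semantics (the 'true'
// argument to evmasked_op), i.e. per lane
//   dst[i] = mask[i] ? op(dst[i], src2[i]) : dst[i];
// so lanes whose mask bit is clear keep their previous destination value.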
24042 
24043 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24044   match(Set dst (AddVB (Binary dst src2) mask));
24045   match(Set dst (AddVS (Binary dst src2) mask));
24046   match(Set dst (AddVI (Binary dst src2) mask));
24047   match(Set dst (AddVL (Binary dst src2) mask));
24048   match(Set dst (AddVF (Binary dst src2) mask));
24049   match(Set dst (AddVD (Binary dst src2) mask));
24050   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24051   ins_encode %{
24052     int vlen_enc = vector_length_encoding(this);
24053     BasicType bt = Matcher::vector_element_basic_type(this);
24054     int opc = this->ideal_Opcode();
24055     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24056                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24057   %}
24058   ins_pipe( pipe_slow );
24059 %}
24060 
24061 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24062   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24063   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24064   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24065   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24066   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24067   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24068   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24069   ins_encode %{
24070     int vlen_enc = vector_length_encoding(this);
24071     BasicType bt = Matcher::vector_element_basic_type(this);
24072     int opc = this->ideal_Opcode();
24073     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24074                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24075   %}
24076   ins_pipe( pipe_slow );
24077 %}
24078 
24079 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24080   match(Set dst (XorV (Binary dst src2) mask));
24081   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24082   ins_encode %{
24083     int vlen_enc = vector_length_encoding(this);
24084     BasicType bt = Matcher::vector_element_basic_type(this);
24085     int opc = this->ideal_Opcode();
24086     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24087                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24088   %}
24089   ins_pipe( pipe_slow );
24090 %}
24091 
24092 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24093   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24094   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24095   ins_encode %{
24096     int vlen_enc = vector_length_encoding(this);
24097     BasicType bt = Matcher::vector_element_basic_type(this);
24098     int opc = this->ideal_Opcode();
24099     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24100                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24101   %}
24102   ins_pipe( pipe_slow );
24103 %}
24104 
24105 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24106   match(Set dst (OrV (Binary dst src2) mask));
24107   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24108   ins_encode %{
24109     int vlen_enc = vector_length_encoding(this);
24110     BasicType bt = Matcher::vector_element_basic_type(this);
24111     int opc = this->ideal_Opcode();
24112     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24113                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24114   %}
24115   ins_pipe( pipe_slow );
24116 %}
24117 
24118 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24119   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24120   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24121   ins_encode %{
24122     int vlen_enc = vector_length_encoding(this);
24123     BasicType bt = Matcher::vector_element_basic_type(this);
24124     int opc = this->ideal_Opcode();
24125     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24126                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24127   %}
24128   ins_pipe( pipe_slow );
24129 %}
24130 
24131 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24132   match(Set dst (AndV (Binary dst src2) mask));
24133   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24134   ins_encode %{
24135     int vlen_enc = vector_length_encoding(this);
24136     BasicType bt = Matcher::vector_element_basic_type(this);
24137     int opc = this->ideal_Opcode();
24138     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24139                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24140   %}
24141   ins_pipe( pipe_slow );
24142 %}
24143 
24144 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24145   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24146   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24147   ins_encode %{
24148     int vlen_enc = vector_length_encoding(this);
24149     BasicType bt = Matcher::vector_element_basic_type(this);
24150     int opc = this->ideal_Opcode();
24151     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24152                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24153   %}
24154   ins_pipe( pipe_slow );
24155 %}
24156 
24157 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24158   match(Set dst (SubVB (Binary dst src2) mask));
24159   match(Set dst (SubVS (Binary dst src2) mask));
24160   match(Set dst (SubVI (Binary dst src2) mask));
24161   match(Set dst (SubVL (Binary dst src2) mask));
24162   match(Set dst (SubVF (Binary dst src2) mask));
24163   match(Set dst (SubVD (Binary dst src2) mask));
24164   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24165   ins_encode %{
24166     int vlen_enc = vector_length_encoding(this);
24167     BasicType bt = Matcher::vector_element_basic_type(this);
24168     int opc = this->ideal_Opcode();
24169     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24170                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24171   %}
24172   ins_pipe( pipe_slow );
24173 %}
24174 
24175 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24176   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24177   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24178   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24179   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24180   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24181   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24182   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24183   ins_encode %{
24184     int vlen_enc = vector_length_encoding(this);
24185     BasicType bt = Matcher::vector_element_basic_type(this);
24186     int opc = this->ideal_Opcode();
24187     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24188                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24189   %}
24190   ins_pipe( pipe_slow );
24191 %}
24192 
24193 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24194   match(Set dst (MulVS (Binary dst src2) mask));
24195   match(Set dst (MulVI (Binary dst src2) mask));
24196   match(Set dst (MulVL (Binary dst src2) mask));
24197   match(Set dst (MulVF (Binary dst src2) mask));
24198   match(Set dst (MulVD (Binary dst src2) mask));
24199   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24200   ins_encode %{
24201     int vlen_enc = vector_length_encoding(this);
24202     BasicType bt = Matcher::vector_element_basic_type(this);
24203     int opc = this->ideal_Opcode();
24204     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24205                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24206   %}
24207   ins_pipe( pipe_slow );
24208 %}
24209 
24210 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24211   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24212   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24213   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24214   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24215   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24216   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24217   ins_encode %{
24218     int vlen_enc = vector_length_encoding(this);
24219     BasicType bt = Matcher::vector_element_basic_type(this);
24220     int opc = this->ideal_Opcode();
24221     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24222                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24223   %}
24224   ins_pipe( pipe_slow );
24225 %}
24226 
24227 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24228   match(Set dst (SqrtVF dst mask));
24229   match(Set dst (SqrtVD dst mask));
24230   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24231   ins_encode %{
24232     int vlen_enc = vector_length_encoding(this);
24233     BasicType bt = Matcher::vector_element_basic_type(this);
24234     int opc = this->ideal_Opcode();
24235     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24236                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24237   %}
24238   ins_pipe( pipe_slow );
24239 %}
24240 
24241 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24242   match(Set dst (DivVF (Binary dst src2) mask));
24243   match(Set dst (DivVD (Binary dst src2) mask));
24244   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24245   ins_encode %{
24246     int vlen_enc = vector_length_encoding(this);
24247     BasicType bt = Matcher::vector_element_basic_type(this);
24248     int opc = this->ideal_Opcode();
24249     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24250                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24251   %}
24252   ins_pipe( pipe_slow );
24253 %}
24254 
24255 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24256   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24257   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24258   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24259   ins_encode %{
24260     int vlen_enc = vector_length_encoding(this);
24261     BasicType bt = Matcher::vector_element_basic_type(this);
24262     int opc = this->ideal_Opcode();
24263     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24264                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24265   %}
24266   ins_pipe( pipe_slow );
24267 %}
24268 
24269 
24270 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24271   match(Set dst (RotateLeftV (Binary dst shift) mask));
24272   match(Set dst (RotateRightV (Binary dst shift) mask));
24273   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24274   ins_encode %{
24275     int vlen_enc = vector_length_encoding(this);
24276     BasicType bt = Matcher::vector_element_basic_type(this);
24277     int opc = this->ideal_Opcode();
24278     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24279                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24280   %}
24281   ins_pipe( pipe_slow );
24282 %}
24283 
24284 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24285   match(Set dst (RotateLeftV (Binary dst src2) mask));
24286   match(Set dst (RotateRightV (Binary dst src2) mask));
24287   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24288   ins_encode %{
24289     int vlen_enc = vector_length_encoding(this);
24290     BasicType bt = Matcher::vector_element_basic_type(this);
24291     int opc = this->ideal_Opcode();
24292     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24293                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24294   %}
24295   ins_pipe( pipe_slow );
24296 %}
24297 
24298 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24299   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24300   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24301   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24302   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24303   ins_encode %{
24304     int vlen_enc = vector_length_encoding(this);
24305     BasicType bt = Matcher::vector_element_basic_type(this);
24306     int opc = this->ideal_Opcode();
24307     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24308                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24309   %}
24310   ins_pipe( pipe_slow );
24311 %}
24312 
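// Masked shifts come in two flavors below: a uniform shift, where one count is
// applied to every lane, and a variable shift (is_var_shift()), where each lane
// carries its own count. The trailing boolean passed to evmasked_op selects
// between them (false = uniform count, true = per-lane counts).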
24313 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24314   predicate(!n->as_ShiftV()->is_var_shift());
24315   match(Set dst (LShiftVS (Binary dst src2) mask));
24316   match(Set dst (LShiftVI (Binary dst src2) mask));
24317   match(Set dst (LShiftVL (Binary dst src2) mask));
24318   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24319   ins_encode %{
24320     int vlen_enc = vector_length_encoding(this);
24321     BasicType bt = Matcher::vector_element_basic_type(this);
24322     int opc = this->ideal_Opcode();
24323     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24324                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24325   %}
24326   ins_pipe( pipe_slow );
24327 %}
24328 
24329 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24330   predicate(n->as_ShiftV()->is_var_shift());
24331   match(Set dst (LShiftVS (Binary dst src2) mask));
24332   match(Set dst (LShiftVI (Binary dst src2) mask));
24333   match(Set dst (LShiftVL (Binary dst src2) mask));
24334   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24335   ins_encode %{
24336     int vlen_enc = vector_length_encoding(this);
24337     BasicType bt = Matcher::vector_element_basic_type(this);
24338     int opc = this->ideal_Opcode();
24339     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24340                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24341   %}
24342   ins_pipe( pipe_slow );
24343 %}
24344 
24345 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24346   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24347   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24348   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24349   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24350   ins_encode %{
24351     int vlen_enc = vector_length_encoding(this);
24352     BasicType bt = Matcher::vector_element_basic_type(this);
24353     int opc = this->ideal_Opcode();
24354     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24355                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24356   %}
24357   ins_pipe( pipe_slow );
24358 %}
24359 
24360 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24361   predicate(!n->as_ShiftV()->is_var_shift());
24362   match(Set dst (RShiftVS (Binary dst src2) mask));
24363   match(Set dst (RShiftVI (Binary dst src2) mask));
24364   match(Set dst (RShiftVL (Binary dst src2) mask));
24365   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24366   ins_encode %{
24367     int vlen_enc = vector_length_encoding(this);
24368     BasicType bt = Matcher::vector_element_basic_type(this);
24369     int opc = this->ideal_Opcode();
24370     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24371                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24372   %}
24373   ins_pipe( pipe_slow );
24374 %}
24375 
24376 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24377   predicate(n->as_ShiftV()->is_var_shift());
24378   match(Set dst (RShiftVS (Binary dst src2) mask));
24379   match(Set dst (RShiftVI (Binary dst src2) mask));
24380   match(Set dst (RShiftVL (Binary dst src2) mask));
24381   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24382   ins_encode %{
24383     int vlen_enc = vector_length_encoding(this);
24384     BasicType bt = Matcher::vector_element_basic_type(this);
24385     int opc = this->ideal_Opcode();
24386     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24387                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24388   %}
24389   ins_pipe( pipe_slow );
24390 %}
24391 
24392 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24393   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24394   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24395   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24396   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24397   ins_encode %{
24398     int vlen_enc = vector_length_encoding(this);
24399     BasicType bt = Matcher::vector_element_basic_type(this);
24400     int opc = this->ideal_Opcode();
24401     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24402                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24403   %}
24404   ins_pipe( pipe_slow );
24405 %}
24406 
24407 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24408   predicate(!n->as_ShiftV()->is_var_shift());
24409   match(Set dst (URShiftVS (Binary dst src2) mask));
24410   match(Set dst (URShiftVI (Binary dst src2) mask));
24411   match(Set dst (URShiftVL (Binary dst src2) mask));
24412   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24413   ins_encode %{
24414     int vlen_enc = vector_length_encoding(this);
24415     BasicType bt = Matcher::vector_element_basic_type(this);
24416     int opc = this->ideal_Opcode();
24417     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24418                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24419   %}
24420   ins_pipe( pipe_slow );
24421 %}
24422 
24423 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24424   predicate(n->as_ShiftV()->is_var_shift());
24425   match(Set dst (URShiftVS (Binary dst src2) mask));
24426   match(Set dst (URShiftVI (Binary dst src2) mask));
24427   match(Set dst (URShiftVL (Binary dst src2) mask));
24428   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24429   ins_encode %{
24430     int vlen_enc = vector_length_encoding(this);
24431     BasicType bt = Matcher::vector_element_basic_type(this);
24432     int opc = this->ideal_Opcode();
24433     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24434                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24435   %}
24436   ins_pipe( pipe_slow );
24437 %}
24438 
24439 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24440   match(Set dst (MaxV (Binary dst src2) mask));
24441   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24442   ins_encode %{
24443     int vlen_enc = vector_length_encoding(this);
24444     BasicType bt = Matcher::vector_element_basic_type(this);
24445     int opc = this->ideal_Opcode();
24446     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24447                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24448   %}
24449   ins_pipe( pipe_slow );
24450 %}
24451 
24452 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24453   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24454   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24455   ins_encode %{
24456     int vlen_enc = vector_length_encoding(this);
24457     BasicType bt = Matcher::vector_element_basic_type(this);
24458     int opc = this->ideal_Opcode();
24459     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24460                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24461   %}
24462   ins_pipe( pipe_slow );
24463 %}
24464 
24465 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24466   match(Set dst (MinV (Binary dst src2) mask));
24467   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24468   ins_encode %{
24469     int vlen_enc = vector_length_encoding(this);
24470     BasicType bt = Matcher::vector_element_basic_type(this);
24471     int opc = this->ideal_Opcode();
24472     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24473                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24474   %}
24475   ins_pipe( pipe_slow );
24476 %}
24477 
24478 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24479   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24480   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24481   ins_encode %{
24482     int vlen_enc = vector_length_encoding(this);
24483     BasicType bt = Matcher::vector_element_basic_type(this);
24484     int opc = this->ideal_Opcode();
24485     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24486                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24487   %}
24488   ins_pipe( pipe_slow );
24489 %}
24490 
24491 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24492   match(Set dst (VectorRearrange (Binary dst src2) mask));
24493   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24494   ins_encode %{
24495     int vlen_enc = vector_length_encoding(this);
24496     BasicType bt = Matcher::vector_element_basic_type(this);
24497     int opc = this->ideal_Opcode();
24498     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24499                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24500   %}
24501   ins_pipe( pipe_slow );
24502 %}
24503 
24504 instruct vabs_masked(vec dst, kReg mask) %{
24505   match(Set dst (AbsVB dst mask));
24506   match(Set dst (AbsVS dst mask));
24507   match(Set dst (AbsVI dst mask));
24508   match(Set dst (AbsVL dst mask));
24509   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24510   ins_encode %{
24511     int vlen_enc = vector_length_encoding(this);
24512     BasicType bt = Matcher::vector_element_basic_type(this);
24513     int opc = this->ideal_Opcode();
24514     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24515                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24516   %}
24517   ins_pipe( pipe_slow );
24518 %}
24519 
24520 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24521   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24522   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24523   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24524   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24526     int vlen_enc = vector_length_encoding(this);
24527     BasicType bt = Matcher::vector_element_basic_type(this);
24528     int opc = this->ideal_Opcode();
24529     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24530                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24531   %}
24532   ins_pipe( pipe_slow );
24533 %}
24534 
24535 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24536   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24537   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24538   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24539   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24541     int vlen_enc = vector_length_encoding(this);
24542     BasicType bt = Matcher::vector_element_basic_type(this);
24543     int opc = this->ideal_Opcode();
24544     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24545                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24546   %}
24547   ins_pipe( pipe_slow );
24548 %}
24549 
24550 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24551   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24552   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24553   ins_encode %{
24554     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24555     int vlen_enc = vector_length_encoding(this, $src1);
24556     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24557 
    // Dispatch the masked comparison on the element type of src1.
24559     switch (src1_elem_bt) {
24560       case T_BYTE: {
24561         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24562         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24563         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24564         break;
24565       }
24566       case T_SHORT: {
24567         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24568         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24569         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24570         break;
24571       }
24572       case T_INT: {
24573         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24574         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24575         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24576         break;
24577       }
24578       case T_LONG: {
24579         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24580         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24581         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24582         break;
24583       }
24584       case T_FLOAT: {
24585         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24586         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24587         break;
24588       }
24589       case T_DOUBLE: {
24590         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24591         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24592         break;
24593       }
24594       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24595     }
24596   %}
24597   ins_pipe( pipe_slow );
24598 %}
24599 
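// MaskAll broadcasts a scalar condition into every lane of a predicate register.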
24600 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24601   predicate(Matcher::vector_length(n) <= 32);
24602   match(Set dst (MaskAll src));
24603   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24604   ins_encode %{
24605     int mask_len = Matcher::vector_length(this);
24606     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24607   %}
24608   ins_pipe( pipe_slow );
24609 %}
24610 
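// Mask negation is matched as an XOR against an all-ones mask (MaskAll -1) and
// lowered to a knot of the source predicate register.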
24611 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24612   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24613   match(Set dst (XorVMask src (MaskAll cnt)));
24614   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24615   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24616   ins_encode %{
24617     uint masklen = Matcher::vector_length(this);
24618     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24619   %}
24620   ins_pipe( pipe_slow );
24621 %}
24622 
24623 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24624   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24625             (Matcher::vector_length(n) == 16) ||
24626             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24627   match(Set dst (XorVMask src (MaskAll cnt)));
24628   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24629   ins_encode %{
24630     uint masklen = Matcher::vector_length(this);
24631     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24632   %}
24633   ins_pipe( pipe_slow );
24634 %}
24635 
24636 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24637   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24638   match(Set dst (VectorLongToMask src));
24639   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24640   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24641   ins_encode %{
24642     int mask_len = Matcher::vector_length(this);
24643     int vec_enc  = vector_length_encoding(mask_len);
24644     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24645                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24646   %}
24647   ins_pipe( pipe_slow );
24648 %}
24649 
24650 
24651 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24652   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24653   match(Set dst (VectorLongToMask src));
24654   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24655   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24656   ins_encode %{
24657     int mask_len = Matcher::vector_length(this);
24658     assert(mask_len <= 32, "invalid mask length");
24659     int vec_enc  = vector_length_encoding(mask_len);
24660     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24661                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24662   %}
24663   ins_pipe( pipe_slow );
24664 %}
24665 
24666 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24667   predicate(n->bottom_type()->isa_vectmask());
24668   match(Set dst (VectorLongToMask src));
24669   format %{ "long_to_mask_evex $dst, $src\t!" %}
24670   ins_encode %{
24671     __ kmov($dst$$KRegister, $src$$Register);
24672   %}
24673   ins_pipe( pipe_slow );
24674 %}
24675 
24676 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24677   match(Set dst (AndVMask src1 src2));
24678   match(Set dst (OrVMask src1 src2));
24679   match(Set dst (XorVMask src1 src2));
24680   effect(TEMP kscratch);
24681   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24682   ins_encode %{
24683     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24684     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24685     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24686     uint masklen = Matcher::vector_length(this);
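    // Byte-granular mask instructions (kandb/korb/kxorb) require AVX512DQ; when
    // it is absent, widen sub-16-bit masks and use the word-sized forms instead.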
24687     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24688     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24689   %}
24690   ins_pipe( pipe_slow );
24691 %}
24692 
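// MacroLogicV folds a three-input boolean expression into a single vpternlog;
// $func is the 8-bit truth table selecting the boolean function.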
24693 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24694   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24695   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24696   ins_encode %{
24697     int vlen_enc = vector_length_encoding(this);
24698     BasicType bt = Matcher::vector_element_basic_type(this);
24699     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24700                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24701   %}
24702   ins_pipe( pipe_slow );
24703 %}
24704 
24705 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24706   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24707   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24708   ins_encode %{
24709     int vlen_enc = vector_length_encoding(this);
24710     BasicType bt = Matcher::vector_element_basic_type(this);
24711     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24712                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24713   %}
24714   ins_pipe( pipe_slow );
24715 %}
24716 
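// CastVV nodes only pin a type for the compiler; they match in place
// (dst == src) and emit no code, hence size(0) and the empty encodings below.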
24717 instruct castMM(kReg dst)
24718 %{
24719   match(Set dst (CastVV dst));
24720 
24721   size(0);
24722   format %{ "# castVV of $dst" %}
24723   ins_encode(/* empty encoding */);
24724   ins_cost(0);
24725   ins_pipe(empty);
24726 %}
24727 
24728 instruct castVV(vec dst)
24729 %{
24730   match(Set dst (CastVV dst));
24731 
24732   size(0);
24733   format %{ "# castVV of $dst" %}
24734   ins_encode(/* empty encoding */);
24735   ins_cost(0);
24736   ins_pipe(empty);
24737 %}
24738 
24739 instruct castVVLeg(legVec dst)
24740 %{
24741   match(Set dst (CastVV dst));
24742 
24743   size(0);
24744   format %{ "# castVV of $dst" %}
24745   ins_encode(/* empty encoding */);
24746   ins_cost(0);
24747   ins_pipe(empty);
24748 %}
24749 
24750 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24751 %{
24752   match(Set dst (IsInfiniteF src));
24753   effect(TEMP ktmp, KILL cr);
24754   format %{ "float_class_check $dst, $src" %}
24755   ins_encode %{
24756     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24757     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24758   %}
24759   ins_pipe(pipe_slow);
24760 %}
24761 
24762 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24763 %{
24764   match(Set dst (IsInfiniteD src));
24765   effect(TEMP ktmp, KILL cr);
24766   format %{ "double_class_check $dst, $src" %}
24767   ins_encode %{
24768     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24769     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24770   %}
24771   ins_pipe(pipe_slow);
24772 %}
24773 
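// Saturating vector add/sub: byte and short lanes map directly onto the native
// packed-saturating instructions, whereas int and long lanes have no hardware
// equivalent and are emulated below with separate EVEX and AVX sequences.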
24774 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24775 %{
24776   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24777             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24778   match(Set dst (SaturatingAddV src1 src2));
24779   match(Set dst (SaturatingSubV src1 src2));
24780   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24781   ins_encode %{
24782     int vlen_enc = vector_length_encoding(this);
24783     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24784     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24785                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24786   %}
24787   ins_pipe(pipe_slow);
24788 %}
24789 
24790 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24791 %{
24792   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24793             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24794   match(Set dst (SaturatingAddV src1 src2));
24795   match(Set dst (SaturatingSubV src1 src2));
24796   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24797   ins_encode %{
24798     int vlen_enc = vector_length_encoding(this);
24799     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24800     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24801                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24802   %}
24803   ins_pipe(pipe_slow);
24804 %}
24805 
24806 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24807 %{
24808   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24809             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24810             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24811   match(Set dst (SaturatingAddV src1 src2));
24812   match(Set dst (SaturatingSubV src1 src2));
24813   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24814   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24815   ins_encode %{
24816     int vlen_enc = vector_length_encoding(this);
24817     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24818     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24819                                         $src1$$XMMRegister, $src2$$XMMRegister,
24820                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24821                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24822   %}
24823   ins_pipe(pipe_slow);
24824 %}
24825 
24826 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24827 %{
24828   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24829             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24830             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24831   match(Set dst (SaturatingAddV src1 src2));
24832   match(Set dst (SaturatingSubV src1 src2));
24833   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24834   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24835   ins_encode %{
24836     int vlen_enc = vector_length_encoding(this);
24837     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24838     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24839                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24840                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24841   %}
24842   ins_pipe(pipe_slow);
24843 %}
24844 
24845 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24846 %{
24847   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24848             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24849             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24850   match(Set dst (SaturatingAddV src1 src2));
24851   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24852   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24853   ins_encode %{
24854     int vlen_enc = vector_length_encoding(this);
24855     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24856     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24857                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24858   %}
24859   ins_pipe(pipe_slow);
24860 %}
24861 
24862 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24863 %{
24864   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24865             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24866             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24867   match(Set dst (SaturatingAddV src1 src2));
24868   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24869   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24870   ins_encode %{
24871     int vlen_enc = vector_length_encoding(this);
24872     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24873     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24874                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24875   %}
24876   ins_pipe(pipe_slow);
24877 %}
24878 
24879 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24880 %{
24881   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24882             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24883             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24884   match(Set dst (SaturatingSubV src1 src2));
24885   effect(TEMP ktmp);
24886   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24887   ins_encode %{
24888     int vlen_enc = vector_length_encoding(this);
24889     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24890     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24891                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24892   %}
24893   ins_pipe(pipe_slow);
24894 %}
24895 
24896 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
24897 %{
24898   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24899             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24900             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24901   match(Set dst (SaturatingSubV src1 src2));
24902   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24903   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
24904   ins_encode %{
24905     int vlen_enc = vector_length_encoding(this);
24906     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24907     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24908                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
24909   %}
24910   ins_pipe(pipe_slow);
24911 %}
24912 
24913 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
24914 %{
24915   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24916             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24917   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24918   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24919   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24920   ins_encode %{
24921     int vlen_enc = vector_length_encoding(this);
24922     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24923     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24924                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
24925   %}
24926   ins_pipe(pipe_slow);
24927 %}
24928 
24929 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
24930 %{
24931   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24932             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24933   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24934   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24935   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24936   ins_encode %{
24937     int vlen_enc = vector_length_encoding(this);
24938     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24939     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24940                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
24941   %}
24942   ins_pipe(pipe_slow);
24943 %}
24944 
24945 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
24946   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24947             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24948   match(Set dst (SaturatingAddV (Binary dst src) mask));
24949   match(Set dst (SaturatingSubV (Binary dst src) mask));
24950   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
24951   ins_encode %{
24952     int vlen_enc = vector_length_encoding(this);
24953     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24954     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24955                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
24956   %}
24957   ins_pipe( pipe_slow );
24958 %}
24959 
24960 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
24961   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24962             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24963   match(Set dst (SaturatingAddV (Binary dst src) mask));
24964   match(Set dst (SaturatingSubV (Binary dst src) mask));
24965   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
24966   ins_encode %{
24967     int vlen_enc = vector_length_encoding(this);
24968     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24969     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24970                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
24971   %}
24972   ins_pipe( pipe_slow );
24973 %}
24974 
24975 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
24976   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24977             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24978   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
24979   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
24980   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
24981   ins_encode %{
24982     int vlen_enc = vector_length_encoding(this);
24983     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24984     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24985                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
24986   %}
24987   ins_pipe( pipe_slow );
24988 %}
24989 
24990 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
24991   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24992             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24993   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
24994   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
24995   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
24996   ins_encode %{
24997     int vlen_enc = vector_length_encoding(this);
24998     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24999     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25000                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25001   %}
25002   ins_pipe( pipe_slow );
25003 %}
25004 
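// SelectFromTwoVector picks each destination lane from one of the two source
// vectors according to the per-lane value in $index (a two-table permute).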
25005 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25006 %{
25007   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25008   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25009   ins_encode %{
25010     int vlen_enc = vector_length_encoding(this);
25011     BasicType bt = Matcher::vector_element_basic_type(this);
25012     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25013   %}
25014   ins_pipe(pipe_slow);
25015 %}
25016 
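// Float16 scalars travel in the low 16 bits of an XMM register; vmovw
// (AVX512-FP16) moves that halfword between XMM and general registers.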
25017 instruct reinterpretS2HF(regF dst, rRegI src)
25018 %{
25019   match(Set dst (ReinterpretS2HF src));
25020   format %{ "vmovw $dst, $src" %}
25021   ins_encode %{
25022     __ vmovw($dst$$XMMRegister, $src$$Register);
25023   %}
25024   ins_pipe(pipe_slow);
25025 %}
25026 
25027 instruct reinterpretHF2S(rRegI dst, regF src)
25028 %{
25029   match(Set dst (ReinterpretHF2S src));
25030   format %{ "vmovw $dst, $src" %}
25031   ins_encode %{
25032     __ vmovw($dst$$Register, $src$$XMMRegister);
25033   %}
25034   ins_pipe(pipe_slow);
25035 %}
25036 
25037 instruct convF2HFAndS2HF(regF dst, regF src)
25038 %{
25039   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25040   format %{ "convF2HFAndS2HF $dst, $src" %}
25041   ins_encode %{
25042     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25043   %}
25044   ins_pipe(pipe_slow);
25045 %}
25046 
25047 instruct convHF2SAndHF2F(regF dst, regF src)
25048 %{
25049   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25050   format %{ "convHF2SAndHF2F $dst, $src" %}
25051   ins_encode %{
25052     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25053   %}
25054   ins_pipe(pipe_slow);
25055 %}
25056 
25057 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25058 %{
25059   match(Set dst (SqrtHF src));
25060   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25061   ins_encode %{
25062     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25063   %}
25064   ins_pipe(pipe_slow);
25065 %}
25066 
25067 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25068 %{
25069   match(Set dst (AddHF src1 src2));
25070   match(Set dst (DivHF src1 src2));
25071   match(Set dst (MulHF src1 src2));
25072   match(Set dst (SubHF src1 src2));
25073   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25074   ins_encode %{
25075     int opcode = this->ideal_Opcode();
25076     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25077   %}
25078   ins_pipe(pipe_slow);
25079 %}
25080 
25081 instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2)
25082 %{
25083   predicate(VM_Version::supports_avx10_2());
25084   match(Set dst (MaxHF src1 src2));
25085   match(Set dst (MinHF src1 src2));
25086   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25087   ins_encode %{
25088     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25089     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25090   %}
25091   ins_pipe( pipe_slow );
25092 %}
25093 
25094 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25095 %{
25096   predicate(!VM_Version::supports_avx10_2());
25097   match(Set dst (MaxHF src1 src2));
25098   match(Set dst (MinHF src1 src2));
25099   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25100   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25101   ins_encode %{
25102     int opcode = this->ideal_Opcode();
25103     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25104                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25105   %}
25106   ins_pipe( pipe_slow );
25107 %}
25108 
25109 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25110 %{
25111   match(Set dst (FmaHF  src2 (Binary dst src1)));
25112   effect(DEF dst);
25113   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25114   ins_encode %{
25115     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25116   %}
25117   ins_pipe( pipe_slow );
25118 %}
25119 
25120 
25121 instruct vector_sqrt_HF_reg(vec dst, vec src)
25122 %{
25123   match(Set dst (SqrtVHF src));
25124   format %{ "vector_sqrt_fp16 $dst, $src" %}
25125   ins_encode %{
25126     int vlen_enc = vector_length_encoding(this);
25127     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25128   %}
25129   ins_pipe(pipe_slow);
25130 %}
25131 
25132 instruct vector_sqrt_HF_mem(vec dst, memory src)
25133 %{
25134   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25135   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25136   ins_encode %{
25137     int vlen_enc = vector_length_encoding(this);
25138     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25139   %}
25140   ins_pipe(pipe_slow);
25141 %}
25142 
25143 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25144 %{
25145   match(Set dst (AddVHF src1 src2));
25146   match(Set dst (DivVHF src1 src2));
25147   match(Set dst (MulVHF src1 src2));
25148   match(Set dst (SubVHF src1 src2));
25149   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25150   ins_encode %{
25151     int vlen_enc = vector_length_encoding(this);
25152     int opcode = this->ideal_Opcode();
25153     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25154   %}
25155   ins_pipe(pipe_slow);
25156 %}
25157 
25158 
25159 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25160 %{
25161   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25162   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25163   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25164   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25165   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25166   ins_encode %{
25167     int vlen_enc = vector_length_encoding(this);
25168     int opcode = this->ideal_Opcode();
25169     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25170   %}
25171   ins_pipe(pipe_slow);
25172 %}
25173 
25174 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25175 %{
25176   match(Set dst (FmaVHF src2 (Binary dst src1)));
25177   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25178   ins_encode %{
25179     int vlen_enc = vector_length_encoding(this);
25180     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25181   %}
25182   ins_pipe( pipe_slow );
25183 %}
25184 
25185 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25186 %{
25187   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25188   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25189   ins_encode %{
25190     int vlen_enc = vector_length_encoding(this);
25191     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25192   %}
25193   ins_pipe( pipe_slow );
25194 %}
25195 
25196 instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2)
25197 %{
25198   predicate(VM_Version::supports_avx10_2());
25199   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25200   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25201   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25202   ins_encode %{
25203     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25205     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25206   %}
25207   ins_pipe( pipe_slow );
25208 %}
25209 
25210 instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2)
25211 %{
25212   predicate(VM_Version::supports_avx10_2());
25213   match(Set dst (MinVHF src1 src2));
25214   match(Set dst (MaxVHF src1 src2));
25215   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25216   ins_encode %{
25217     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN;
25219     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25220   %}
25221   ins_pipe( pipe_slow );
25222 %}
25223 
25224 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25225 %{
25226   predicate(!VM_Version::supports_avx10_2());
25227   match(Set dst (MinVHF src1 src2));
25228   match(Set dst (MaxVHF src1 src2));
25229   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25230   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25231   ins_encode %{
25232     int vlen_enc = vector_length_encoding(this);
25233     int opcode = this->ideal_Opcode();
25234     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25235                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25236   %}
25237   ins_pipe( pipe_slow );
25238 %}
25239 
25240 //----------PEEPHOLE RULES-----------------------------------------------------
25241 // These must follow all instruction definitions as they use the names
25242 // defined in the instructions definitions.
25243 //
25244 // peeppredicate ( rule_predicate );
// // the predicate under which the rule applies; if it evaluates to false,
// // the peephole rule is ignored
25246 //
25247 // peepmatch ( root_instr_name [preceding_instruction]* );
25248 //
25249 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and has the signature
// // MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...), with
// // the arguments being the basic block, the current node index inside the
// // block, the register allocator, functions that, when invoked, return a new
// // node as defined in peepreplace, and the rules of the nodes appearing in
// // the corresponding peepmatch. The procedure returns true if successful,
// // else false.
25258 //
25259 // peepconstraint %{
25260 // (instruction_number.operand_name relational_op instruction_number.operand_name
25261 //  [, ...] );
// // instruction numbers are zero-based, following left-to-right order in peepmatch
25263 //
25264 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25265 // // provide an instruction_number.operand_name for each operand that appears
25266 // // in the replacement instruction's match rule
25267 //
25268 // ---------VM FLAGS---------------------------------------------------------
25269 //
25270 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25271 //
25272 // Each peephole rule is given an identifying number starting with zero and
25273 // increasing by one in the order seen by the parser.  An individual peephole
25274 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25275 // on the command-line.
25276 //
25277 // ---------CURRENT LIMITATIONS----------------------------------------------
25278 //
// Only transformations inside a basic block (do we need more for peephole?)
25280 //
25281 // ---------EXAMPLE----------------------------------------------------------
25282 //
25283 // // pertinent parts of existing instructions in architecture description
25284 // instruct movI(rRegI dst, rRegI src)
25285 // %{
25286 //   match(Set dst (CopyI src));
25287 // %}
25288 //
25289 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25290 // %{
25291 //   match(Set dst (AddI dst src));
25292 //   effect(KILL cr);
25293 // %}
25294 //
25295 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25296 // %{
25297 //   match(Set dst (AddI dst src));
25298 // %}
25299 //
25300 // 1. Simple replacement
25301 // - Only match adjacent instructions in same basic block
25302 // - Only equality constraints
25303 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25304 // - Only one replacement instruction
25305 //
25306 // // Change (inc mov) to lea
25307 // peephole %{
25308 //   // lea should only be emitted when beneficial
25309 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25310 //   // increment preceded by register-register move
25311 //   peepmatch ( incI_rReg movI );
25312 //   // require that the destination register of the increment
25313 //   // match the destination register of the move
25314 //   peepconstraint ( 0.dst == 1.dst );
25315 //   // construct a replacement instruction that sets
25316 //   // the destination to ( move's source register + one )
25317 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25318 // %}
25319 //
25320 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25322 // - More flexible constraints
25323 // - More flexible transformations
25324 // - May utilise architecture-dependent API more effectively
25325 // - Currently only one replacement instruction due to adlc parsing capabilities
25326 //
25327 // // Change (inc mov) to lea
25328 // peephole %{
25329 //   // lea should only be emitted when beneficial
25330 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the function below
25332 //   peepmatch ( incI_rReg movI );
25333 //   // the method that takes the responsibility of transformation
25334 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the function above
25337 //   peepreplace ( leaI_rReg_immI() );
25338 // %}
25339 
// These instructions are not matched by the matcher but are used by the peephole rules
25341 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25342 %{
25343   predicate(false);
25344   match(Set dst (AddI src1 src2));
25345   format %{ "leal    $dst, [$src1 + $src2]" %}
25346   ins_encode %{
25347     Register dst = $dst$$Register;
25348     Register src1 = $src1$$Register;
25349     Register src2 = $src2$$Register;
25350     if (src1 != rbp && src1 != r13) {
25351       __ leal(dst, Address(src1, src2, Address::times_1));
25352     } else {
25353       assert(src2 != rbp && src2 != r13, "");
25354       __ leal(dst, Address(src2, src1, Address::times_1));
25355     }
25356   %}
25357   ins_pipe(ialu_reg_reg);
25358 %}
25359 
25360 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25361 %{
25362   predicate(false);
25363   match(Set dst (AddI src1 src2));
25364   format %{ "leal    $dst, [$src1 + $src2]" %}
25365   ins_encode %{
25366     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25367   %}
25368   ins_pipe(ialu_reg_reg);
25369 %}
25370 
25371 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25372 %{
25373   predicate(false);
25374   match(Set dst (LShiftI src shift));
25375   format %{ "leal    $dst, [$src << $shift]" %}
25376   ins_encode %{
25377     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25378     Register src = $src$$Register;
25379     if (scale == Address::times_2 && src != rbp && src != r13) {
25380       __ leal($dst$$Register, Address(src, src, Address::times_1));
25381     } else {
25382       __ leal($dst$$Register, Address(noreg, src, scale));
25383     }
25384   %}
25385   ins_pipe(ialu_reg_reg);
25386 %}
25387 
25388 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25389 %{
25390   predicate(false);
25391   match(Set dst (AddL src1 src2));
25392   format %{ "leaq    $dst, [$src1 + $src2]" %}
25393   ins_encode %{
25394     Register dst = $dst$$Register;
25395     Register src1 = $src1$$Register;
25396     Register src2 = $src2$$Register;
25397     if (src1 != rbp && src1 != r13) {
25398       __ leaq(dst, Address(src1, src2, Address::times_1));
25399     } else {
25400       assert(src2 != rbp && src2 != r13, "");
25401       __ leaq(dst, Address(src2, src1, Address::times_1));
25402     }
25403   %}
25404   ins_pipe(ialu_reg_reg);
25405 %}
25406 
25407 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25408 %{
25409   predicate(false);
25410   match(Set dst (AddL src1 src2));
25411   format %{ "leaq    $dst, [$src1 + $src2]" %}
25412   ins_encode %{
25413     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25414   %}
25415   ins_pipe(ialu_reg_reg);
25416 %}
25417 
25418 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25419 %{
25420   predicate(false);
25421   match(Set dst (LShiftL src shift));
25422   format %{ "leaq    $dst, [$src << $shift]" %}
25423   ins_encode %{
25424     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25425     Register src = $src$$Register;
25426     if (scale == Address::times_2 && src != rbp && src != r13) {
25427       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25428     } else {
25429       __ leaq($dst$$Register, Address(noreg, src, scale));
25430     }
25431   %}
25432   ins_pipe(ialu_reg_reg);
25433 %}
25434 
// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial for
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally only
// beneficial for processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
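//
// For example, the reg-reg rule rewrites (illustrative assembly, registers
// chosen arbitrarily):
//
//   movl  edx, eax          // movI
//   addl  edx, ecx          // addI_rReg
// into
//   leal  edx, [eax + ecx]  // leaI_rReg_rReg_peep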
25441 
25442 peephole
25443 %{
25444   peeppredicate(VM_Version::supports_fast_2op_lea());
25445   peepmatch (addI_rReg);
25446   peepprocedure (lea_coalesce_reg);
25447   peepreplace (leaI_rReg_rReg_peep());
25448 %}
25449 
25450 peephole
25451 %{
25452   peeppredicate(VM_Version::supports_fast_2op_lea());
25453   peepmatch (addI_rReg_imm);
25454   peepprocedure (lea_coalesce_imm);
25455   peepreplace (leaI_rReg_immI_peep());
25456 %}
25457 
25458 peephole
25459 %{
25460   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25461                 VM_Version::is_intel_cascade_lake());
25462   peepmatch (incI_rReg);
25463   peepprocedure (lea_coalesce_imm);
25464   peepreplace (leaI_rReg_immI_peep());
25465 %}
25466 
25467 peephole
25468 %{
25469   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25470                 VM_Version::is_intel_cascade_lake());
25471   peepmatch (decI_rReg);
25472   peepprocedure (lea_coalesce_imm);
25473   peepreplace (leaI_rReg_immI_peep());
25474 %}
25475 
25476 peephole
25477 %{
25478   peeppredicate(VM_Version::supports_fast_2op_lea());
25479   peepmatch (salI_rReg_immI2);
25480   peepprocedure (lea_coalesce_imm);
25481   peepreplace (leaI_rReg_immI2_peep());
25482 %}
25483 
25484 peephole
25485 %{
25486   peeppredicate(VM_Version::supports_fast_2op_lea());
25487   peepmatch (addL_rReg);
25488   peepprocedure (lea_coalesce_reg);
25489   peepreplace (leaL_rReg_rReg_peep());
25490 %}
25491 
25492 peephole
25493 %{
25494   peeppredicate(VM_Version::supports_fast_2op_lea());
25495   peepmatch (addL_rReg_imm);
25496   peepprocedure (lea_coalesce_imm);
25497   peepreplace (leaL_rReg_immL32_peep());
25498 %}
25499 
25500 peephole
25501 %{
25502   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25503                 VM_Version::is_intel_cascade_lake());
25504   peepmatch (incL_rReg);
25505   peepprocedure (lea_coalesce_imm);
25506   peepreplace (leaL_rReg_immL32_peep());
25507 %}
25508 
25509 peephole
25510 %{
25511   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25512                 VM_Version::is_intel_cascade_lake());
25513   peepmatch (decL_rReg);
25514   peepprocedure (lea_coalesce_imm);
25515   peepreplace (leaL_rReg_immL32_peep());
25516 %}
25517 
25518 peephole
25519 %{
25520   peeppredicate(VM_Version::supports_fast_2op_lea());
25521   peepmatch (salL_rReg_immI2);
25522   peepprocedure (lea_coalesce_imm);
25523   peepreplace (leaL_rReg_immI2_peep());
25524 %}
25525 
25526 peephole
25527 %{
25528   peepmatch (leaPCompressedOopOffset);
25529   peepprocedure (lea_remove_redundant);
25530 %}
25531 
25532 peephole
25533 %{
25534   peepmatch (leaP8Narrow);
25535   peepprocedure (lea_remove_redundant);
25536 %}
25537 
25538 peephole
25539 %{
25540   peepmatch (leaP32Narrow);
25541   peepprocedure (lea_remove_redundant);
25542 %}
25543 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the preceding instruction.
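//
// For example (illustrative):
//   andl  eax, ebx          // already sets ZF and SF from the result
//   testl eax, eax          // redundant; test_may_remove can drop it
//   je    done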
25546 
// int variant
25548 peephole
25549 %{
25550   peepmatch (testI_reg);
25551   peepprocedure (test_may_remove);
25552 %}
25553 
// long variant
25555 peephole
25556 %{
25557   peepmatch (testL_reg);
25558   peepprocedure (test_may_remove);
25559 %}
25560 
25561 
25562 //----------SMARTSPILL RULES---------------------------------------------------
25563 // These must follow all instruction definitions as they use the names
25564 // defined in the instructions definitions.