//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
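//
// For example, the first definition below,
//   reg_def RAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// declares RAX as save-on-call under both the VM and C calling conventions,
// spilled as an int (Op_RegI), with encoding 0 in instruction opcodes.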

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// RBX, RSI, and RDI were previously save-on-entry registers for Java code,
// but SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are SOE registers again.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);
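
// Note the ordering in chunk0 above: the pure scratch registers R10 and R11
// (save-on-call and not argument registers in either ABI) get the highest
// priority, while RSP comes last since it is fixed in every calling sequence.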

// XMM registers.  512-bit registers, 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX-enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 are preserved across function calls
//              XMM0-XMM3 might hold parameters
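//
// So a Float allocated to xmm0 occupies word XMM0 (a), a Double occupies the
// XMM0/XMM0b pair (ab), and a full 512-bit vector spans all sixteen words
// XMM0 through XMM0p.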

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
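
// Note that K0 has no reg_def here: in EVEX encodings a write-mask field of
// zero means "no masking", so k0 is not usable as an allocatable mask
// register.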


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}
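
// The %{ ... %} classes here return register masks computed at VM startup
// rather than fixed register lists, so membership can depend on runtime
// conditions (e.g. whether the APX registers R16-R31 are usable, or whether
// RBP must be preserved as a frame pointer). A minimal sketch of the kind of
// initialization involved, assuming the mask names used in these classes:
//   _ANY_REG_mask = _ALL_REG_mask;
//   if (PreserveFramePointer) {
//     _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
//     _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
//   }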

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
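
// These exclusion classes exist because several x86 instructions have fixed
// register operands: variable shift counts must live in RCX (hence the
// no_rcx classes) and integer divide/remainder uses RAX:RDX (hence the
// no_rax_rdx classes). RBP and R13 are special in memory addressing, where
// a mod=00 encoding with either as base requires an explicit displacement,
// hence the no_rbp_r13 classes.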

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
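
// For illustration only (this operand is hypothetical, not part of this
// file): a mask class is typically consumed by an ADL operand through
// ALLOC_IN_RC, e.g.
//
//   operand kReg_example() %{
//     constraint(ALLOC_IN_RC(vectmask_reg));
//     match(RegVectMask);
//     format %{ %}
//     interface(REG_INTER);
//   %}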
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre evex float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
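
// reg_class_dynamic(name, rc_if_true, rc_if_false, predicate) selects between
// two statically defined classes using the given C++ predicate: the first
// class when the predicate holds, the second otherwise. For example, float_reg
// below resolves to float_reg_evex (XMM0-XMM31) on EVEX-capable CPUs and to
// float_reg_legacy (XMM0-XMM15) everywhere else.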
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
 1116 // Class for pre evex double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
 1391 // Class for all 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
 1425 // Class for restricted 512bit vector registers
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
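    // Without APX there is no encoding for the extended GPRs (r16-r31), so
    // strip them from the pointer register mask.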
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() &&
         (C->max_vector_size() > 16 || C->clear_upper_avx());  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // vzeroupper is a 3-byte instruction
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
  int offset = 5; // call rel32 is 5 bytes from start of call to where the
                  // return address points (1 opcode byte + 4-byte displacement)
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
  int offset = 15; // 15 bytes from start of call to where the return address
                   // points: 10-byte movq of the inline-cache word + 5-byte call rel32
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  Label done;
  __ movl(dst, -1);                   // default result: -1 (less than; also NaN)
  __ jcc(Assembler::parity, done);    // unordered (NaN) -> keep -1
  __ jcc(Assembler::below, done);     // less -> keep -1
  __ setcc(Assembler::notEqual, dst); // greater -> 1, equal -> 0
  __ bind(done);
}
 1709 
 1710 // Math.min()    # Math.max()
 1711 // --------------------------
 1712 // ucomis[s/d]   #
 1713 // ja   -> b     # a
 1714 // jp   -> NaN   # NaN
 1715 // jb   -> a     # b
 1716 // je            #
 1717 // |-jz -> a | b # a & b
 1718 // |    -> a     #
 1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1720                             XMMRegister a, XMMRegister b,
 1721                             XMMRegister xmmt, Register rt,
 1722                             bool min, bool single) {
 1723 
 1724   Label nan, zero, below, above, done;
 1725 
 1726   if (single)
 1727     __ ucomiss(a, b);
 1728   else
 1729     __ ucomisd(a, b);
 1730 
 1731   if (dst->encoding() != (min ? b : a)->encoding())
 1732     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1733   else
 1734     __ jccb(Assembler::above, done);
 1735 
 1736   __ jccb(Assembler::parity, nan);  // PF=1
 1737   __ jccb(Assembler::below, below); // CF=1
 1738 
 1739   // equal
 1740   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1741   if (single) {
 1742     __ ucomiss(a, xmmt);
 1743     __ jccb(Assembler::equal, zero);
 1744 
 1745     __ movflt(dst, a);
 1746     __ jmp(done);
 1747   }
 1748   else {
 1749     __ ucomisd(a, xmmt);
 1750     __ jccb(Assembler::equal, zero);
 1751 
 1752     __ movdbl(dst, a);
 1753     __ jmp(done);
 1754   }
 1755 
  __ bind(zero);
  // Both operands compared equal to +/-0.0: OR the bit patterns for min
  // (any -0.0 wins) and AND them for max (+0.0 wins unless both are -0.0).
  if (min)
    __ vpor(dst, a, b, Assembler::AVX_128bit);
  else
    __ vpand(dst, a, b, Assembler::AVX_128bit);
 1761 
 1762   __ jmp(done);
 1763 
 1764   __ bind(above);
 1765   if (single)
 1766     __ movflt(dst, min ? b : a);
 1767   else
 1768     __ movdbl(dst, min ? b : a);
 1769 
 1770   __ jmp(done);
 1771 
 1772   __ bind(nan);
 1773   if (single) {
 1774     __ movl(rt, 0x7fc00000); // Float.NaN
 1775     __ movdl(dst, rt);
 1776   }
 1777   else {
 1778     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1779     __ movdq(dst, rt);
 1780   }
 1781   __ jmp(done);
 1782 
 1783   __ bind(below);
 1784   if (single)
 1785     __ movflt(dst, min ? a : b);
 1786   else
 1787     __ movdbl(dst, min ? a : b);
 1788 
 1789   __ bind(done);
 1790 }
 1791 
 1792 //=============================================================================
 1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1794 
 1795 int ConstantTable::calculate_table_base_offset() const {
 1796   return 0;  // absolute addressing, no offset
 1797 }
 1798 
 1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1800 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1801   ShouldNotReachHere();
 1802 }
 1803 
 1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1805   // Empty encoding
 1806 }
 1807 
 1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1809   return 0;
 1810 }
 1811 
 1812 #ifndef PRODUCT
 1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1814   st->print("# MachConstantBaseNode (empty encoding)");
 1815 }
 1816 #endif
 1817 
 1818 
 1819 //=============================================================================
 1820 #ifndef PRODUCT
 1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1822   Compile* C = ra_->C;
 1823 
 1824   int framesize = C->output()->frame_size_in_bytes();
 1825   int bangsize = C->output()->bang_size_in_bytes();
 1826   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1827   // Remove wordSize for return addr which is already pushed.
 1828   framesize -= wordSize;
 1829 
 1830   if (C->output()->need_stack_bang(bangsize)) {
 1831     framesize -= wordSize;
 1832     st->print("# stack bang (%d bytes)", bangsize);
 1833     st->print("\n\t");
 1834     st->print("pushq   rbp\t# Save rbp");
 1835     if (PreserveFramePointer) {
 1836         st->print("\n\t");
 1837         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1838     }
 1839     if (framesize) {
 1840       st->print("\n\t");
 1841       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1842     }
 1843   } else {
 1844     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1845     st->print("\n\t");
 1846     framesize -= wordSize;
 1847     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1848     if (PreserveFramePointer) {
 1849       st->print("\n\t");
 1850       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1851       if (framesize > 0) {
 1852         st->print("\n\t");
 1853         st->print("addq    rbp, #%d", framesize);
 1854       }
 1855     }
 1856   }
 1857 
 1858   if (VerifyStackAtCalls) {
 1859     st->print("\n\t");
 1860     framesize -= wordSize;
 1861     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1862 #ifdef ASSERT
 1863     st->print("\n\t");
 1864     st->print("# stack alignment check");
 1865 #endif
 1866   }
 1867   if (C->stub_function() != nullptr) {
 1868     st->print("\n\t");
 1869     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1870     st->print("\n\t");
 1871     st->print("je      fast_entry\t");
 1872     st->print("\n\t");
 1873     st->print("call    #nmethod_entry_barrier_stub\t");
 1874     st->print("\n\tfast_entry:");
 1875   }
 1876   st->cr();
 1877 }
 1878 #endif
 1879 
 1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1881   Compile* C = ra_->C;
 1882 
 1883   int framesize = C->output()->frame_size_in_bytes();
 1884   int bangsize = C->output()->bang_size_in_bytes();
 1885 
 1886   if (C->clinit_barrier_on_entry()) {
 1887     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1888     assert(!C->method()->holder()->is_not_initialized() || C->do_clinit_barriers(), "initialization should have been started");
 1889 
 1890     Label L_skip_barrier;
 1891     Register klass = rscratch1;
 1892 
 1893     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1894     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1895 
 1896     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1897 
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
 1902 
 1903   C->output()->set_frame_complete(__ offset());
 1904 
 1905   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because uses of the constant
    // table might be emitted before the MachConstantBaseNode itself.
 1908     ConstantTable& constant_table = C->output()->constant_table();
 1909     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1910   }
 1911 }
 1912 
 1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1914 {
 1915   return MachNode::size(ra_); // too many variables; just compute it
 1916                               // the hard way
 1917 }
 1918 
 1919 int MachPrologNode::reloc() const
 1920 {
 1921   return 0; // a large enough number
 1922 }
 1923 
 1924 //=============================================================================
 1925 #ifndef PRODUCT
 1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1927 {
 1928   Compile* C = ra_->C;
 1929   if (generate_vzeroupper(C)) {
 1930     st->print("vzeroupper");
 1931     st->cr(); st->print("\t");
 1932   }
 1933 
 1934   int framesize = C->output()->frame_size_in_bytes();
 1935   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the words for the return address (already pushed) and for RBP.
 1938   framesize -= 2*wordSize;
 1939 
 1940   if (framesize) {
 1941     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1942     st->print("\t");
 1943   }
 1944 
 1945   st->print_cr("popq    rbp");
 1946   if (do_polling() && C->is_method_compilation()) {
 1947     st->print("\t");
 1948     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1949                  "ja      #safepoint_stub\t"
 1950                  "# Safepoint: poll for GC");
 1951   }
 1952 }
 1953 #endif
 1954 
 1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1956 {
 1957   Compile* C = ra_->C;
 1958 
 1959   if (generate_vzeroupper(C)) {
 1960     // Clear upper bits of YMM registers when current compiled code uses
 1961     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1962     __ vzeroupper();
 1963   }
 1964 
 1965   int framesize = C->output()->frame_size_in_bytes();
 1966   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the words for the return address (already pushed) and for RBP.
 1969   framesize -= 2*wordSize;
 1970 
 1971   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1972 
 1973   if (framesize) {
 1974     __ addq(rsp, framesize);
 1975   }
 1976 
 1977   __ popq(rbp);
 1978 
 1979   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1980     __ reserved_stack_check();
 1981   }
 1982 
 1983   if (do_polling() && C->is_method_compilation()) {
 1984     Label dummy_label;
 1985     Label* code_stub = &dummy_label;
 1986     if (!C->output()->in_scratch_emit_size()) {
 1987       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1988       C->output()->add_stub(stub);
 1989       code_stub = &stub->entry();
 1990     }
 1991     __ relocate(relocInfo::poll_return_type);
 1992     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1993   }
 1994 }
 1995 
 1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1997 {
 1998   return MachNode::size(ra_); // too many variables; just compute it
 1999                               // the hard way
 2000 }
 2001 
 2002 int MachEpilogNode::reloc() const
 2003 {
 2004   return 2; // a large enough number
 2005 }
 2006 
 2007 const Pipeline* MachEpilogNode::pipeline() const
 2008 {
 2009   return MachNode::pipeline_class();
 2010 }
 2011 
 2012 //=============================================================================
 2013 
 2014 enum RC {
 2015   rc_bad,
 2016   rc_int,
 2017   rc_kreg,
 2018   rc_float,
 2019   rc_stack
 2020 };
 2021 
 2022 static enum RC rc_class(OptoReg::Name reg)
 2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2025 
 2026   if (OptoReg::is_stack(reg)) return rc_stack;
 2027 
 2028   VMReg r = OptoReg::as_VMReg(reg);
 2029 
 2030   if (r->is_Register()) return rc_int;
 2031 
 2032   if (r->is_KRegister()) return rc_kreg;
 2033 
 2034   assert(r->is_XMMRegister(), "must be");
 2035   return rc_float;
 2036 }
 2037 
 2038 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2040                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2041 
 2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2043                      int stack_offset, int reg, uint ireg, outputStream* st);
 2044 
 2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2046                                       int dst_offset, uint ireg, outputStream* st) {
 2047   if (masm) {
 2048     switch (ireg) {
 2049     case Op_VecS:
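      // 32-bit mem-mem move with no free register: park rax just below rsp,
      // bounce the value through it, then restore rax.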
 2050       __ movq(Address(rsp, -8), rax);
 2051       __ movl(rax, Address(rsp, src_offset));
 2052       __ movl(Address(rsp, dst_offset), rax);
 2053       __ movq(rax, Address(rsp, -8));
 2054       break;
 2055     case Op_VecD:
 2056       __ pushq(Address(rsp, src_offset));
 2057       __ popq (Address(rsp, dst_offset));
 2058       break;
 2059     case Op_VecX:
 2060       __ pushq(Address(rsp, src_offset));
 2061       __ popq (Address(rsp, dst_offset));
 2062       __ pushq(Address(rsp, src_offset+8));
 2063       __ popq (Address(rsp, dst_offset+8));
 2064       break;
 2065     case Op_VecY:
 2066       __ vmovdqu(Address(rsp, -32), xmm0);
 2067       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2068       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2069       __ vmovdqu(xmm0, Address(rsp, -32));
 2070       break;
 2071     case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2); // vector_len 2 == Assembler::AVX_512bit
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2076       break;
 2077     default:
 2078       ShouldNotReachHere();
 2079     }
 2080 #ifndef PRODUCT
 2081   } else {
 2082     switch (ireg) {
 2083     case Op_VecS:
 2084       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2085                 "movl    rax, [rsp + #%d]\n\t"
 2086                 "movl    [rsp + #%d], rax\n\t"
 2087                 "movq    rax, [rsp - #8]",
 2088                 src_offset, dst_offset);
 2089       break;
 2090     case Op_VecD:
 2091       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2092                 "popq    [rsp + #%d]",
 2093                 src_offset, dst_offset);
 2094       break;
    case Op_VecX:
 2096       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2097                 "popq    [rsp + #%d]\n\t"
 2098                 "pushq   [rsp + #%d]\n\t"
 2099                 "popq    [rsp + #%d]",
 2100                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2101       break;
 2102     case Op_VecY:
 2103       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2104                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2105                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2106                 "vmovdqu xmm0, [rsp - #32]",
 2107                 src_offset, dst_offset);
 2108       break;
 2109     case Op_VecZ:
 2110       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2111                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2112                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2113                 "vmovdqu xmm0, [rsp - #64]",
 2114                 src_offset, dst_offset);
 2115       break;
 2116     default:
 2117       ShouldNotReachHere();
 2118     }
 2119 #endif
 2120   }
 2121 }
 2122 
 2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2124                                        PhaseRegAlloc* ra_,
 2125                                        bool do_size,
 2126                                        outputStream* st) const {
 2127   assert(masm != nullptr || st  != nullptr, "sanity");
 2128   // Get registers to move
 2129   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2130   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2131   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2132   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2133 
 2134   enum RC src_second_rc = rc_class(src_second);
 2135   enum RC src_first_rc = rc_class(src_first);
 2136   enum RC dst_second_rc = rc_class(dst_second);
 2137   enum RC dst_first_rc = rc_class(dst_first);
 2138 
  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register");
 2141 
 2142   if (src_first == dst_first && src_second == dst_second) {
 2143     // Self copy, no move
 2144     return 0;
 2145   }
 2146   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2147     uint ireg = ideal_reg();
 2148     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2149     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
 2161       int stack_offset = ra_->reg2offset(src_first);
 2162       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2163     } else {
 2164       ShouldNotReachHere();
 2165     }
 2166     return 0;
 2167   }
 2168   if (src_first_rc == rc_stack) {
 2169     // mem ->
 2170     if (dst_first_rc == rc_stack) {
 2171       // mem -> mem
 2172       assert(src_second != dst_first, "overlap");
 2173       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2174           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2175         // 64-bit
 2176         int src_offset = ra_->reg2offset(src_first);
 2177         int dst_offset = ra_->reg2offset(dst_first);
 2178         if (masm) {
 2179           __ pushq(Address(rsp, src_offset));
 2180           __ popq (Address(rsp, dst_offset));
 2181 #ifndef PRODUCT
 2182         } else {
 2183           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2184                     "popq    [rsp + #%d]",
 2185                      src_offset, dst_offset);
 2186 #endif
 2187         }
 2188       } else {
 2189         // 32-bit
 2190         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2191         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2192         // No pushl/popl, so:
 2193         int src_offset = ra_->reg2offset(src_first);
 2194         int dst_offset = ra_->reg2offset(dst_first);
 2195         if (masm) {
 2196           __ movq(Address(rsp, -8), rax);
 2197           __ movl(rax, Address(rsp, src_offset));
 2198           __ movl(Address(rsp, dst_offset), rax);
 2199           __ movq(rax, Address(rsp, -8));
 2200 #ifndef PRODUCT
 2201         } else {
 2202           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2203                     "movl    rax, [rsp + #%d]\n\t"
 2204                     "movl    [rsp + #%d], rax\n\t"
 2205                     "movq    rax, [rsp - #8]",
 2206                      src_offset, dst_offset);
 2207 #endif
 2208         }
 2209       }
 2210       return 0;
 2211     } else if (dst_first_rc == rc_int) {
 2212       // mem -> gpr
 2213       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2214           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2215         // 64-bit
 2216         int offset = ra_->reg2offset(src_first);
 2217         if (masm) {
 2218           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2219 #ifndef PRODUCT
 2220         } else {
 2221           st->print("movq    %s, [rsp + #%d]\t# spill",
 2222                      Matcher::regName[dst_first],
 2223                      offset);
 2224 #endif
 2225         }
 2226       } else {
 2227         // 32-bit
 2228         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2229         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2230         int offset = ra_->reg2offset(src_first);
 2231         if (masm) {
 2232           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2233 #ifndef PRODUCT
 2234         } else {
 2235           st->print("movl    %s, [rsp + #%d]\t# spill",
 2236                      Matcher::regName[dst_first],
 2237                      offset);
 2238 #endif
 2239         }
 2240       }
 2241       return 0;
 2242     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2244       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2245           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2246         // 64-bit
 2247         int offset = ra_->reg2offset(src_first);
 2248         if (masm) {
 2249           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2250 #ifndef PRODUCT
 2251         } else {
 2252           st->print("%s  %s, [rsp + #%d]\t# spill",
 2253                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2254                      Matcher::regName[dst_first],
 2255                      offset);
 2256 #endif
 2257         }
 2258       } else {
 2259         // 32-bit
 2260         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2261         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2262         int offset = ra_->reg2offset(src_first);
 2263         if (masm) {
 2264           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2265 #ifndef PRODUCT
 2266         } else {
 2267           st->print("movss   %s, [rsp + #%d]\t# spill",
 2268                      Matcher::regName[dst_first],
 2269                      offset);
 2270 #endif
 2271         }
 2272       }
 2273       return 0;
 2274     } else if (dst_first_rc == rc_kreg) {
 2275       // mem -> kreg
 2276       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2277           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2278         // 64-bit
 2279         int offset = ra_->reg2offset(src_first);
 2280         if (masm) {
 2281           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2282 #ifndef PRODUCT
 2283         } else {
 2284           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2285                      Matcher::regName[dst_first],
 2286                      offset);
 2287 #endif
 2288         }
 2289       }
 2290       return 0;
 2291     }
 2292   } else if (src_first_rc == rc_int) {
 2293     // gpr ->
 2294     if (dst_first_rc == rc_stack) {
 2295       // gpr -> mem
 2296       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2297           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2298         // 64-bit
 2299         int offset = ra_->reg2offset(dst_first);
 2300         if (masm) {
 2301           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2302 #ifndef PRODUCT
 2303         } else {
 2304           st->print("movq    [rsp + #%d], %s\t# spill",
 2305                      offset,
 2306                      Matcher::regName[src_first]);
 2307 #endif
 2308         }
 2309       } else {
 2310         // 32-bit
 2311         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2312         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2313         int offset = ra_->reg2offset(dst_first);
 2314         if (masm) {
 2315           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2316 #ifndef PRODUCT
 2317         } else {
 2318           st->print("movl    [rsp + #%d], %s\t# spill",
 2319                      offset,
 2320                      Matcher::regName[src_first]);
 2321 #endif
 2322         }
 2323       }
 2324       return 0;
 2325     } else if (dst_first_rc == rc_int) {
 2326       // gpr -> gpr
 2327       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2328           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2329         // 64-bit
 2330         if (masm) {
 2331           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2332                   as_Register(Matcher::_regEncode[src_first]));
 2333 #ifndef PRODUCT
 2334         } else {
 2335           st->print("movq    %s, %s\t# spill",
 2336                      Matcher::regName[dst_first],
 2337                      Matcher::regName[src_first]);
 2338 #endif
 2339         }
 2340         return 0;
 2341       } else {
 2342         // 32-bit
 2343         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2344         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2345         if (masm) {
 2346           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2347                   as_Register(Matcher::_regEncode[src_first]));
 2348 #ifndef PRODUCT
 2349         } else {
 2350           st->print("movl    %s, %s\t# spill",
 2351                      Matcher::regName[dst_first],
 2352                      Matcher::regName[src_first]);
 2353 #endif
 2354         }
 2355         return 0;
 2356       }
 2357     } else if (dst_first_rc == rc_float) {
 2358       // gpr -> xmm
 2359       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2360           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2361         // 64-bit
 2362         if (masm) {
 2363           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2364 #ifndef PRODUCT
 2365         } else {
 2366           st->print("movdq   %s, %s\t# spill",
 2367                      Matcher::regName[dst_first],
 2368                      Matcher::regName[src_first]);
 2369 #endif
 2370         }
 2371       } else {
 2372         // 32-bit
 2373         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2374         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2375         if (masm) {
 2376           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2377 #ifndef PRODUCT
 2378         } else {
 2379           st->print("movdl   %s, %s\t# spill",
 2380                      Matcher::regName[dst_first],
 2381                      Matcher::regName[src_first]);
 2382 #endif
 2383         }
 2384       }
 2385       return 0;
 2386     } else if (dst_first_rc == rc_kreg) {
 2387       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2388           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2389         // 64-bit
 2390         if (masm) {
 2391           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2398         }
 2399       }
 2400       Unimplemented();
 2401       return 0;
 2402     }
 2403   } else if (src_first_rc == rc_float) {
 2404     // xmm ->
 2405     if (dst_first_rc == rc_stack) {
 2406       // xmm -> mem
 2407       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2408           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2409         // 64-bit
 2410         int offset = ra_->reg2offset(dst_first);
 2411         if (masm) {
 2412           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2413 #ifndef PRODUCT
 2414         } else {
 2415           st->print("movsd   [rsp + #%d], %s\t# spill",
 2416                      offset,
 2417                      Matcher::regName[src_first]);
 2418 #endif
 2419         }
 2420       } else {
 2421         // 32-bit
 2422         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2423         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2424         int offset = ra_->reg2offset(dst_first);
 2425         if (masm) {
 2426           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2427 #ifndef PRODUCT
 2428         } else {
 2429           st->print("movss   [rsp + #%d], %s\t# spill",
 2430                      offset,
 2431                      Matcher::regName[src_first]);
 2432 #endif
 2433         }
 2434       }
 2435       return 0;
 2436     } else if (dst_first_rc == rc_int) {
 2437       // xmm -> gpr
 2438       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2439           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2440         // 64-bit
 2441         if (masm) {
 2442           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2443 #ifndef PRODUCT
 2444         } else {
 2445           st->print("movdq   %s, %s\t# spill",
 2446                      Matcher::regName[dst_first],
 2447                      Matcher::regName[src_first]);
 2448 #endif
 2449         }
 2450       } else {
 2451         // 32-bit
 2452         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2453         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2454         if (masm) {
 2455           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2456 #ifndef PRODUCT
 2457         } else {
 2458           st->print("movdl   %s, %s\t# spill",
 2459                      Matcher::regName[dst_first],
 2460                      Matcher::regName[src_first]);
 2461 #endif
 2462         }
 2463       }
 2464       return 0;
 2465     } else if (dst_first_rc == rc_float) {
 2466       // xmm -> xmm
 2467       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2468           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2469         // 64-bit
 2470         if (masm) {
 2471           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2472 #ifndef PRODUCT
 2473         } else {
 2474           st->print("%s  %s, %s\t# spill",
 2475                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2476                      Matcher::regName[dst_first],
 2477                      Matcher::regName[src_first]);
 2478 #endif
 2479         }
 2480       } else {
 2481         // 32-bit
 2482         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2483         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2484         if (masm) {
 2485           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2486 #ifndef PRODUCT
 2487         } else {
 2488           st->print("%s  %s, %s\t# spill",
 2489                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2490                      Matcher::regName[dst_first],
 2491                      Matcher::regName[src_first]);
 2492 #endif
 2493         }
 2494       }
 2495       return 0;
 2496     } else if (dst_first_rc == rc_kreg) {
 2497       assert(false, "Illegal spilling");
 2498       return 0;
 2499     }
 2500   } else if (src_first_rc == rc_kreg) {
 2501     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2503       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2504           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2505         // 64-bit
 2506         int offset = ra_->reg2offset(dst_first);
 2507         if (masm) {
 2508           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2509 #ifndef PRODUCT
 2510         } else {
          st->print("kmovq   [rsp + #%d], %s\t# spill",
 2512                      offset,
 2513                      Matcher::regName[src_first]);
 2514 #endif
 2515         }
 2516       }
 2517       return 0;
 2518     } else if (dst_first_rc == rc_int) {
 2519       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2520           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2521         // 64-bit
 2522         if (masm) {
 2523           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2524 #ifndef PRODUCT
 2525         } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
 2529 #endif
 2530         }
 2531       }
 2532       Unimplemented();
 2533       return 0;
 2534     } else if (dst_first_rc == rc_kreg) {
 2535       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2536           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2537         // 64-bit
 2538         if (masm) {
 2539           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2540 #ifndef PRODUCT
 2541         } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
 2545 #endif
 2546         }
 2547       }
 2548       return 0;
 2549     } else if (dst_first_rc == rc_float) {
 2550       assert(false, "Illegal spill");
 2551       return 0;
 2552     }
 2553   }
 2554 
  assert(false, "unhandled spill combination");
 2556   Unimplemented();
 2557   return 0;
 2558 }
 2559 
 2560 #ifndef PRODUCT
 2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2562   implementation(nullptr, ra_, false, st);
 2563 }
 2564 #endif
 2565 
 2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2567   implementation(masm, ra_, false, nullptr);
 2568 }
 2569 
 2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2571   return MachNode::size(ra_);
 2572 }
 2573 
 2574 //=============================================================================
 2575 #ifndef PRODUCT
 2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2577 {
 2578   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2579   int reg = ra_->get_reg_first(this);
 2580   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2581             Matcher::regName[reg], offset);
 2582 }
 2583 #endif
 2584 
 2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2588   int reg = ra_->get_encode(this);
 2589 
 2590   __ lea(as_Register(reg), Address(rsp, offset));
 2591 }
 2592 
 2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2594 {
 2595   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
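  // Size sketch, assuming an RSP-based lea (which requires a SIB byte):
  // REX + opcode 8D + ModRM + SIB + disp8 is 5 bytes (8 with a disp32);
  // the 2-byte REX2 prefix adds one more byte to each form.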
 2596   if (ra_->get_encode(this) > 15) {
 2597     return (offset < 0x80) ? 6 : 9; // REX2
 2598   } else {
 2599     return (offset < 0x80) ? 5 : 8; // REX
 2600   }
 2601 }
 2602 
 2603 //=============================================================================
 2604 #ifndef PRODUCT
 2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2606 {
 2607   if (UseCompressedClassPointers) {
 2608     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2609     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2610   } else {
    st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2612     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2613   }
 2614   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2615 }
 2616 #endif
 2617 
 2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2619 {
 2620   __ ic_check(InteriorEntryAlignment);
 2621 }
 2622 
 2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2624 {
 2625   return MachNode::size(ra_); // too many variables; just compute it
 2626                               // the hard way
 2627 }
 2628 
 2629 
 2630 //=============================================================================
 2631 
 2632 bool Matcher::supports_vector_calling_convention(void) {
 2633   return EnableVectorSupport;
 2634 }
 2635 
 2636 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2637   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2638 }
 2639 
 2640 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2641   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2642 }
 2643 
 2644 #ifdef ASSERT
 2645 static bool is_ndd_demotable(const MachNode* mdef) {
 2646   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2647 }
 2648 #endif
 2649 
 2650 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2651                                             int oper_index) {
 2652   if (mdef == nullptr) {
 2653     return false;
 2654   }
 2655 
 2656   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2657       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2658     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2659     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2660     return false;
 2661   }
 2662 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the definition towards any single address
  // component will not result in NDD demotion by the assembler.
 2666   if (mdef->operand_num_edges(oper_index) != 1) {
 2667     return false;
 2668   }
 2669 
 2670   // Demotion candidate must be register mask compatible with definition.
 2671   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2672   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2673     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2674     return false;
 2675   }
 2676 
 2677   switch (oper_index) {
  // The first operand of a MachNode matched by an Intel APX NDD selection
  // pattern can share its assigned register with the definition operand if
  // their live ranges do not overlap. In that scenario the assembler can
  // demote the instruction to a legacy map0/map1 encoding by replacing its
  // 4-byte extended EVEX prefix with a shorter REX/REX2 prefix. Demotion
  // candidates are decorated with a special flag by the instruction selector.
 2684   case 1:
 2685     return is_ndd_demotable_opr1(mdef);
 2686 
  // The definition operand of a commutative operation can also be biased
  // towards the second operand.
 2689   case 2:
 2690     return is_ndd_demotable_opr2(mdef);
 2691 
  // The current scheme selects at most two biasing candidates.
 2693   default:
 2694     assert(false, "unhandled operand index: %s", mdef->Name());
 2695     break;
 2696   }
 2697 
 2698   return false;
 2699 }
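
// Illustration of NDD demotion (assumed registers, not generated code):
// an APX NDD instruction such as
//   addq r8, r9, r10    // r8 = r9 + r10, 4-byte extended EVEX prefix
// can be emitted in the legacy form
//   addq r8, r10        // 1/2-byte REX/REX2 prefix
// when allocation biases the definition (r8) into the first source's
// register (r9), so both names denote the same physical register.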
 2700 
 2701 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2702   assert(EnableVectorSupport, "sanity");
 2703   int lo = XMM0_num;
 2704   int hi = XMM0b_num;
 2705   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2706   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2707   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2708   return OptoRegPair(hi, lo);
 2709 }
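
// The returned pair brackets the value's 32-bit register slots; e.g. for
// Op_VecY the value occupies slots XMM0 .. XMM0h, i.e. all of ymm0.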
 2710 
 2711 // Is this branch offset short enough that a short branch can be used?
 2712 //
 2713 // NOTE: If the platform does not provide any short branch variants, then
 2714 //       this method should return false for offset 0.
 2715 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 2719   offset -= br_size;
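  // Worked example: a short branch of br_size = 2 at address A targeting T is
  // passed offset = T - A; the encodable rel8 displacement is relative to the
  // next instruction, T - (A + 2), hence the adjustment above.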
 2720 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
 2723   if (rule == jmpConUCF2_rule)
 2724     return (-126 <= offset && offset <= 125);
 2725   return (-128 <= offset && offset <= 127);
 2726 }
 2727 
 2728 // Return whether or not this register is ever used as an argument.
 2729 // This function is used on startup to build the trampoline stubs in
 2730 // generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
 2733 bool Matcher::can_be_java_arg(int reg)
 2734 {
 2735   return
 2736     reg ==  RDI_num || reg == RDI_H_num ||
 2737     reg ==  RSI_num || reg == RSI_H_num ||
 2738     reg ==  RDX_num || reg == RDX_H_num ||
 2739     reg ==  RCX_num || reg == RCX_H_num ||
 2740     reg ==   R8_num || reg ==  R8_H_num ||
 2741     reg ==   R9_num || reg ==  R9_H_num ||
 2742     reg ==  R12_num || reg == R12_H_num ||
 2743     reg == XMM0_num || reg == XMM0b_num ||
 2744     reg == XMM1_num || reg == XMM1b_num ||
 2745     reg == XMM2_num || reg == XMM2b_num ||
 2746     reg == XMM3_num || reg == XMM3b_num ||
 2747     reg == XMM4_num || reg == XMM4b_num ||
 2748     reg == XMM5_num || reg == XMM5b_num ||
 2749     reg == XMM6_num || reg == XMM6b_num ||
 2750     reg == XMM7_num || reg == XMM7b_num;
 2751 }
 2752 
 2753 bool Matcher::is_spillable_arg(int reg)
 2754 {
 2755   return can_be_java_arg(reg);
 2756 }
 2757 
 2758 uint Matcher::int_pressure_limit()
 2759 {
 2760   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2761 }
 2762 
 2763 uint Matcher::float_pressure_limit()
 2764 {
  // After experimenting with different values, the following default threshold
  // was found to work best for LCM's register pressure scheduling on x64.
 2767   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2768   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2769   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2770 }
 2771 
 2772 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply when the
  // divisor is constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
 2776   return false;
 2777 }
 2778 
 2779 // Register for DIVI projection of divmodI
 2780 const RegMask& Matcher::divI_proj_mask() {
 2781   return INT_RAX_REG_mask();
 2782 }
 2783 
 2784 // Register for MODI projection of divmodI
 2785 const RegMask& Matcher::modI_proj_mask() {
 2786   return INT_RDX_REG_mask();
 2787 }
 2788 
 2789 // Register for DIVL projection of divmodL
 2790 const RegMask& Matcher::divL_proj_mask() {
 2791   return LONG_RAX_REG_mask();
 2792 }
 2793 
 2794 // Register for MODL projection of divmodL
 2795 const RegMask& Matcher::modL_proj_mask() {
 2796   return LONG_RDX_REG_mask();
 2797 }
 2798 
 2799 %}
 2800 
 2801 source_hpp %{
 2802 // Header information of the source block.
 2803 // Method declarations/definitions which are used outside
 2804 // the ad-scope can conveniently be defined here.
 2805 //
 2806 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
 2808 
 2809 #include "runtime/vm_version.hpp"
 2810 
 2811 class NativeJump;
 2812 
 2813 class CallStubImpl {
 2814 
 2815   //--------------------------------------------------------------
 2816   //---<  Used for optimization in Compile::shorten_branches  >---
 2817   //--------------------------------------------------------------
 2818 
 2819  public:
 2820   // Size of call trampoline stub.
 2821   static uint size_call_trampoline() {
 2822     return 0; // no call trampolines on this platform
 2823   }
 2824 
 2825   // number of relocations needed by a call trampoline stub
 2826   static uint reloc_call_trampoline() {
 2827     return 0; // no call trampolines on this platform
 2828   }
 2829 };
 2830 
 2831 class HandlerImpl {
 2832 
 2833  public:
 2834 
 2835   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2836 
 2837   static uint size_deopt_handler() {
 2838     // one call and one jmp.
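    // (a 5-byte call rel32 plus a 2-byte jmp rel8)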
 2839     return 7;
 2840   }
 2841 };
 2842 
 2843 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2845     case  4: // fall-through
 2846     case  8: // fall-through
 2847     case 16: return Assembler::AVX_128bit;
 2848     case 32: return Assembler::AVX_256bit;
 2849     case 64: return Assembler::AVX_512bit;
 2850 
 2851     default: {
 2852       ShouldNotReachHere();
 2853       return Assembler::AVX_NoVec;
 2854     }
 2855   }
 2856 }
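
// Usage sketch: vector_length_encoding(32) yields Assembler::AVX_256bit;
// 4- and 8-byte vectors fall through to the 128-bit encoding because no
// narrower AVX vector length exists.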
 2857 
 2858 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2859   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2860 }
 2861 
 2862 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2863   uint def_idx = use->operand_index(opnd);
 2864   Node* def = use->in(def_idx);
 2865   return vector_length_encoding(def);
 2866 }
 2867 
 2868 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2869   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2870          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2871 }
 2872 
 2873 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2874   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2875            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2876 }
 2877 
 2878 class Node::PD {
 2879 public:
 2880   enum NodeFlags : uint64_t {
 2881     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2882     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2883     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2884     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2885     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2886     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2887     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2888     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2889     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2890     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2891     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2892     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2893     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2894     _last_flag                = Flag_ndd_demotable_opr2
 2895   };
 2896 };
 2897 
 2898 %} // end source_hpp
 2899 
 2900 source %{
 2901 
 2902 #include "opto/addnode.hpp"
 2903 #include "c2_intelJccErratum_x86.hpp"
 2904 
 2905 void PhaseOutput::pd_perform_mach_node_analysis() {
 2906   if (VM_Version::has_intel_jcc_erratum()) {
 2907     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2908     _buf_sizes._code += extra_padding;
 2909   }
 2910 }
 2911 
 2912 int MachNode::pd_alignment_required() const {
 2913   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2914     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2915     return IntelJccErratum::largest_jcc_size() + 1;
 2916   } else {
 2917     return 1;
 2918   }
 2919 }
 2920 
 2921 int MachNode::compute_padding(int current_offset) const {
 2922   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2923     Compile* C = Compile::current();
 2924     PhaseOutput* output = C->output();
 2925     Block* block = output->block();
 2926     int index = output->index();
 2927     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2928   } else {
 2929     return 0;
 2930   }
 2931 }
 2932 
 2933 // Emit deopt handler code.
 2934 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2935 
 2936   // Note that the code buffer's insts_mark is always relative to insts.
 2937   // That's why we must use the macroassembler to generate a handler.
 2938   address base = __ start_a_stub(size_deopt_handler());
 2939   if (base == nullptr) {
 2940     ciEnv::current()->record_failure("CodeCache is full");
 2941     return 0;  // CodeBuffer::expand failed
 2942   }
 2943   int offset = __ offset();
 2944 
 2945   Label start;
 2946   __ bind(start);
 2947 
 2948   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2949 
 2950   int entry_offset = __ offset();
 2951 
 2952   __ jmp(start);
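
  // Entering the handler at entry_offset runs the jmp back to the call; the
  // call pushes a return address equal to entry_offset itself and transfers
  // control to the deopt blob's unpack entry (a sketch of the intended
  // layout; see the asserts below).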
 2953 
 2954   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2955   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2956          "out of bounds read in post-call NOP check");
 2957   __ end_a_stub();
 2958   return entry_offset;
 2959 }
 2960 
 2961 static Assembler::Width widthForType(BasicType bt) {
 2962   if (bt == T_BYTE) {
 2963     return Assembler::B;
 2964   } else if (bt == T_SHORT) {
 2965     return Assembler::W;
 2966   } else if (bt == T_INT) {
 2967     return Assembler::D;
 2968   } else {
 2969     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2970     return Assembler::Q;
 2971   }
 2972 }
 2973 
 2974 //=============================================================================
 2975 
// Float masks come from different places depending on platform.
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
 2996 
 2997 //=============================================================================
 2998 bool Matcher::match_rule_supported(int opcode) {
 2999   if (!has_match_rule(opcode)) {
 3000     return false; // no match rule present
 3001   }
 3002   switch (opcode) {
 3003     case Op_AbsVL:
 3004     case Op_StoreVectorScatter:
 3005       if (UseAVX < 3) {
 3006         return false;
 3007       }
 3008       break;
 3009     case Op_PopCountI:
 3010     case Op_PopCountL:
 3011       if (!UsePopCountInstruction) {
 3012         return false;
 3013       }
 3014       break;
 3015     case Op_PopCountVI:
 3016       if (UseAVX < 2) {
 3017         return false;
 3018       }
 3019       break;
 3020     case Op_CompressV:
 3021     case Op_ExpandV:
 3022     case Op_PopCountVL:
 3023       if (UseAVX < 2) {
 3024         return false;
 3025       }
 3026       break;
 3027     case Op_MulVI:
 3028       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3029         return false;
 3030       }
 3031       break;
 3032     case Op_MulVL:
 3033       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3034         return false;
 3035       }
 3036       break;
 3037     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
 3039         return false;
 3040       }
 3041       break;
 3042     case Op_AbsVB:
 3043     case Op_AbsVS:
 3044     case Op_AbsVI:
 3045     case Op_AddReductionVI:
 3046     case Op_AndReductionV:
 3047     case Op_OrReductionV:
 3048     case Op_XorReductionV:
 3049       if (UseSSE < 3) { // requires at least SSSE3
 3050         return false;
 3051       }
 3052       break;
 3053     case Op_MaxHF:
 3054     case Op_MinHF:
 3055       if (!VM_Version::supports_avx512vlbw()) {
 3056         return false;
 3057       }  // fallthrough
 3058     case Op_AddHF:
 3059     case Op_DivHF:
 3060     case Op_FmaHF:
 3061     case Op_MulHF:
 3062     case Op_ReinterpretS2HF:
 3063     case Op_ReinterpretHF2S:
 3064     case Op_SubHF:
 3065     case Op_SqrtHF:
 3066       if (!VM_Version::supports_avx512_fp16()) {
 3067         return false;
 3068       }
 3069       break;
 3070     case Op_VectorLoadShuffle:
 3071     case Op_VectorRearrange:
 3072     case Op_MulReductionVI:
 3073       if (UseSSE < 4) { // requires at least SSE4
 3074         return false;
 3075       }
 3076       break;
 3077     case Op_IsInfiniteF:
 3078     case Op_IsInfiniteD:
 3079       if (!VM_Version::supports_avx512dq()) {
 3080         return false;
 3081       }
 3082       break;
 3083     case Op_SqrtVD:
 3084     case Op_SqrtVF:
 3085     case Op_VectorMaskCmp:
 3086     case Op_VectorCastB2X:
 3087     case Op_VectorCastS2X:
 3088     case Op_VectorCastI2X:
 3089     case Op_VectorCastL2X:
 3090     case Op_VectorCastF2X:
 3091     case Op_VectorCastD2X:
 3092     case Op_VectorUCastB2X:
 3093     case Op_VectorUCastS2X:
 3094     case Op_VectorUCastI2X:
 3095     case Op_VectorMaskCast:
 3096       if (UseAVX < 1) { // enabled for AVX only
 3097         return false;
 3098       }
 3099       break;
 3100     case Op_PopulateIndex:
 3101       if (UseAVX < 2) {
 3102         return false;
 3103       }
 3104       break;
 3105     case Op_RoundVF:
 3106       if (UseAVX < 2) { // enabled for AVX2 only
 3107         return false;
 3108       }
 3109       break;
 3110     case Op_RoundVD:
 3111       if (UseAVX < 3) {
 3112         return false;  // enabled for AVX3 only
 3113       }
 3114       break;
 3115     case Op_CompareAndSwapL:
 3116     case Op_CompareAndSwapP:
 3117       break;
 3118     case Op_StrIndexOf:
 3119       if (!UseSSE42Intrinsics) {
 3120         return false;
 3121       }
 3122       break;
 3123     case Op_StrIndexOfChar:
 3124       if (!UseSSE42Intrinsics) {
 3125         return false;
 3126       }
 3127       break;
 3128     case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
 3130         return false;
 3131       }
 3132       break;
 3133     case Op_MulVB:
 3134     case Op_LShiftVB:
 3135     case Op_RShiftVB:
 3136     case Op_URShiftVB:
 3137     case Op_VectorInsert:
 3138     case Op_VectorLoadMask:
 3139     case Op_VectorStoreMask:
 3140     case Op_VectorBlend:
 3141       if (UseSSE < 4) {
 3142         return false;
 3143       }
 3144       break;
 3145     case Op_MaxD:
 3146     case Op_MaxF:
 3147     case Op_MinD:
 3148     case Op_MinF:
 3149       if (UseAVX < 1) { // enabled for AVX only
 3150         return false;
 3151       }
 3152       break;
 3153     case Op_CacheWB:
 3154     case Op_CacheWBPreSync:
 3155     case Op_CacheWBPostSync:
 3156       if (!VM_Version::supports_data_cache_line_flush()) {
 3157         return false;
 3158       }
 3159       break;
 3160     case Op_ExtractB:
 3161     case Op_ExtractL:
 3162     case Op_ExtractI:
 3163     case Op_RoundDoubleMode:
 3164       if (UseSSE < 4) {
 3165         return false;
 3166       }
 3167       break;
 3168     case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
 3170         return false; // 128bit vroundpd is not available
 3171       }
 3172       break;
 3173     case Op_LoadVectorGather:
 3174     case Op_LoadVectorGatherMasked:
 3175       if (UseAVX < 2) {
 3176         return false;
 3177       }
 3178       break;
 3179     case Op_FmaF:
 3180     case Op_FmaD:
 3181     case Op_FmaVD:
 3182     case Op_FmaVF:
 3183       if (!UseFMA) {
 3184         return false;
 3185       }
 3186       break;
 3187     case Op_MacroLogicV:
 3188       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3189         return false;
 3190       }
 3191       break;
 3192 
 3193     case Op_VectorCmpMasked:
 3194     case Op_VectorMaskGen:
 3195       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3196         return false;
 3197       }
 3198       break;
 3199     case Op_VectorMaskFirstTrue:
 3200     case Op_VectorMaskLastTrue:
 3201     case Op_VectorMaskTrueCount:
 3202     case Op_VectorMaskToLong:
 3203       if (UseAVX < 1) {
 3204          return false;
 3205       }
 3206       break;
 3207     case Op_RoundF:
 3208     case Op_RoundD:
 3209       break;
 3210     case Op_CopySignD:
 3211     case Op_CopySignF:
 3212       if (UseAVX < 3)  {
 3213         return false;
 3214       }
 3215       if (!VM_Version::supports_avx512vl()) {
 3216         return false;
 3217       }
 3218       break;
 3219     case Op_CompressBits:
 3220     case Op_ExpandBits:
 3221       if (!VM_Version::supports_bmi2()) {
 3222         return false;
 3223       }
 3224       break;
 3225     case Op_CompressM:
 3226       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3227         return false;
 3228       }
 3229       break;
 3230     case Op_ConvF2HF:
 3231     case Op_ConvHF2F:
 3232       if (!VM_Version::supports_float16()) {
 3233         return false;
 3234       }
 3235       break;
 3236     case Op_VectorCastF2HF:
 3237     case Op_VectorCastHF2F:
 3238       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3239         return false;
 3240       }
 3241       break;
 3242   }
 3243   return true;  // Match rules are supported by default.
 3244 }
 3245 
 3246 //------------------------------------------------------------------------
 3247 
 3248 static inline bool is_pop_count_instr_target(BasicType bt) {
 3249   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3250          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3251 }
 3252 
 3253 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3254   return match_rule_supported_vector(opcode, vlen, bt);
 3255 }
 3256 
// Identify extra cases in which we might want to provide match rules for
// vector nodes and other intrinsics guarded with vector length (vlen) and
// element type (bt).
 3259 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3260   if (!match_rule_supported(opcode)) {
 3261     return false;
 3262   }
 3263   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3264   //   * SSE2 supports 128bit vectors for all types;
 3265   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3266   //   * AVX2 supports 256bit vectors for all types;
 3267   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3268   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3269   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3270   // And MaxVectorSize is taken into account as well.
 3271   if (!vector_size_supported(bt, vlen)) {
 3272     return false;
 3273   }
 3274   // Special cases which require vector length follow:
 3275   //   * implementation limitations
 3276   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3277   //   * 128bit vroundpd instruction is present only in AVX1
 3278   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
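  // For example, vlen = 8 with bt = T_INT gives 8 * 4 * 8 = 256 bits, i.e. a
  // ymm-sized vector.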
 3279   switch (opcode) {
 3280     case Op_MaxVHF:
 3281     case Op_MinVHF:
 3282       if (!VM_Version::supports_avx512bw()) {
 3283         return false;
      } // fallthrough
 3285     case Op_AddVHF:
 3286     case Op_DivVHF:
 3287     case Op_FmaVHF:
 3288     case Op_MulVHF:
 3289     case Op_SubVHF:
 3290     case Op_SqrtVHF:
 3291       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3292         return false;
 3293       }
 3294       if (!VM_Version::supports_avx512_fp16()) {
 3295         return false;
 3296       }
 3297       break;
 3298     case Op_AbsVF:
 3299     case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
 3301         return false; // 512bit vandps and vxorps are not available
 3302       }
 3303       break;
 3304     case Op_AbsVD:
 3305     case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
 3307         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3308       }
 3309       break;
 3310     case Op_RotateRightV:
 3311     case Op_RotateLeftV:
 3312       if (bt != T_INT && bt != T_LONG) {
 3313         return false;
 3314       } // fallthrough
 3315     case Op_MacroLogicV:
 3316       if (!VM_Version::supports_evex() ||
 3317           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3318         return false;
 3319       }
 3320       break;
 3321     case Op_ClearArray:
 3322     case Op_VectorMaskGen:
 3323     case Op_VectorCmpMasked:
 3324       if (!VM_Version::supports_avx512bw()) {
 3325         return false;
 3326       }
 3327       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3328         return false;
 3329       }
 3330       break;
 3331     case Op_LoadVectorMasked:
 3332     case Op_StoreVectorMasked:
 3333       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3334         return false;
 3335       }
 3336       break;
 3337     case Op_UMinV:
 3338     case Op_UMaxV:
 3339       if (UseAVX == 0) {
 3340         return false;
 3341       }
 3342       break;
 3343     case Op_MaxV:
 3344     case Op_MinV:
 3345       if (UseSSE < 4 && is_integral_type(bt)) {
 3346         return false;
 3347       }
      if (bt == T_FLOAT || bt == T_DOUBLE) {
        // Float/Double intrinsics are enabled for the AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        // 512-bit Float/Double intrinsics need AVX512DQ.
        if (UseAVX > 2 && !VM_Version::supports_avx512dq() && size_in_bits == 512) {
          return false;
        }
      }
 3357       break;
 3358     case Op_CallLeafVector:
 3359       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3360         return false;
 3361       }
 3362       break;
 3363     case Op_AddReductionVI:
 3364       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3365         return false;
 3366       }
 3367       // fallthrough
 3368     case Op_AndReductionV:
 3369     case Op_OrReductionV:
 3370     case Op_XorReductionV:
 3371       if (is_subword_type(bt) && (UseSSE < 4)) {
 3372         return false;
 3373       }
 3374       break;
 3375     case Op_MinReductionV:
 3376     case Op_MaxReductionV:
 3377       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3378         return false;
 3379       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3380         return false;
 3381       }
 3382       // Float/Double intrinsics enabled for AVX family.
 3383       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3384         return false;
 3385       }
 3386       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3387         return false;
 3388       }
 3389       break;
 3390     case Op_VectorBlend:
 3391       if (UseAVX == 0 && size_in_bits < 128) {
 3392         return false;
 3393       }
 3394       break;
 3395     case Op_VectorTest:
 3396       if (UseSSE < 4) {
 3397         return false; // Implementation limitation
 3398       } else if (size_in_bits < 32) {
 3399         return false; // Implementation limitation
 3400       }
 3401       break;
 3402     case Op_VectorLoadShuffle:
 3403     case Op_VectorRearrange:
      if (vlen == 2) {
 3405         return false; // Implementation limitation due to how shuffle is loaded
 3406       } else if (size_in_bits == 256 && UseAVX < 2) {
 3407         return false; // Implementation limitation
 3408       }
 3409       break;
 3410     case Op_VectorLoadMask:
 3411     case Op_VectorMaskCast:
 3412       if (size_in_bits == 256 && UseAVX < 2) {
 3413         return false; // Implementation limitation
 3414       }
 3415       // fallthrough
 3416     case Op_VectorStoreMask:
 3417       if (vlen == 2) {
 3418         return false; // Implementation limitation
 3419       }
 3420       break;
 3421     case Op_PopulateIndex:
 3422       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3423         return false;
 3424       }
 3425       break;
 3426     case Op_VectorCastB2X:
 3427     case Op_VectorCastS2X:
 3428     case Op_VectorCastI2X:
 3429       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3430         return false;
 3431       }
 3432       break;
 3433     case Op_VectorCastL2X:
 3434       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3435         return false;
 3436       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3437         return false;
 3438       }
 3439       break;
 3440     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversion to sub-word types
        // happens after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
 3444         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3445         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3446           return false;
 3447         }
 3448       }
 3449       // fallthrough
 3450     case Op_VectorCastD2X:
 3451       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3452         return false;
 3453       }
 3454       break;
 3455     case Op_VectorCastF2HF:
 3456     case Op_VectorCastHF2F:
 3457       if (!VM_Version::supports_f16c() &&
 3458          ((!VM_Version::supports_evex() ||
 3459          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3460         return false;
 3461       }
 3462       break;
 3463     case Op_RoundVD:
 3464       if (!VM_Version::supports_avx512dq()) {
 3465         return false;
 3466       }
 3467       break;
 3468     case Op_MulReductionVI:
 3469       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3470         return false;
 3471       }
 3472       break;
 3473     case Op_LoadVectorGatherMasked:
 3474       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3475         return false;
 3476       }
 3477       if (is_subword_type(bt) &&
 3478          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3479           (size_in_bits < 64)                                      ||
 3480           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3481         return false;
 3482       }
 3483       break;
 3484     case Op_StoreVectorScatterMasked:
 3485     case Op_StoreVectorScatter:
 3486       if (is_subword_type(bt)) {
 3487         return false;
 3488       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3489         return false;
 3490       }
 3491       // fallthrough
 3492     case Op_LoadVectorGather:
 3493       if (!is_subword_type(bt) && size_in_bits == 64) {
 3494         return false;
 3495       }
 3496       if (is_subword_type(bt) && size_in_bits < 64) {
 3497         return false;
 3498       }
 3499       break;
 3500     case Op_SaturatingAddV:
 3501     case Op_SaturatingSubV:
 3502       if (UseAVX < 1) {
 3503         return false; // Implementation limitation
 3504       }
 3505       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3506         return false;
 3507       }
 3508       break;
 3509     case Op_SelectFromTwoVector:
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
 3526     case Op_MaskAll:
 3527       if (!VM_Version::supports_evex()) {
 3528         return false;
 3529       }
 3530       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3531         return false;
 3532       }
 3533       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3534         return false;
 3535       }
 3536       break;
 3537     case Op_VectorMaskCmp:
 3538       if (vlen < 2 || size_in_bits < 32) {
 3539         return false;
 3540       }
 3541       break;
 3542     case Op_CompressM:
 3543       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3544         return false;
 3545       }
 3546       break;
 3547     case Op_CompressV:
 3548     case Op_ExpandV:
 3549       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3550         return false;
 3551       }
      if (size_in_bits < 128) {
        return false;
      } // fallthrough
 3555     case Op_VectorLongToMask:
 3556       if (UseAVX < 1) {
 3557         return false;
 3558       }
 3559       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3560         return false;
 3561       }
 3562       break;
 3563     case Op_SignumVD:
 3564     case Op_SignumVF:
 3565       if (UseAVX < 1) {
 3566         return false;
 3567       }
 3568       break;
 3569     case Op_PopCountVI:
 3570     case Op_PopCountVL: {
 3571         if (!is_pop_count_instr_target(bt) &&
 3572             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3573           return false;
 3574         }
 3575       }
 3576       break;
 3577     case Op_ReverseV:
 3578     case Op_ReverseBytesV:
 3579       if (UseAVX < 2) {
 3580         return false;
 3581       }
 3582       break;
 3583     case Op_CountTrailingZerosV:
 3584     case Op_CountLeadingZerosV:
 3585       if (UseAVX < 2) {
 3586         return false;
 3587       }
 3588       break;
 3589   }
 3590   return true;  // Per default match rules are supported.
 3591 }
 3592 
 3593 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of
  // a pattern based on the IR opcode. Most unary/binary/ternary masked
  // operations share the IR nodes of their non-masked counterparts, with the
  // mask edge being the differentiator. This routine therefore does a strict
  // check for the existence of masked operation patterns, returning false by
  // default for all opcodes apart from the ones whose masked instruction
  // patterns are defined in this file.
 3600   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3601     return false;
 3602   }
 3603 
 3604   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3605   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3606     return false;
 3607   }
  switch (opcode) {
 3609     // Unary masked operations
 3610     case Op_AbsVB:
 3611     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      } // fallthrough
 3615     case Op_AbsVI:
 3616     case Op_AbsVL:
 3617       return true;
 3618 
 3619     // Ternary masked operations
 3620     case Op_FmaVF:
 3621     case Op_FmaVD:
 3622       return true;
 3623 
 3624     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3626         return false;
 3627       }
 3628       return true;
 3629 
 3630     // Binary masked operations
 3631     case Op_AddVB:
 3632     case Op_AddVS:
 3633     case Op_SubVB:
 3634     case Op_SubVS:
 3635     case Op_MulVS:
 3636     case Op_LShiftVS:
 3637     case Op_RShiftVS:
 3638     case Op_URShiftVS:
 3639       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3640       if (!VM_Version::supports_avx512bw()) {
 3641         return false;  // Implementation limitation
 3642       }
 3643       return true;
 3644 
 3645     case Op_MulVL:
 3646       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3647       if (!VM_Version::supports_avx512dq()) {
 3648         return false;  // Implementation limitation
 3649       }
 3650       return true;
 3651 
 3652     case Op_AndV:
 3653     case Op_OrV:
 3654     case Op_XorV:
 3655     case Op_RotateRightV:
 3656     case Op_RotateLeftV:
 3657       if (bt != T_INT && bt != T_LONG) {
 3658         return false; // Implementation limitation
 3659       }
 3660       return true;
 3661 
 3662     case Op_VectorLoadMask:
 3663       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3664       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3665         return false;
 3666       }
 3667       return true;
 3668 
 3669     case Op_AddVI:
 3670     case Op_AddVL:
 3671     case Op_AddVF:
 3672     case Op_AddVD:
 3673     case Op_SubVI:
 3674     case Op_SubVL:
 3675     case Op_SubVF:
 3676     case Op_SubVD:
 3677     case Op_MulVI:
 3678     case Op_MulVF:
 3679     case Op_MulVD:
 3680     case Op_DivVF:
 3681     case Op_DivVD:
 3682     case Op_SqrtVF:
 3683     case Op_SqrtVD:
 3684     case Op_LShiftVI:
 3685     case Op_LShiftVL:
 3686     case Op_RShiftVI:
 3687     case Op_RShiftVL:
 3688     case Op_URShiftVI:
 3689     case Op_URShiftVL:
 3690     case Op_LoadVectorMasked:
 3691     case Op_StoreVectorMasked:
 3692     case Op_LoadVectorGatherMasked:
 3693     case Op_StoreVectorScatterMasked:
 3694       return true;
 3695 
 3696     case Op_UMinV:
 3697     case Op_UMaxV:
 3698       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3699         return false;
 3700       } // fallthrough
 3701     case Op_MaxV:
 3702     case Op_MinV:
 3703       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3704         return false; // Implementation limitation
 3705       }
 3706       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3707         return false; // Implementation limitation
 3708       }
 3709       return true;
 3710     case Op_SaturatingAddV:
 3711     case Op_SaturatingSubV:
 3712       if (!is_subword_type(bt)) {
 3713         return false;
 3714       }
 3715       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3716         return false; // Implementation limitation
 3717       }
 3718       return true;
 3719 
 3720     case Op_VectorMaskCmp:
 3721       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3722         return false; // Implementation limitation
 3723       }
 3724       return true;
 3725 
 3726     case Op_VectorRearrange:
 3727       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3728         return false; // Implementation limitation
 3729       }
 3730       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3731         return false; // Implementation limitation
 3732       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3733         return false; // Implementation limitation
 3734       }
 3735       return true;
 3736 
 3737     // Binary Logical operations
 3738     case Op_AndVMask:
 3739     case Op_OrVMask:
 3740     case Op_XorVMask:
 3741       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3742         return false; // Implementation limitation
 3743       }
 3744       return true;
 3745 
 3746     case Op_PopCountVI:
 3747     case Op_PopCountVL:
 3748       if (!is_pop_count_instr_target(bt)) {
 3749         return false;
 3750       }
 3751       return true;
 3752 
 3753     case Op_MaskAll:
 3754       return true;
 3755 
 3756     case Op_CountLeadingZerosV:
 3757       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3758         return true;
 3759       }
 3760     default:
 3761       return false;
 3762   }
 3763 }
 3764 
 3765 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3766   return false;
 3767 }
 3768 
 3769 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3770 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3771   switch (elem_bt) {
 3772     case T_BYTE:  return false;
 3773     case T_SHORT: return !VM_Version::supports_avx512bw();
 3774     case T_INT:   return !VM_Version::supports_avx();
 3775     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3776     default:
 3777       ShouldNotReachHere();
 3778       return false;
 3779   }
 3780 }
 3781 
 3782 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3783   // Prefer predicate if the mask type is "TypeVectMask".
 3784   return vt->isa_vectmask() != nullptr;
 3785 }
 3786 
 3787 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3788   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3789   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3790   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3791       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3792     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3793     return new legVecZOper();
 3794   }
 3795   if (legacy) {
 3796     switch (ideal_reg) {
 3797       case Op_VecS: return new legVecSOper();
 3798       case Op_VecD: return new legVecDOper();
 3799       case Op_VecX: return new legVecXOper();
 3800       case Op_VecY: return new legVecYOper();
 3801       case Op_VecZ: return new legVecZOper();
 3802     }
 3803   } else {
 3804     switch (ideal_reg) {
 3805       case Op_VecS: return new vecSOper();
 3806       case Op_VecD: return new vecDOper();
 3807       case Op_VecX: return new vecXOper();
 3808       case Op_VecY: return new vecYOper();
 3809       case Op_VecZ: return new vecZOper();
 3810     }
 3811   }
 3812   ShouldNotReachHere();
 3813   return nullptr;
 3814 }
 3815 
 3816 bool Matcher::is_reg2reg_move(MachNode* m) {
 3817   switch (m->rule()) {
 3818     case MoveVec2Leg_rule:
 3819     case MoveLeg2Vec_rule:
 3820     case MoveF2VL_rule:
 3821     case MoveF2LEG_rule:
 3822     case MoveVL2F_rule:
 3823     case MoveLEG2F_rule:
 3824     case MoveD2VL_rule:
 3825     case MoveD2LEG_rule:
 3826     case MoveVL2D_rule:
 3827     case MoveLEG2D_rule:
 3828       return true;
 3829     default:
 3830       return false;
 3831   }
 3832 }
 3833 
 3834 bool Matcher::is_generic_vector(MachOper* opnd) {
 3835   switch (opnd->opcode()) {
 3836     case VEC:
 3837     case LEGVEC:
 3838       return true;
 3839     default:
 3840       return false;
 3841   }
 3842 }
 3843 
 3844 //------------------------------------------------------------------------
 3845 
 3846 const RegMask* Matcher::predicate_reg_mask(void) {
 3847   return &_VECTMASK_REG_mask;
 3848 }
 3849 
 3850 // Max vector size in bytes. 0 if not supported.
 3851 int Matcher::vector_width_in_bytes(BasicType bt) {
 3852   assert(is_java_primitive(bt), "only primitive type vectors");
 3853   // SSE2 supports 128bit vectors for all types.
 3854   // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
 3856   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3857   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3858   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3859     size = (UseAVX > 2) ? 64 : 32;
 3860   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3861     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3862   // Use flag to limit vector size.
 3863   size = MIN2(size,(int)MaxVectorSize);
 3864   // Minimum 2 values in vector (or 4 for bytes).
 3865   switch (bt) {
 3866   case T_DOUBLE:
 3867   case T_LONG:
 3868     if (size < 16) return 0;
 3869     break;
 3870   case T_FLOAT:
 3871   case T_INT:
 3872     if (size < 8) return 0;
 3873     break;
 3874   case T_BOOLEAN:
 3875     if (size < 4) return 0;
 3876     break;
 3877   case T_CHAR:
 3878     if (size < 4) return 0;
 3879     break;
 3880   case T_BYTE:
 3881     if (size < 4) return 0;
 3882     break;
 3883   case T_SHORT:
 3884     if (size < 4) return 0;
 3885     break;
 3886   default:
 3887     ShouldNotReachHere();
 3888   }
 3889   return size;
 3890 }
 3891 
 3892 // Limits on vector size (number of elements) loaded into vector.
 3893 int Matcher::max_vector_size(const BasicType bt) {
 3894   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3895 }
 3896 int Matcher::min_vector_size(const BasicType bt) {
 3897   int max_size = max_vector_size(bt);
 3898   // Min size which can be loaded into vector is 4 bytes.
 3899   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3900   // Support for calling svml double64 vectors
 3901   if (bt == T_DOUBLE) {
 3902     size = 1;
 3903   }
 3904   return MIN2(size,max_size);
 3905 }
 3906 
 3907 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3908   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3909   // by default on Cascade Lake
 3910   if (VM_Version::is_default_intel_cascade_lake()) {
 3911     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3912   }
 3913   return Matcher::max_vector_size(bt);
 3914 }
 3915 
 3916 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3917   return -1;
 3918 }
 3919 
 3920 // Vector ideal reg corresponding to specified size in bytes
 3921 uint Matcher::vector_ideal_reg(int size) {
 3922   assert(MaxVectorSize >= size, "");
 3923   switch(size) {
 3924     case  4: return Op_VecS;
 3925     case  8: return Op_VecD;
 3926     case 16: return Op_VecX;
 3927     case 32: return Op_VecY;
 3928     case 64: return Op_VecZ;
 3929   }
 3930   ShouldNotReachHere();
 3931   return 0;
 3932 }
 3933 
 3934 // Check for shift by small constant as well
 3935 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3936   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3937       shift->in(2)->get_int() <= 3 &&
 3938       // Are there other uses besides address expressions?
 3939       !matcher->is_visited(shift)) {
 3940     address_visited.set(shift->_idx); // Flag as address_visited
 3941     mstack.push(shift->in(2), Matcher::Visit);
 3942     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3946     if (conv->Opcode() == Op_ConvI2L &&
 3947         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3948         // Are there other uses besides address expressions?
 3949         !matcher->is_visited(conv)) {
 3950       address_visited.set(conv->_idx); // Flag as address_visited
 3951       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3952     } else {
 3953       mstack.push(conv, Matcher::Pre_Visit);
 3954     }
 3955     return true;
 3956   }
 3957   return false;
 3958 }
 3959 
 3960 // This function identifies sub-graphs in which a 'load' node is
 3961 // input to two different nodes, and such that it can be matched
 3962 // with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
 3964 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3965 // refers to the same node.
 3966 //
 3967 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3968 // This is a temporary solution until we make DAGs expressible in ADL.
 3969 template<typename ConType>
 3970 class FusedPatternMatcher {
 3971   Node* _op1_node;
 3972   Node* _mop_node;
 3973   int _con_op;
 3974 
 3975   static int match_next(Node* n, int next_op, int next_op_idx) {
 3976     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3977       return -1;
 3978     }
 3979 
 3980     if (next_op_idx == -1) { // n is commutative, try rotations
 3981       if (n->in(1)->Opcode() == next_op) {
 3982         return 1;
 3983       } else if (n->in(2)->Opcode() == next_op) {
 3984         return 2;
 3985       }
 3986     } else {
 3987       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3988       if (n->in(next_op_idx)->Opcode() == next_op) {
 3989         return next_op_idx;
 3990       }
 3991     }
 3992     return -1;
 3993   }
 3994 
 3995  public:
 3996   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 3997     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 3998 
 3999   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4000              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4001              typename ConType::NativeType con_value) {
 4002     if (_op1_node->Opcode() != op1) {
 4003       return false;
 4004     }
 4005     if (_mop_node->outcnt() > 2) {
 4006       return false;
 4007     }
 4008     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4009     if (op1_op2_idx == -1) {
 4010       return false;
 4011     }
 4012     // Memory operation must be the other edge
 4013     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4014 
 4015     // Check that the mop node is really what we want
 4016     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4017       Node* op2_node = _op1_node->in(op1_op2_idx);
 4018       if (op2_node->outcnt() > 1) {
 4019         return false;
 4020       }
 4021       assert(op2_node->Opcode() == op2, "Should be");
 4022       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4023       if (op2_con_idx == -1) {
 4024         return false;
 4025       }
 4026       // Memory operation must be the other edge
 4027       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4028       // Check that the memory operation is the same node
 4029       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4030         // Now check the constant
 4031         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4032         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4033           return true;
 4034         }
 4035       }
 4036     }
 4037     return false;
 4038   }
 4039 };
 4040 
 4041 static bool is_bmi_pattern(Node* n, Node* m) {
 4042   assert(UseBMI1Instructions, "sanity");
 4043   if (n != nullptr && m != nullptr) {
 4044     if (m->Opcode() == Op_LoadI) {
 4045       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4046       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4047              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4048              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4049     } else if (m->Opcode() == Op_LoadL) {
 4050       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4051       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4052              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4053              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4054     }
 4055   }
 4056   return false;
 4057 }
 4058 
 4059 // Should the matcher clone input 'm' of node 'n'?
 4060 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4061   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4062   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4063     mstack.push(m, Visit);
 4064     return true;
 4065   }
 4066   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4067     mstack.push(m, Visit);           // m = ShiftCntV
 4068     return true;
 4069   }
 4070   if (is_encode_and_store_pattern(n, m)) {
 4071     mstack.push(m, Visit);
 4072     return true;
 4073   }
 4074   return false;
 4075 }
 4076 
 4077 // Should the Matcher clone shifts on addressing modes, expecting them
 4078 // to be subsumed into complex addressing expressions or compute them
 4079 // into registers?
 4080 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4081   Node *off = m->in(AddPNode::Offset);
 4082   if (off->is_Con()) {
 4083     address_visited.test_set(m->_idx); // Flag as address_visited
 4084     Node *adr = m->in(AddPNode::Address);
 4085 
 4086     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4087     // AtomicAdd is not an addressing expression.
 4088     // Cheap to find it by looking for screwy base.
 4089     if (adr->is_AddP() &&
 4090         !adr->in(AddPNode::Base)->is_top() &&
 4091         !adr->in(AddPNode::Offset)->is_Con() &&
 4092         off->get_long() == (int) (off->get_long()) && // immL32
 4093         // Are there other uses besides address expressions?
 4094         !is_visited(adr)) {
 4095       address_visited.set(adr->_idx); // Flag as address_visited
 4096       Node *shift = adr->in(AddPNode::Offset);
 4097       if (!clone_shift(shift, this, mstack, address_visited)) {
 4098         mstack.push(shift, Pre_Visit);
 4099       }
 4100       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4101       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4102     } else {
 4103       mstack.push(adr, Pre_Visit);
 4104     }
 4105 
 4106     // Clone X+offset as it also folds into most addressing expressions
 4107     mstack.push(off, Visit);
 4108     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4109     return true;
 4110   } else if (clone_shift(off, this, mstack, address_visited)) {
 4111     address_visited.test_set(m->_idx); // Flag as address_visited
 4112     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4113     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4114     return true;
 4115   }
 4116   return false;
 4117 }
 4118 
 4119 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4120   switch (bt) {
 4121     case BoolTest::eq:
 4122       return Assembler::eq;
 4123     case BoolTest::ne:
 4124       return Assembler::neq;
 4125     case BoolTest::le:
 4126     case BoolTest::ule:
 4127       return Assembler::le;
 4128     case BoolTest::ge:
 4129     case BoolTest::uge:
 4130       return Assembler::nlt;
 4131     case BoolTest::lt:
 4132     case BoolTest::ult:
 4133       return Assembler::lt;
 4134     case BoolTest::gt:
 4135     case BoolTest::ugt:
 4136       return Assembler::nle;
 4137     default : ShouldNotReachHere(); return Assembler::_false;
 4138   }
 4139 }
 4140 
 4141 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4142   switch (bt) {
 4143   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4144   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4145   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4146   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4147   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4148   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4149   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4150   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4151   }
 4152 }
 4153 
 4154 // Helper methods for MachSpillCopyNode::implementation().
 4155 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4156                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4157   assert(ireg == Op_VecS || // 32bit vector
 4158          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4159           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4160          "no non-adjacent vector moves" );
 4161   if (masm) {
 4162     switch (ireg) {
 4163     case Op_VecS: // copy whole register
 4164     case Op_VecD:
 4165     case Op_VecX:
 4166       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4167         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4168       } else {
 4169         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4170      }
 4171       break;
 4172     case Op_VecY:
 4173       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4174         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4175       } else {
 4176         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4177      }
 4178       break;
 4179     case Op_VecZ:
 4180       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4181       break;
 4182     default:
 4183       ShouldNotReachHere();
 4184     }
 4185 #ifndef PRODUCT
 4186   } else {
 4187     switch (ireg) {
 4188     case Op_VecS:
 4189     case Op_VecD:
 4190     case Op_VecX:
 4191       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4192       break;
 4193     case Op_VecY:
 4194     case Op_VecZ:
 4195       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4196       break;
 4197     default:
 4198       ShouldNotReachHere();
 4199     }
 4200 #endif
 4201   }
 4202 }
 4203 
 4204 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4205                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4206   if (masm) {
 4207     if (is_load) {
 4208       switch (ireg) {
 4209       case Op_VecS:
 4210         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4211         break;
 4212       case Op_VecD:
 4213         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4214         break;
 4215       case Op_VecX:
 4216         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4217           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4218         } else {
 4219           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4221         }
 4222         break;
 4223       case Op_VecY:
 4224         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4225           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4226         } else {
 4227           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4229         }
 4230         break;
 4231       case Op_VecZ:
 4232         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4233         break;
 4234       default:
 4235         ShouldNotReachHere();
 4236       }
 4237     } else { // store
 4238       switch (ireg) {
 4239       case Op_VecS:
 4240         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4241         break;
 4242       case Op_VecD:
 4243         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4244         break;
 4245       case Op_VecX:
 4246         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4247           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4248         }
 4249         else {
 4250           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4251         }
 4252         break;
 4253       case Op_VecY:
 4254         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4255           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4256         }
 4257         else {
 4258           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4259         }
 4260         break;
 4261       case Op_VecZ:
 4262         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4263         break;
 4264       default:
 4265         ShouldNotReachHere();
 4266       }
 4267     }
 4268 #ifndef PRODUCT
 4269   } else {
 4270     if (is_load) {
 4271       switch (ireg) {
 4272       case Op_VecS:
 4273         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4274         break;
 4275       case Op_VecD:
 4276         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4277         break;
      case Op_VecX:
 4279         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4280         break;
 4281       case Op_VecY:
 4282       case Op_VecZ:
 4283         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4284         break;
 4285       default:
 4286         ShouldNotReachHere();
 4287       }
 4288     } else { // store
 4289       switch (ireg) {
 4290       case Op_VecS:
 4291         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4292         break;
 4293       case Op_VecD:
 4294         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4295         break;
      case Op_VecX:
 4297         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4298         break;
 4299       case Op_VecY:
 4300       case Op_VecZ:
 4301         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4302         break;
 4303       default:
 4304         ShouldNotReachHere();
 4305       }
 4306     }
 4307 #endif
 4308   }
 4309 }
 4310 
 4311 template <class T>
 4312 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4313   int size = type2aelembytes(bt) * len;
 4314   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4315   for (int i = 0; i < len; i++) {
 4316     int offset = i * type2aelembytes(bt);
 4317     switch (bt) {
 4318       case T_BYTE: val->at(i) = con; break;
 4319       case T_SHORT: {
 4320         jshort c = con;
 4321         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4322         break;
 4323       }
 4324       case T_INT: {
 4325         jint c = con;
 4326         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4327         break;
 4328       }
 4329       case T_LONG: {
 4330         jlong c = con;
 4331         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4332         break;
 4333       }
 4334       case T_FLOAT: {
 4335         jfloat c = con;
 4336         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4337         break;
 4338       }
 4339       case T_DOUBLE: {
 4340         jdouble c = con;
 4341         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4342         break;
 4343       }
 4344       default: assert(false, "%s", type2name(bt));
 4345     }
 4346   }
 4347   return val;
 4348 }
 4349 
 4350 static inline jlong high_bit_set(BasicType bt) {
 4351   switch (bt) {
 4352     case T_BYTE:  return 0x8080808080808080;
 4353     case T_SHORT: return 0x8000800080008000;
 4354     case T_INT:   return 0x8000000080000000;
 4355     case T_LONG:  return 0x8000000000000000;
 4356     default:
 4357       ShouldNotReachHere();
 4358       return 0;
 4359   }
 4360 }
 4361 
 4362 #ifndef PRODUCT
 4363   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4364     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4365   }
 4366 #endif
 4367 
 4368   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4369     __ nop(_count);
 4370   }
 4371 
 4372   uint MachNopNode::size(PhaseRegAlloc*) const {
 4373     return _count;
 4374   }
 4375 
 4376 #ifndef PRODUCT
 4377   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4378     st->print("# breakpoint");
 4379   }
 4380 #endif
 4381 
 4382   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4383     __ int3();
 4384   }
 4385 
 4386   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4387     return MachNode::size(ra_);
 4388   }
 4389 
 4390 %}
 4391 
 4392 //----------ENCODING BLOCK-----------------------------------------------------
 4393 // This block specifies the encoding classes used by the compiler to
 4394 // output byte streams.  Encoding classes are parameterized macros
 4395 // used by Machine Instruction Nodes in order to generate the bit
 4396 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 4400 // which returns its register number when queried.  CONST_INTER causes
 4401 // an operand to generate a function which returns the value of the
 4402 // constant when queried.  MEMORY_INTER causes an operand to generate
 4403 // four functions which return the Base Register, the Index Register,
 4404 // the Scale Value, and the Offset Value of the operand when queried.
 4405 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
 4407 // associated with each basic boolean condition for a conditional
 4408 // instruction.
 4409 //
 4410 // Instructions specify two basic values for encoding.  Again, a
 4411 // function is available to check if the constant displacement is an
 4412 // oop. They use the ins_encode keyword to specify their encoding
 4413 // classes (which must be a sequence of enc_class names, and their
 4414 // parameters, specified in the encoding block), and they use the
 4415 // opcode keyword to specify, in order, their primary, secondary, and
 4416 // tertiary opcode.  Only the opcode sections which a particular
 4417 // instruction needs for encoding need to be specified.
 4418 encode %{
 4419   enc_class cdql_enc(no_rax_rdx_RegI div)
 4420   %{
 4421     // Full implementation of Java idiv and irem; checks for
 4422     // special case as described in JVM spec., p.243 & p.271.
 4423     //
 4424     //         normal case                           special case
 4425     //
 4426     // input : rax: dividend                         min_int
 4427     //         reg: divisor                          -1
 4428     //
 4429     // output: rax: quotient  (= rax idiv reg)       min_int
 4430     //         rdx: remainder (= rax irem reg)       0
 4431     //
    //  Code sequence:
 4433     //
 4434     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4435     //    5:   75 07/08                jne    e <normal>
 4436     //    7:   33 d2                   xor    %edx,%edx
 4437     //  [div >= 8 -> offset + 1]
 4438     //  [REX_B]
 4439     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4440     //    c:   74 03/04                je     11 <done>
 4441     // 000000000000000e <normal>:
 4442     //    e:   99                      cltd
 4443     //  [div >= 8 -> offset + 1]
 4444     //  [REX_B]
 4445     //    f:   f7 f9                   idiv   $div
 4446     // 0000000000000011 <done>:
 4447     Label normal;
 4448     Label done;
 4449 
 4450     // cmp    $0x80000000,%eax
 4451     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4452 
 4453     // jne    e <normal>
 4454     __ jccb(Assembler::notEqual, normal);
 4455 
 4456     // xor    %edx,%edx
 4457     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4458 
    // cmp    $0xffffffffffffffff,$div
 4460     __ cmpl($div$$Register, -1);
 4461 
 4462     // je     11 <done>
 4463     __ jccb(Assembler::equal, done);
 4464 
 4465     // <normal>
 4466     // cltd
 4467     __ bind(normal);
 4468     __ cdql();
 4469 
 4470     // idivl
 4471     // <done>
 4472     __ idivl($div$$Register);
 4473     __ bind(done);
 4474   %}
 4475 
 4476   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4477   %{
 4478     // Full implementation of Java ldiv and lrem; checks for
 4479     // special case as described in JVM spec., p.243 & p.271.
 4480     //
 4481     //         normal case                           special case
 4482     //
 4483     // input : rax: dividend                         min_long
 4484     //         reg: divisor                          -1
 4485     //
 4486     // output: rax: quotient  (= rax idiv reg)       min_long
 4487     //         rdx: remainder (= rax irem reg)       0
 4488     //
    //  Code sequence:
 4490     //
 4491     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4492     //    7:   00 00 80
 4493     //    a:   48 39 d0                cmp    %rdx,%rax
 4494     //    d:   75 08                   jne    17 <normal>
 4495     //    f:   33 d2                   xor    %edx,%edx
 4496     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4497     //   15:   74 05                   je     1c <done>
 4498     // 0000000000000017 <normal>:
 4499     //   17:   48 99                   cqto
 4500     //   19:   48 f7 f9                idiv   $div
 4501     // 000000000000001c <done>:
 4502     Label normal;
 4503     Label done;
 4504 
 4505     // mov    $0x8000000000000000,%rdx
 4506     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4507 
 4508     // cmp    %rdx,%rax
 4509     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4510 
 4511     // jne    17 <normal>
 4512     __ jccb(Assembler::notEqual, normal);
 4513 
 4514     // xor    %edx,%edx
 4515     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4516 
 4517     // cmp    $0xffffffffffffffff,$div
 4518     __ cmpq($div$$Register, -1);
 4519 
    // je     1c <done>
 4521     __ jccb(Assembler::equal, done);
 4522 
 4523     // <normal>
 4524     // cqto
 4525     __ bind(normal);
 4526     __ cdqq();
 4527 
    // idivq
 4529     // <done>
 4530     __ idivq($div$$Register);
 4531     __ bind(done);
 4532   %}
 4533 
 4534   enc_class clear_avx %{
 4535     DEBUG_ONLY(int off0 = __ offset());
 4536     if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
 4540       __ vzeroupper();
 4541     }
 4542     DEBUG_ONLY(int off1 = __ offset());
 4543     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4544   %}
 4545 
 4546   enc_class Java_To_Runtime(method meth) %{
 4547     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4548     __ call(r10);
 4549     __ post_call_nop();
 4550   %}
 4551 
 4552   enc_class Java_Static_Call(method meth)
 4553   %{
 4554     // JAVA STATIC CALL
 4555     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4556     // determine who we intended to call.
 4557     if (!_method) {
 4558       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4559     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4560       // The NOP here is purely to ensure that eliding a call to
 4561       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4562       __ addr_nop_5();
 4563       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4564     } else {
 4565       int method_index = resolved_method_index(masm);
 4566       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4567                                                   : static_call_Relocation::spec(method_index);
 4568       address mark = __ pc();
 4569       int call_offset = __ offset();
 4570       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4571       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4572         // Calls of the same statically bound method can share
 4573         // a stub to the interpreter.
 4574         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4575       } else {
 4576         // Emit stubs for static call.
 4577         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4578         __ clear_inst_mark();
 4579         if (stub == nullptr) {
 4580           ciEnv::current()->record_failure("CodeCache is full");
 4581           return;
 4582         }
 4583       }
 4584     }
 4585     __ post_call_nop();
 4586   %}
 4587 
 4588   enc_class Java_Dynamic_Call(method meth) %{
 4589     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4590     __ post_call_nop();
 4591   %}
 4592 
 4593   enc_class call_epilog %{
 4594     if (VerifyStackAtCalls) {
 4595       // Check that stack depth is unchanged: find majik cookie on stack
 4596       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4597       Label L;
 4598       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4599       __ jccb(Assembler::equal, L);
 4600       // Die if stack mismatch
 4601       __ int3();
 4602       __ bind(L);
 4603     }
 4604   %}
 4605 
 4606 %}
 4607 
 4608 //----------FRAME--------------------------------------------------------------
 4609 // Definition of frame structure and management information.
 4610 //
 4611 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4612 //                             |   (to get allocators register number
 4613 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4614 //  r   CALLER     |        |
 4615 //  o     |        +--------+      pad to even-align allocators stack-slot
 4616 //  w     V        |  pad0  |        numbers; owned by CALLER
 4617 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4618 //  h     ^        |   in   |  5
 4619 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4620 //  |     |        |        |  3
 4621 //  |     |        +--------+
 4622 //  V     |        | old out|      Empty on Intel, window on Sparc
 4623 //        |    old |preserve|      Must be even aligned.
 4624 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4625 //        |        |   in   |  3   area for Intel ret address
 4626 //     Owned by    |preserve|      Empty on Sparc.
 4627 //       SELF      +--------+
 4628 //        |        |  pad2  |  2   pad to align old SP
 4629 //        |        +--------+  1
 4630 //        |        | locks  |  0
 4631 //        |        +--------+----> OptoReg::stack0(), even aligned
 4632 //        |        |  pad1  | 11   pad to align new SP
 4633 //        |        +--------+
 4634 //        |        |        | 10
 4635 //        |        | spills |  9   spills
 4636 //        V        |        |  8   (pad0 slot for callee)
 4637 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4638 //        ^        |  out   |  7
 4639 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4640 //     Owned by    +--------+
 4641 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4642 //        |    new |preserve|      Must be even-aligned.
 4643 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4644 //        |        |        |
 4645 //
 4646 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4647 //         known from SELF's arguments and the Java calling convention.
 4648 //         Region 6-7 is determined per call site.
 4649 // Note 2: If the calling convention leaves holes in the incoming argument
 4650 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4651 //         are owned by the CALLEE.  Holes should not be necessary in the
 4652 //         incoming area, as the Java calling convention is completely under
 4653 //         the control of the AD file.  Doubles can be sorted and packed to
 4654 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4655 //         varargs C calling conventions.
 4656 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4657 //         even aligned with pad0 as needed.
 4658 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4659 //         region 6-11 is even aligned; it may be padded out more so that
 4660 //         the region from SP to FP meets the minimum stack alignment.
 4661 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4662 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4663 //         SP meets the minimum alignment.
 4664 
 4665 frame
 4666 %{
 4667   // These three registers define part of the calling convention
 4668   // between compiled code and the interpreter.
 4669   inline_cache_reg(RAX);                // Inline Cache Register
 4670 
 4671   // Optional: name the operand used by cisc-spilling to access
 4672   // [stack_pointer + offset]
 4673   cisc_spilling_operand_name(indOffset32);
 4674 
 4675   // Number of stack slots consumed by locking an object
 4676   sync_stack_slots(2);
 4677 
 4678   // Compiled code's Frame Pointer
 4679   frame_pointer(RSP);
 4680 
 4681   // Interpreter stores its frame pointer in a register which is
 4682   // stored to the stack by I2CAdaptors.
 4683   // I2CAdaptors convert from interpreted java to compiled java.
 4684   interpreter_frame_pointer(RBP);
 4685 
 4686   // Stack alignment requirement
 4687   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4688 
 4689   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4690   // for calls to C.  Supports the var-args backing area for register parms.
 4691   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4692 
 4693   // The after-PROLOG location of the return address.  Location of
 4694   // return address specifies a type (REG or STACK) and a number
 4695   // representing the register number (i.e. - use a register name) or
 4696   // stack slot.
 4697   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4698   // Otherwise, it is above the locks and verification slot and alignment word
 4699   return_addr(STACK - 2 +
 4700               align_up((Compile::current()->in_preserve_stack_slots() +
 4701                         Compile::current()->fixed_slots()),
 4702                        stack_alignment_in_slots()));
 4703 
 4704   // Location of compiled Java return values.  Same as C for now.
 4705   return_value
 4706   %{
 4707     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4708            "only return normal values");
 4709 
 4710     static const int lo[Op_RegL + 1] = {
 4711       0,
 4712       0,
 4713       RAX_num,  // Op_RegN
 4714       RAX_num,  // Op_RegI
 4715       RAX_num,  // Op_RegP
 4716       XMM0_num, // Op_RegF
 4717       XMM0_num, // Op_RegD
 4718       RAX_num   // Op_RegL
 4719     };
 4720     static const int hi[Op_RegL + 1] = {
 4721       0,
 4722       0,
 4723       OptoReg::Bad, // Op_RegN
 4724       OptoReg::Bad, // Op_RegI
 4725       RAX_H_num,    // Op_RegP
 4726       OptoReg::Bad, // Op_RegF
 4727       XMM0b_num,    // Op_RegD
 4728       RAX_H_num     // Op_RegL
 4729     };
 4730     // Excluded flags and vector registers.
 4731     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4732     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4733   %}
 4734 %}
 4735 
 4736 //----------ATTRIBUTES---------------------------------------------------------
 4737 //----------Operand Attributes-------------------------------------------------
 4738 op_attrib op_cost(0);        // Required cost attribute
 4739 
 4740 //----------Instruction Attributes---------------------------------------------
 4741 ins_attrib ins_cost(100);       // Required cost attribute
 4742 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4743 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4744                                 // a non-matching short branch variant
 4745                                 // of some long branch?
 4746 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4747                                 // be a power of 2) specifies the
 4748                                 // alignment that some part of the
 4749                                 // instruction (not necessarily the
 4750                                 // start) requires.  If > 1, a
 4751                                 // compute_padding() function must be
 4752                                 // provided for the instruction
 4753 
 4754 // Whether this node is expanded during code emission into a sequence of
 4755 // instructions and the first instruction can perform an implicit null check.
 4756 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4757 
 4758 //----------OPERANDS-----------------------------------------------------------
 4759 // Operand definitions must precede instruction definitions for correct parsing
 4760 // in the ADLC because operands constitute user defined types which are used in
 4761 // instruction definitions.
 4762 
 4763 //----------Simple Operands----------------------------------------------------
 4764 // Immediate Operands
 4765 // Integer Immediate
 4766 operand immI()
 4767 %{
 4768   match(ConI);
 4769 
 4770   op_cost(10);
 4771   format %{ %}
 4772   interface(CONST_INTER);
 4773 %}
 4774 
 4775 // Constant for test vs zero
 4776 operand immI_0()
 4777 %{
 4778   predicate(n->get_int() == 0);
 4779   match(ConI);
 4780 
 4781   op_cost(0);
 4782   format %{ %}
 4783   interface(CONST_INTER);
 4784 %}
 4785 
 4786 // Constant for increment
 4787 operand immI_1()
 4788 %{
 4789   predicate(n->get_int() == 1);
 4790   match(ConI);
 4791 
 4792   op_cost(0);
 4793   format %{ %}
 4794   interface(CONST_INTER);
 4795 %}
 4796 
 4797 // Constant for decrement
 4798 operand immI_M1()
 4799 %{
 4800   predicate(n->get_int() == -1);
 4801   match(ConI);
 4802 
 4803   op_cost(0);
 4804   format %{ %}
 4805   interface(CONST_INTER);
 4806 %}
 4807 
 4808 operand immI_2()
 4809 %{
 4810   predicate(n->get_int() == 2);
 4811   match(ConI);
 4812 
 4813   op_cost(0);
 4814   format %{ %}
 4815   interface(CONST_INTER);
 4816 %}
 4817 
 4818 operand immI_4()
 4819 %{
 4820   predicate(n->get_int() == 4);
 4821   match(ConI);
 4822 
 4823   op_cost(0);
 4824   format %{ %}
 4825   interface(CONST_INTER);
 4826 %}
 4827 
 4828 operand immI_8()
 4829 %{
 4830   predicate(n->get_int() == 8);
 4831   match(ConI);
 4832 
 4833   op_cost(0);
 4834   format %{ %}
 4835   interface(CONST_INTER);
 4836 %}
 4837 
 4838 // Valid scale values for addressing modes
 4839 operand immI2()
 4840 %{
 4841   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4842   match(ConI);
 4843 
 4844   format %{ %}
 4845   interface(CONST_INTER);
 4846 %}
 4847 
 4848 operand immU7()
 4849 %{
 4850   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4851   match(ConI);
 4852 
 4853   op_cost(5);
 4854   format %{ %}
 4855   interface(CONST_INTER);
 4856 %}
 4857 
 4858 operand immI8()
 4859 %{
 4860   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4861   match(ConI);
 4862 
 4863   op_cost(5);
 4864   format %{ %}
 4865   interface(CONST_INTER);
 4866 %}
 4867 
 4868 operand immU8()
 4869 %{
 4870   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4871   match(ConI);
 4872 
 4873   op_cost(5);
 4874   format %{ %}
 4875   interface(CONST_INTER);
 4876 %}
 4877 
 4878 operand immI16()
 4879 %{
 4880   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4881   match(ConI);
 4882 
 4883   op_cost(10);
 4884   format %{ %}
 4885   interface(CONST_INTER);
 4886 %}
 4887 
 4888 // Int Immediate non-negative
 4889 operand immU31()
 4890 %{
 4891   predicate(n->get_int() >= 0);
 4892   match(ConI);
 4893 
 4894   op_cost(0);
 4895   format %{ %}
 4896   interface(CONST_INTER);
 4897 %}
 4898 
 4899 // Pointer Immediate
 4900 operand immP()
 4901 %{
 4902   match(ConP);
 4903 
 4904   op_cost(10);
 4905   format %{ %}
 4906   interface(CONST_INTER);
 4907 %}
 4908 
 4909 // Null Pointer Immediate
 4910 operand immP0()
 4911 %{
 4912   predicate(n->get_ptr() == 0);
 4913   match(ConP);
 4914 
 4915   op_cost(5);
 4916   format %{ %}
 4917   interface(CONST_INTER);
 4918 %}
 4919 
 4920 // Pointer Immediate
 4921 operand immN() %{
 4922   match(ConN);
 4923 
 4924   op_cost(10);
 4925   format %{ %}
 4926   interface(CONST_INTER);
 4927 %}
 4928 
 4929 operand immNKlass() %{
 4930   match(ConNKlass);
 4931 
 4932   op_cost(10);
 4933   format %{ %}
 4934   interface(CONST_INTER);
 4935 %}
 4936 
 4937 // Null Pointer Immediate
 4938 operand immN0() %{
 4939   predicate(n->get_narrowcon() == 0);
 4940   match(ConN);
 4941 
 4942   op_cost(5);
 4943   format %{ %}
 4944   interface(CONST_INTER);
 4945 %}
 4946 
 4947 operand immP31()
 4948 %{
 4949   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4950             && (n->get_ptr() >> 31) == 0);
 4951   match(ConP);
 4952 
 4953   op_cost(5);
 4954   format %{ %}
 4955   interface(CONST_INTER);
 4956 %}
 4957 
 4958 
 4959 // Long Immediate
 4960 operand immL()
 4961 %{
 4962   match(ConL);
 4963 
 4964   op_cost(20);
 4965   format %{ %}
 4966   interface(CONST_INTER);
 4967 %}
 4968 
 4969 // Long Immediate 8-bit
 4970 operand immL8()
 4971 %{
 4972   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4973   match(ConL);
 4974 
 4975   op_cost(5);
 4976   format %{ %}
 4977   interface(CONST_INTER);
 4978 %}
 4979 
 4980 // Long Immediate 32-bit unsigned
 4981 operand immUL32()
 4982 %{
 4983   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4984   match(ConL);
 4985 
 4986   op_cost(10);
 4987   format %{ %}
 4988   interface(CONST_INTER);
 4989 %}
 4990 
 4991 // Long Immediate 32-bit signed
 4992 operand immL32()
 4993 %{
 4994   predicate(n->get_long() == (int) (n->get_long()));
 4995   match(ConL);
 4996 
 4997   op_cost(15);
 4998   format %{ %}
 4999   interface(CONST_INTER);
 5000 %}
 5001 
 5002 operand immL_Pow2()
 5003 %{
 5004   predicate(is_power_of_2((julong)n->get_long()));
 5005   match(ConL);
 5006 
 5007   op_cost(15);
 5008   format %{ %}
 5009   interface(CONST_INTER);
 5010 %}
 5011 
 5012 operand immL_NotPow2()
 5013 %{
 5014   predicate(is_power_of_2((julong)~n->get_long()));
 5015   match(ConL);
 5016 
 5017   op_cost(15);
 5018   format %{ %}
 5019   interface(CONST_INTER);
 5020 %}
 5021 
 5022 // Long Immediate zero
 5023 operand immL0()
 5024 %{
 5025   predicate(n->get_long() == 0L);
 5026   match(ConL);
 5027 
 5028   op_cost(10);
 5029   format %{ %}
 5030   interface(CONST_INTER);
 5031 %}
 5032 
 5033 // Constant for increment
 5034 operand immL1()
 5035 %{
 5036   predicate(n->get_long() == 1);
 5037   match(ConL);
 5038 
 5039   format %{ %}
 5040   interface(CONST_INTER);
 5041 %}
 5042 
 5043 // Constant for decrement
 5044 operand immL_M1()
 5045 %{
 5046   predicate(n->get_long() == -1);
 5047   match(ConL);
 5048 
 5049   format %{ %}
 5050   interface(CONST_INTER);
 5051 %}
 5052 
 5053 // Long Immediate: low 32-bit mask
 5054 operand immL_32bits()
 5055 %{
 5056   predicate(n->get_long() == 0xFFFFFFFFL);
 5057   match(ConL);
 5058   op_cost(20);
 5059 
 5060   format %{ %}
 5061   interface(CONST_INTER);
 5062 %}
 5063 
 5064 // Int Immediate: 2^n-1, positive
 5065 operand immI_Pow2M1()
 5066 %{
 5067   predicate((n->get_int() > 0)
 5068             && is_power_of_2((juint)n->get_int() + 1));
 5069   match(ConI);
 5070 
 5071   op_cost(20);
 5072   format %{ %}
 5073   interface(CONST_INTER);
 5074 %}
 5075 
 5076 // Float Immediate zero
 5077 operand immF0()
 5078 %{
 5079   predicate(jint_cast(n->getf()) == 0);
 5080   match(ConF);
 5081 
 5082   op_cost(5);
 5083   format %{ %}
 5084   interface(CONST_INTER);
 5085 %}
 5086 
 5087 // Float Immediate
 5088 operand immF()
 5089 %{
 5090   match(ConF);
 5091 
 5092   op_cost(15);
 5093   format %{ %}
 5094   interface(CONST_INTER);
 5095 %}
 5096 
 5097 // Half Float Immediate
 5098 operand immH()
 5099 %{
 5100   match(ConH);
 5101 
 5102   op_cost(15);
 5103   format %{ %}
 5104   interface(CONST_INTER);
 5105 %}
 5106 
 5107 // Double Immediate zero
 5108 operand immD0()
 5109 %{
 5110   predicate(jlong_cast(n->getd()) == 0);
 5111   match(ConD);
 5112 
 5113   op_cost(5);
 5114   format %{ %}
 5115   interface(CONST_INTER);
 5116 %}
 5117 
 5118 // Double Immediate
 5119 operand immD()
 5120 %{
 5121   match(ConD);
 5122 
 5123   op_cost(15);
 5124   format %{ %}
 5125   interface(CONST_INTER);
 5126 %}
 5127 
 5128 // Immediates for special shifts (sign extend)
 5129 
// Constants for sign-extending shifts
 5131 operand immI_16()
 5132 %{
 5133   predicate(n->get_int() == 16);
 5134   match(ConI);
 5135 
 5136   format %{ %}
 5137   interface(CONST_INTER);
 5138 %}
 5139 
 5140 operand immI_24()
 5141 %{
 5142   predicate(n->get_int() == 24);
 5143   match(ConI);
 5144 
 5145   format %{ %}
 5146   interface(CONST_INTER);
 5147 %}
 5148 
 5149 // Constant for byte-wide masking
 5150 operand immI_255()
 5151 %{
 5152   predicate(n->get_int() == 255);
 5153   match(ConI);
 5154 
 5155   format %{ %}
 5156   interface(CONST_INTER);
 5157 %}
 5158 
 5159 // Constant for short-wide masking
 5160 operand immI_65535()
 5161 %{
 5162   predicate(n->get_int() == 65535);
 5163   match(ConI);
 5164 
 5165   format %{ %}
 5166   interface(CONST_INTER);
 5167 %}
 5168 
 5169 // Constant for byte-wide masking
 5170 operand immL_255()
 5171 %{
 5172   predicate(n->get_long() == 255);
 5173   match(ConL);
 5174 
 5175   format %{ %}
 5176   interface(CONST_INTER);
 5177 %}
 5178 
 5179 // Constant for short-wide masking
 5180 operand immL_65535()
 5181 %{
 5182   predicate(n->get_long() == 65535);
 5183   match(ConL);
 5184 
 5185   format %{ %}
 5186   interface(CONST_INTER);
 5187 %}
 5188 
 5189 // AOT Runtime Constants Address
 5190 operand immAOTRuntimeConstantsAddress()
 5191 %{
 5192   // Check if the address is in the range of AOT Runtime Constants
 5193   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5194   match(ConP);
 5195 
 5196   op_cost(0);
 5197   format %{ %}
 5198   interface(CONST_INTER);
 5199 %}
 5200 
 5201 operand kReg()
 5202 %{
 5203   constraint(ALLOC_IN_RC(vectmask_reg));
 5204   match(RegVectMask);
  format %{ %}
 5206   interface(REG_INTER);
 5207 %}
 5208 
 5209 // Register Operands
 5210 // Integer Register
 5211 operand rRegI()
 5212 %{
 5213   constraint(ALLOC_IN_RC(int_reg));
 5214   match(RegI);
 5215 
 5216   match(rax_RegI);
 5217   match(rbx_RegI);
 5218   match(rcx_RegI);
 5219   match(rdx_RegI);
 5220   match(rdi_RegI);
 5221 
 5222   format %{ %}
 5223   interface(REG_INTER);
 5224 %}
 5225 
 5226 // Special Registers
 5227 operand rax_RegI()
 5228 %{
 5229   constraint(ALLOC_IN_RC(int_rax_reg));
 5230   match(RegI);
 5231   match(rRegI);
 5232 
 5233   format %{ "RAX" %}
 5234   interface(REG_INTER);
 5235 %}
 5236 
 5237 // Special Registers
 5238 operand rbx_RegI()
 5239 %{
 5240   constraint(ALLOC_IN_RC(int_rbx_reg));
 5241   match(RegI);
 5242   match(rRegI);
 5243 
 5244   format %{ "RBX" %}
 5245   interface(REG_INTER);
 5246 %}
 5247 
 5248 operand rcx_RegI()
 5249 %{
 5250   constraint(ALLOC_IN_RC(int_rcx_reg));
 5251   match(RegI);
 5252   match(rRegI);
 5253 
 5254   format %{ "RCX" %}
 5255   interface(REG_INTER);
 5256 %}
 5257 
 5258 operand rdx_RegI()
 5259 %{
 5260   constraint(ALLOC_IN_RC(int_rdx_reg));
 5261   match(RegI);
 5262   match(rRegI);
 5263 
 5264   format %{ "RDX" %}
 5265   interface(REG_INTER);
 5266 %}
 5267 
 5268 operand rdi_RegI()
 5269 %{
 5270   constraint(ALLOC_IN_RC(int_rdi_reg));
 5271   match(RegI);
 5272   match(rRegI);
 5273 
 5274   format %{ "RDI" %}
 5275   interface(REG_INTER);
 5276 %}
 5277 
 5278 operand no_rax_rdx_RegI()
 5279 %{
 5280   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5281   match(RegI);
 5282   match(rbx_RegI);
 5283   match(rcx_RegI);
 5284   match(rdi_RegI);
 5285 
 5286   format %{ %}
 5287   interface(REG_INTER);
 5288 %}
 5289 
 5290 operand no_rbp_r13_RegI()
 5291 %{
 5292   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5293   match(RegI);
 5294   match(rRegI);
 5295   match(rax_RegI);
 5296   match(rbx_RegI);
 5297   match(rcx_RegI);
 5298   match(rdx_RegI);
 5299   match(rdi_RegI);
 5300 
 5301   format %{ %}
 5302   interface(REG_INTER);
 5303 %}
 5304 
 5305 // Pointer Register
 5306 operand any_RegP()
 5307 %{
 5308   constraint(ALLOC_IN_RC(any_reg));
 5309   match(RegP);
 5310   match(rax_RegP);
 5311   match(rbx_RegP);
 5312   match(rdi_RegP);
 5313   match(rsi_RegP);
 5314   match(rbp_RegP);
 5315   match(r15_RegP);
 5316   match(rRegP);
 5317 
 5318   format %{ %}
 5319   interface(REG_INTER);
 5320 %}
 5321 
 5322 operand rRegP()
 5323 %{
 5324   constraint(ALLOC_IN_RC(ptr_reg));
 5325   match(RegP);
 5326   match(rax_RegP);
 5327   match(rbx_RegP);
 5328   match(rdi_RegP);
 5329   match(rsi_RegP);
 5330   match(rbp_RegP);  // See Q&A below about
 5331   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5332 
 5333   format %{ %}
 5334   interface(REG_INTER);
 5335 %}
 5336 
 5337 operand rRegN() %{
 5338   constraint(ALLOC_IN_RC(int_reg));
 5339   match(RegN);
 5340 
 5341   format %{ %}
 5342   interface(REG_INTER);
 5343 %}
 5344 
// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator as an output.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is not included in the output of the instruction either.
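//
// For illustration only -- a hedged sketch, not one of the live rules below:
// an instruction that wants r15 as its result must name r15_RegP (or
// any_RegP) explicitly as its output operand, along these lines:
//
// instruct tlsLoadP_sketch(r15_RegP dst)
// %{
//   match(Set dst (ThreadLocal));
//   ins_cost(0);
//   format %{ "# TLS is in R15" %}
//   ins_encode( /* empty encoding */ );
//   ins_pipe(ialu_reg_reg);
// %}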
 5355 
 5356 // This operand is not allowed to use RBP even if
 5357 // RBP is not used to hold the frame pointer.
 5358 operand no_rbp_RegP()
 5359 %{
 5360   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5361   match(RegP);
 5362   match(rbx_RegP);
 5363   match(rsi_RegP);
 5364   match(rdi_RegP);
 5365 
 5366   format %{ %}
 5367   interface(REG_INTER);
 5368 %}
 5369 
 5370 // Special Registers
 5371 // Return a pointer value
 5372 operand rax_RegP()
 5373 %{
 5374   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5375   match(RegP);
 5376   match(rRegP);
 5377 
 5378   format %{ %}
 5379   interface(REG_INTER);
 5380 %}
 5381 
 5382 // Special Registers
 5383 // Return a compressed pointer value
 5384 operand rax_RegN()
 5385 %{
 5386   constraint(ALLOC_IN_RC(int_rax_reg));
 5387   match(RegN);
 5388   match(rRegN);
 5389 
 5390   format %{ %}
 5391   interface(REG_INTER);
 5392 %}
 5393 
 5394 // Used in AtomicAdd
 5395 operand rbx_RegP()
 5396 %{
 5397   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5398   match(RegP);
 5399   match(rRegP);
 5400 
 5401   format %{ %}
 5402   interface(REG_INTER);
 5403 %}
 5404 
 5405 operand rsi_RegP()
 5406 %{
 5407   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5408   match(RegP);
 5409   match(rRegP);
 5410 
 5411   format %{ %}
 5412   interface(REG_INTER);
 5413 %}
 5414 
 5415 operand rbp_RegP()
 5416 %{
 5417   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5418   match(RegP);
 5419   match(rRegP);
 5420 
 5421   format %{ %}
 5422   interface(REG_INTER);
 5423 %}
 5424 
 5425 // Used in rep stosq
 5426 operand rdi_RegP()
 5427 %{
 5428   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5429   match(RegP);
 5430   match(rRegP);
 5431 
 5432   format %{ %}
 5433   interface(REG_INTER);
 5434 %}
 5435 
 5436 operand r15_RegP()
 5437 %{
 5438   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5439   match(RegP);
 5440   match(rRegP);
 5441 
 5442   format %{ %}
 5443   interface(REG_INTER);
 5444 %}
 5445 
 5446 operand rRegL()
 5447 %{
 5448   constraint(ALLOC_IN_RC(long_reg));
 5449   match(RegL);
 5450   match(rax_RegL);
 5451   match(rdx_RegL);
 5452 
 5453   format %{ %}
 5454   interface(REG_INTER);
 5455 %}
 5456 
 5457 // Special Registers
 5458 operand no_rax_rdx_RegL()
 5459 %{
 5460   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5461   match(RegL);
 5462   match(rRegL);
 5463 
 5464   format %{ %}
 5465   interface(REG_INTER);
 5466 %}
 5467 
 5468 operand rax_RegL()
 5469 %{
 5470   constraint(ALLOC_IN_RC(long_rax_reg));
 5471   match(RegL);
 5472   match(rRegL);
 5473 
 5474   format %{ "RAX" %}
 5475   interface(REG_INTER);
 5476 %}
 5477 
 5478 operand rcx_RegL()
 5479 %{
 5480   constraint(ALLOC_IN_RC(long_rcx_reg));
 5481   match(RegL);
 5482   match(rRegL);
 5483 
 5484   format %{ %}
 5485   interface(REG_INTER);
 5486 %}
 5487 
 5488 operand rdx_RegL()
 5489 %{
 5490   constraint(ALLOC_IN_RC(long_rdx_reg));
 5491   match(RegL);
 5492   match(rRegL);
 5493 
 5494   format %{ %}
 5495   interface(REG_INTER);
 5496 %}
 5497 
 5498 operand r11_RegL()
 5499 %{
 5500   constraint(ALLOC_IN_RC(long_r11_reg));
 5501   match(RegL);
 5502   match(rRegL);
 5503 
 5504   format %{ %}
 5505   interface(REG_INTER);
 5506 %}
 5507 
 5508 operand no_rbp_r13_RegL()
 5509 %{
 5510   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5511   match(RegL);
 5512   match(rRegL);
 5513   match(rax_RegL);
 5514   match(rcx_RegL);
 5515   match(rdx_RegL);
 5516 
 5517   format %{ %}
 5518   interface(REG_INTER);
 5519 %}
 5520 
 5521 // Flags register, used as output of compare instructions
 5522 operand rFlagsReg()
 5523 %{
 5524   constraint(ALLOC_IN_RC(int_flags));
 5525   match(RegFlags);
 5526 
 5527   format %{ "RFLAGS" %}
 5528   interface(REG_INTER);
 5529 %}
 5530 
 5531 // Flags register, used as output of FLOATING POINT compare instructions
 5532 operand rFlagsRegU()
 5533 %{
 5534   constraint(ALLOC_IN_RC(int_flags));
 5535   match(RegFlags);
 5536 
 5537   format %{ "RFLAGS_U" %}
 5538   interface(REG_INTER);
 5539 %}
 5540 
 5541 operand rFlagsRegUCF() %{
 5542   constraint(ALLOC_IN_RC(int_flags));
 5543   match(RegFlags);
 5544   predicate(false);
 5545 
 5546   format %{ "RFLAGS_U_CF" %}
 5547   interface(REG_INTER);
 5548 %}
 5549 
 5550 // Float register operands
operand regF() %{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand legRegF() %{
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand regD() %{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
 5603 
 5604 //----------Memory Operands----------------------------------------------------
 5605 // Direct Memory Operand
 5606 // operand direct(immP addr)
 5607 // %{
 5608 //   match(addr);
 5609 
 5610 //   format %{ "[$addr]" %}
 5611 //   interface(MEMORY_INTER) %{
 5612 //     base(0xFFFFFFFF);
 5613 //     index(0x4);
 5614 //     scale(0x0);
 5615 //     disp($addr);
 5616 //   %}
 5617 // %}
 5618 
 5619 // Indirect Memory Operand
 5620 operand indirect(any_RegP reg)
 5621 %{
 5622   constraint(ALLOC_IN_RC(ptr_reg));
 5623   match(reg);
 5624 
 5625   format %{ "[$reg]" %}
 5626   interface(MEMORY_INTER) %{
 5627     base($reg);
 5628     index(0x4);
 5629     scale(0x0);
 5630     disp(0x0);
 5631   %}
 5632 %}
 5633 
 5634 // Indirect Memory Plus Short Offset Operand
 5635 operand indOffset8(any_RegP reg, immL8 off)
 5636 %{
 5637   constraint(ALLOC_IN_RC(ptr_reg));
 5638   match(AddP reg off);
 5639 
 5640   format %{ "[$reg + $off (8-bit)]" %}
 5641   interface(MEMORY_INTER) %{
 5642     base($reg);
 5643     index(0x4);
 5644     scale(0x0);
 5645     disp($off);
 5646   %}
 5647 %}
 5648 
 5649 // Indirect Memory Plus Long Offset Operand
 5650 operand indOffset32(any_RegP reg, immL32 off)
 5651 %{
 5652   constraint(ALLOC_IN_RC(ptr_reg));
 5653   match(AddP reg off);
 5654 
 5655   format %{ "[$reg + $off (32-bit)]" %}
 5656   interface(MEMORY_INTER) %{
 5657     base($reg);
 5658     index(0x4);
 5659     scale(0x0);
 5660     disp($off);
 5661   %}
 5662 %}
 5663 
 5664 // Indirect Memory Plus Index Register Plus Offset Operand
 5665 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5666 %{
 5667   constraint(ALLOC_IN_RC(ptr_reg));
 5668   match(AddP (AddP reg lreg) off);
 5669 
 5670   op_cost(10);
 5671   format %{"[$reg + $off + $lreg]" %}
 5672   interface(MEMORY_INTER) %{
 5673     base($reg);
 5674     index($lreg);
 5675     scale(0x0);
 5676     disp($off);
 5677   %}
 5678 %}
 5679 
// Indirect Memory Plus Index Register Operand
 5681 operand indIndex(any_RegP reg, rRegL lreg)
 5682 %{
 5683   constraint(ALLOC_IN_RC(ptr_reg));
 5684   match(AddP reg lreg);
 5685 
 5686   op_cost(10);
 5687   format %{"[$reg + $lreg]" %}
 5688   interface(MEMORY_INTER) %{
 5689     base($reg);
 5690     index($lreg);
 5691     scale(0x0);
 5692     disp(0x0);
 5693   %}
 5694 %}
 5695 
 5696 // Indirect Memory Times Scale Plus Index Register
 5697 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5698 %{
 5699   constraint(ALLOC_IN_RC(ptr_reg));
 5700   match(AddP reg (LShiftL lreg scale));
 5701 
 5702   op_cost(10);
 5703   format %{"[$reg + $lreg << $scale]" %}
 5704   interface(MEMORY_INTER) %{
 5705     base($reg);
 5706     index($lreg);
 5707     scale($scale);
 5708     disp(0x0);
 5709   %}
 5710 %}
 5711 
 5712 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5713 %{
 5714   constraint(ALLOC_IN_RC(ptr_reg));
 5715   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5716   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5717 
 5718   op_cost(10);
 5719   format %{"[$reg + pos $idx << $scale]" %}
 5720   interface(MEMORY_INTER) %{
 5721     base($reg);
 5722     index($idx);
 5723     scale($scale);
 5724     disp(0x0);
 5725   %}
 5726 %}
 5727 
 5728 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5729 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5730 %{
 5731   constraint(ALLOC_IN_RC(ptr_reg));
 5732   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5733 
 5734   op_cost(10);
 5735   format %{"[$reg + $off + $lreg << $scale]" %}
 5736   interface(MEMORY_INTER) %{
 5737     base($reg);
 5738     index($lreg);
 5739     scale($scale);
 5740     disp($off);
 5741   %}
 5742 %}
 5743 
 5744 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5745 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5746 %{
 5747   constraint(ALLOC_IN_RC(ptr_reg));
 5748   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5749   match(AddP (AddP reg (ConvI2L idx)) off);
 5750 
 5751   op_cost(10);
 5752   format %{"[$reg + $off + $idx]" %}
 5753   interface(MEMORY_INTER) %{
 5754     base($reg);
 5755     index($idx);
 5756     scale(0x0);
 5757     disp($off);
 5758   %}
 5759 %}
 5760 
 5761 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5762 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5763 %{
 5764   constraint(ALLOC_IN_RC(ptr_reg));
 5765   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5766   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5767 
 5768   op_cost(10);
 5769   format %{"[$reg + $off + $idx << $scale]" %}
 5770   interface(MEMORY_INTER) %{
 5771     base($reg);
 5772     index($idx);
 5773     scale($scale);
 5774     disp($off);
 5775   %}
 5776 %}
 5777 
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base,
// so we can't free r12 even with CompressedOops::base() == nullptr.
 5781 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5782   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5783   constraint(ALLOC_IN_RC(ptr_reg));
 5784   match(AddP (DecodeN reg) off);
 5785 
 5786   op_cost(10);
 5787   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5788   interface(MEMORY_INTER) %{
 5789     base(0xc); // R12
 5790     index($reg);
 5791     scale(0x3);
 5792     disp($off);
 5793   %}
 5794 %}
 5795 
 5796 // Indirect Memory Operand
 5797 operand indirectNarrow(rRegN reg)
 5798 %{
 5799   predicate(CompressedOops::shift() == 0);
 5800   constraint(ALLOC_IN_RC(ptr_reg));
 5801   match(DecodeN reg);
 5802 
 5803   format %{ "[$reg]" %}
 5804   interface(MEMORY_INTER) %{
 5805     base($reg);
 5806     index(0x4);
 5807     scale(0x0);
 5808     disp(0x0);
 5809   %}
 5810 %}
 5811 
 5812 // Indirect Memory Plus Short Offset Operand
 5813 operand indOffset8Narrow(rRegN reg, immL8 off)
 5814 %{
 5815   predicate(CompressedOops::shift() == 0);
 5816   constraint(ALLOC_IN_RC(ptr_reg));
 5817   match(AddP (DecodeN reg) off);
 5818 
 5819   format %{ "[$reg + $off (8-bit)]" %}
 5820   interface(MEMORY_INTER) %{
 5821     base($reg);
 5822     index(0x4);
 5823     scale(0x0);
 5824     disp($off);
 5825   %}
 5826 %}
 5827 
 5828 // Indirect Memory Plus Long Offset Operand
 5829 operand indOffset32Narrow(rRegN reg, immL32 off)
 5830 %{
 5831   predicate(CompressedOops::shift() == 0);
 5832   constraint(ALLOC_IN_RC(ptr_reg));
 5833   match(AddP (DecodeN reg) off);
 5834 
 5835   format %{ "[$reg + $off (32-bit)]" %}
 5836   interface(MEMORY_INTER) %{
 5837     base($reg);
 5838     index(0x4);
 5839     scale(0x0);
 5840     disp($off);
 5841   %}
 5842 %}
 5843 
 5844 // Indirect Memory Plus Index Register Plus Offset Operand
 5845 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5846 %{
 5847   predicate(CompressedOops::shift() == 0);
 5848   constraint(ALLOC_IN_RC(ptr_reg));
 5849   match(AddP (AddP (DecodeN reg) lreg) off);
 5850 
 5851   op_cost(10);
 5852   format %{"[$reg + $off + $lreg]" %}
 5853   interface(MEMORY_INTER) %{
 5854     base($reg);
 5855     index($lreg);
 5856     scale(0x0);
 5857     disp($off);
 5858   %}
 5859 %}
 5860 
// Indirect Memory Plus Index Register Operand
 5862 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5863 %{
 5864   predicate(CompressedOops::shift() == 0);
 5865   constraint(ALLOC_IN_RC(ptr_reg));
 5866   match(AddP (DecodeN reg) lreg);
 5867 
 5868   op_cost(10);
 5869   format %{"[$reg + $lreg]" %}
 5870   interface(MEMORY_INTER) %{
 5871     base($reg);
 5872     index($lreg);
 5873     scale(0x0);
 5874     disp(0x0);
 5875   %}
 5876 %}
 5877 
 5878 // Indirect Memory Times Scale Plus Index Register
 5879 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5880 %{
 5881   predicate(CompressedOops::shift() == 0);
 5882   constraint(ALLOC_IN_RC(ptr_reg));
 5883   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5884 
 5885   op_cost(10);
 5886   format %{"[$reg + $lreg << $scale]" %}
 5887   interface(MEMORY_INTER) %{
 5888     base($reg);
 5889     index($lreg);
 5890     scale($scale);
 5891     disp(0x0);
 5892   %}
 5893 %}
 5894 
 5895 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5896 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5897 %{
 5898   predicate(CompressedOops::shift() == 0);
 5899   constraint(ALLOC_IN_RC(ptr_reg));
 5900   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5901 
 5902   op_cost(10);
 5903   format %{"[$reg + $off + $lreg << $scale]" %}
 5904   interface(MEMORY_INTER) %{
 5905     base($reg);
 5906     index($lreg);
 5907     scale($scale);
 5908     disp($off);
 5909   %}
 5910 %}
 5911 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5913 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5914 %{
 5915   constraint(ALLOC_IN_RC(ptr_reg));
 5916   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5917   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5918 
 5919   op_cost(10);
 5920   format %{"[$reg + $off + $idx]" %}
 5921   interface(MEMORY_INTER) %{
 5922     base($reg);
 5923     index($idx);
 5924     scale(0x0);
 5925     disp($off);
 5926   %}
 5927 %}
 5928 
 5929 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5930 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5931 %{
 5932   constraint(ALLOC_IN_RC(ptr_reg));
 5933   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5934   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5935 
 5936   op_cost(10);
 5937   format %{"[$reg + $off + $idx << $scale]" %}
 5938   interface(MEMORY_INTER) %{
 5939     base($reg);
 5940     index($idx);
 5941     scale($scale);
 5942     disp($off);
 5943   %}
 5944 %}
 5945 
 5946 //----------Special Memory Operands--------------------------------------------
 5947 // Stack Slot Operand - This operand is used for loading and storing temporary
 5948 //                      values on the stack where a match requires a value to
 5949 //                      flow through memory.
 5950 operand stackSlotP(sRegP reg)
 5951 %{
 5952   constraint(ALLOC_IN_RC(stack_slots));
 5953   // No match rule because this operand is only generated in matching
 5954 
 5955   format %{ "[$reg]" %}
 5956   interface(MEMORY_INTER) %{
 5957     base(0x4);   // RSP
 5958     index(0x4);  // No Index
 5959     scale(0x0);  // No Scale
 5960     disp($reg);  // Stack Offset
 5961   %}
 5962 %}
 5963 
 5964 operand stackSlotI(sRegI reg)
 5965 %{
 5966   constraint(ALLOC_IN_RC(stack_slots));
 5967   // No match rule because this operand is only generated in matching
 5968 
 5969   format %{ "[$reg]" %}
 5970   interface(MEMORY_INTER) %{
 5971     base(0x4);   // RSP
 5972     index(0x4);  // No Index
 5973     scale(0x0);  // No Scale
 5974     disp($reg);  // Stack Offset
 5975   %}
 5976 %}
 5977 
 5978 operand stackSlotF(sRegF reg)
 5979 %{
 5980   constraint(ALLOC_IN_RC(stack_slots));
 5981   // No match rule because this operand is only generated in matching
 5982 
 5983   format %{ "[$reg]" %}
 5984   interface(MEMORY_INTER) %{
 5985     base(0x4);   // RSP
 5986     index(0x4);  // No Index
 5987     scale(0x0);  // No Scale
 5988     disp($reg);  // Stack Offset
 5989   %}
 5990 %}
 5991 
 5992 operand stackSlotD(sRegD reg)
 5993 %{
 5994   constraint(ALLOC_IN_RC(stack_slots));
 5995   // No match rule because this operand is only generated in matching
 5996 
 5997   format %{ "[$reg]" %}
 5998   interface(MEMORY_INTER) %{
 5999     base(0x4);   // RSP
 6000     index(0x4);  // No Index
 6001     scale(0x0);  // No Scale
 6002     disp($reg);  // Stack Offset
 6003   %}
 6004 %}

operand stackSlotL(sRegL reg)
 6006 %{
 6007   constraint(ALLOC_IN_RC(stack_slots));
 6008   // No match rule because this operand is only generated in matching
 6009 
 6010   format %{ "[$reg]" %}
 6011   interface(MEMORY_INTER) %{
 6012     base(0x4);   // RSP
 6013     index(0x4);  // No Index
 6014     scale(0x0);  // No Scale
 6015     disp($reg);  // Stack Offset
 6016   %}
 6017 %}
 6018 
 6019 //----------Conditional Branch Operands----------------------------------------
 6020 // Comparison Op  - This is the operation of the comparison, and is limited to
 6021 //                  the following set of codes:
 6022 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6023 //
 6024 // Other attributes of the comparison, such as unsignedness, are specified
 6025 // by the comparison instruction that sets a condition code flags register.
 6026 // That result is represented by a flags operand whose subtype is appropriate
 6027 // to the unsignedness (etc.) of the comparison.
 6028 //
 6029 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6030 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6031 // by matching a specific subtype of Bool operand below, such as cmpOpU.
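//
// For illustration only -- a hedged sketch of how a conditional jump ties a
// Bool operand and a flags operand together (the live jump rules appear
// later in this file):
//
// instruct jmpCon_sketch(cmpOp cop, rFlagsReg cr, label labl)
// %{
//   match(If cop cr);   // Bool (cop) tested against flags (cr) set by a Cmp
//   effect(USE labl);
//   format %{ "j$cop     $labl" %}
//   ins_encode %{
//     Label* L = $labl$$label;
//     __ jcc((Assembler::Condition)($cop$$cmpcode), *L);
//   %}
//   ins_pipe(pipe_jcc);
// %}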
 6032 
 6033 // Comparison Code
 6034 operand cmpOp()
 6035 %{
 6036   match(Bool);
 6037 
 6038   format %{ "" %}
 6039   interface(COND_INTER) %{
 6040     equal(0x4, "e");
 6041     not_equal(0x5, "ne");
 6042     less(0xC, "l");
 6043     greater_equal(0xD, "ge");
 6044     less_equal(0xE, "le");
 6045     greater(0xF, "g");
 6046     overflow(0x0, "o");
 6047     no_overflow(0x1, "no");
 6048   %}
 6049 %}
 6050 
 6051 // Comparison Code, unsigned compare.  Used by FP also, with
 6052 // C2 (unordered) turned into GT or LT already.  The other bits
 6053 // C0 and C3 are turned into Carry & Zero flags.
 6054 operand cmpOpU()
 6055 %{
 6056   match(Bool);
 6057 
 6058   format %{ "" %}
 6059   interface(COND_INTER) %{
 6060     equal(0x4, "e");
 6061     not_equal(0x5, "ne");
 6062     less(0x2, "b");
 6063     greater_equal(0x3, "ae");
 6064     less_equal(0x6, "be");
 6065     greater(0x7, "a");
 6066     overflow(0x0, "o");
 6067     no_overflow(0x1, "no");
 6068   %}
 6069 %}
 6070 
 6071 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
 6075 operand cmpOpUCF() %{
 6076   match(Bool);
 6077   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 6078             n->as_Bool()->_test._test == BoolTest::ge ||
 6079             n->as_Bool()->_test._test == BoolTest::le ||
 6080             n->as_Bool()->_test._test == BoolTest::gt ||
 6081             n->in(1)->in(1) == n->in(1)->in(2));
 6082   format %{ "" %}
 6083   interface(COND_INTER) %{
 6084     equal(0xb, "np");
 6085     not_equal(0xa, "p");
 6086     less(0x2, "b");
 6087     greater_equal(0x3, "ae");
 6088     less_equal(0x6, "be");
 6089     greater(0x7, "a");
 6090     overflow(0x0, "o");
 6091     no_overflow(0x1, "no");
 6092   %}
 6093 %}
 6094 
 6095 
 6096 // Floating comparisons that can be fixed up with extra conditional jumps
 6097 operand cmpOpUCF2() %{
 6098   match(Bool);
 6099   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 6100              n->as_Bool()->_test._test == BoolTest::eq) &&
 6101             n->in(1)->in(1) != n->in(1)->in(2));
 6102   format %{ "" %}
 6103   interface(COND_INTER) %{
 6104     equal(0x4, "e");
 6105     not_equal(0x5, "ne");
 6106     less(0x2, "b");
 6107     greater_equal(0x3, "ae");
 6108     less_equal(0x6, "be");
 6109     greater(0x7, "a");
 6110     overflow(0x0, "o");
 6111     no_overflow(0x1, "no");
 6112   %}
 6113 %}
 6114 
// Operands for bound floating-point register arguments
 6116 operand rxmm0() %{
 6117   constraint(ALLOC_IN_RC(xmm0_reg));
 6118   match(VecX);
  format %{ %}
 6120   interface(REG_INTER);
 6121 %}
 6122 
 6123 // Vectors
 6124 
 6125 // Dummy generic vector class. Should be used for all vector operands.
// Replaced with vec[SDXYZ] during post-selection cleanup.
 6127 operand vec() %{
 6128   constraint(ALLOC_IN_RC(dynamic));
 6129   match(VecX);
 6130   match(VecY);
 6131   match(VecZ);
 6132   match(VecS);
 6133   match(VecD);
 6134 
 6135   format %{ %}
 6136   interface(REG_INTER);
 6137 %}
 6138 
 6139 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6140 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6141 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6142 // runtime code generation via reg_class_dynamic.
 6143 operand legVec() %{
 6144   constraint(ALLOC_IN_RC(dynamic));
 6145   match(VecX);
 6146   match(VecY);
 6147   match(VecZ);
 6148   match(VecS);
 6149   match(VecD);
 6150 
 6151   format %{ %}
 6152   interface(REG_INTER);
 6153 %}
 6154 
 6155 // Replaces vec during post-selection cleanup. See above.
 6156 operand vecS() %{
 6157   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6158   match(VecS);
 6159 
 6160   format %{ %}
 6161   interface(REG_INTER);
 6162 %}
 6163 
 6164 // Replaces legVec during post-selection cleanup. See above.
 6165 operand legVecS() %{
 6166   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6167   match(VecS);
 6168 
 6169   format %{ %}
 6170   interface(REG_INTER);
 6171 %}
 6172 
 6173 // Replaces vec during post-selection cleanup. See above.
 6174 operand vecD() %{
 6175   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6176   match(VecD);
 6177 
 6178   format %{ %}
 6179   interface(REG_INTER);
 6180 %}
 6181 
 6182 // Replaces legVec during post-selection cleanup. See above.
 6183 operand legVecD() %{
 6184   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6185   match(VecD);
 6186 
 6187   format %{ %}
 6188   interface(REG_INTER);
 6189 %}
 6190 
 6191 // Replaces vec during post-selection cleanup. See above.
 6192 operand vecX() %{
 6193   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6194   match(VecX);
 6195 
 6196   format %{ %}
 6197   interface(REG_INTER);
 6198 %}
 6199 
 6200 // Replaces legVec during post-selection cleanup. See above.
 6201 operand legVecX() %{
 6202   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6203   match(VecX);
 6204 
 6205   format %{ %}
 6206   interface(REG_INTER);
 6207 %}
 6208 
 6209 // Replaces vec during post-selection cleanup. See above.
 6210 operand vecY() %{
 6211   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6212   match(VecY);
 6213 
 6214   format %{ %}
 6215   interface(REG_INTER);
 6216 %}
 6217 
 6218 // Replaces legVec during post-selection cleanup. See above.
 6219 operand legVecY() %{
 6220   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6221   match(VecY);
 6222 
 6223   format %{ %}
 6224   interface(REG_INTER);
 6225 %}
 6226 
 6227 // Replaces vec during post-selection cleanup. See above.
 6228 operand vecZ() %{
 6229   constraint(ALLOC_IN_RC(vectorz_reg));
 6230   match(VecZ);
 6231 
 6232   format %{ %}
 6233   interface(REG_INTER);
 6234 %}
 6235 
 6236 // Replaces legVec during post-selection cleanup. See above.
 6237 operand legVecZ() %{
 6238   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6239   match(VecZ);
 6240 
 6241   format %{ %}
 6242   interface(REG_INTER);
 6243 %}
 6244 
 6245 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 6247 // instruction definitions by not requiring the AD writer to specify separate
 6248 // instructions for every form of operand when the instruction accepts
 6249 // multiple operand types with the same basic encoding and format.  The classic
 6250 // case of this is memory operands.
 6251 
 6252 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6253                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6254                indCompressedOopOffset,
 6255                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6256                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6257                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
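// For example, loadB below takes a single "memory mem" operand; through this
// opclass one rule covers every addressing form listed above, from plain
// [$reg] up to [$reg + $off + $idx << $scale].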
 6258 
 6259 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6261 pipeline %{
 6262 
 6263 //----------ATTRIBUTES---------------------------------------------------------
 6264 attributes %{
  variable_size_instructions;        // Variable-sized instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6268   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6269   instruction_fetch_units = 1;       // of 16 bytes
 6270 %}
 6271 
 6272 //----------RESOURCES----------------------------------------------------------
 6273 // Resources are the functional units available to the machine
 6274 
 6275 // Generic P2/P3 pipeline
 6276 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6277 // 3 instructions decoded per cycle.
 6278 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
 6280 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6281            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6282            BR, FPU,
 6283            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6284 
 6285 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6286 // Pipeline Description specifies the stages in the machine's pipeline
 6287 
 6288 // Generic P2/P3 pipeline
 6289 pipe_desc(S0, S1, S2, S3, S4, S5);
 6290 
 6291 //----------PIPELINE CLASSES---------------------------------------------------
 6292 // Pipeline Classes describe the stages in which input and output are
 6293 // referenced by the hardware pipeline.
 6294 
 6295 // Naming convention: ialu or fpu
 6296 // Then: _reg
 6297 // Then: _reg if there is a 2nd register
 6298 // Then: _long if it's a pair of instructions implementing a long
 6299 // Then: _fat if it requires the big decoder
 6300 //   Or: _mem if it requires the big decoder and a memory unit.
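// For example, "ialu_reg_mem" reads as: integer ALU operation with a register
// destination and a memory source, so its stage description below claims the
// big decoder (D0) and a memory unit.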
 6301 
 6302 // Integer ALU reg operation
 6303 pipe_class ialu_reg(rRegI dst)
 6304 %{
 6305     single_instruction;
 6306     dst    : S4(write);
 6307     dst    : S3(read);
 6308     DECODE : S0;        // any decoder
 6309     ALU    : S3;        // any alu
 6310 %}
 6311 
 6312 // Long ALU reg operation
 6313 pipe_class ialu_reg_long(rRegL dst)
 6314 %{
 6315     instruction_count(2);
 6316     dst    : S4(write);
 6317     dst    : S3(read);
 6318     DECODE : S0(2);     // any 2 decoders
 6319     ALU    : S3(2);     // both alus
 6320 %}
 6321 
 6322 // Integer ALU reg operation using big decoder
 6323 pipe_class ialu_reg_fat(rRegI dst)
 6324 %{
 6325     single_instruction;
 6326     dst    : S4(write);
 6327     dst    : S3(read);
 6328     D0     : S0;        // big decoder only
 6329     ALU    : S3;        // any alu
 6330 %}
 6331 
 6332 // Integer ALU reg-reg operation
 6333 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6334 %{
 6335     single_instruction;
 6336     dst    : S4(write);
 6337     src    : S3(read);
 6338     DECODE : S0;        // any decoder
 6339     ALU    : S3;        // any alu
 6340 %}
 6341 
 6342 // Integer ALU reg-reg operation
 6343 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6344 %{
 6345     single_instruction;
 6346     dst    : S4(write);
 6347     src    : S3(read);
 6348     D0     : S0;        // big decoder only
 6349     ALU    : S3;        // any alu
 6350 %}
 6351 
 6352 // Integer ALU reg-mem operation
 6353 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6354 %{
 6355     single_instruction;
 6356     dst    : S5(write);
 6357     mem    : S3(read);
 6358     D0     : S0;        // big decoder only
 6359     ALU    : S4;        // any alu
 6360     MEM    : S3;        // any mem
 6361 %}
 6362 
 6363 // Integer mem operation (prefetch)
 6364 pipe_class ialu_mem(memory mem)
 6365 %{
 6366     single_instruction;
 6367     mem    : S3(read);
 6368     D0     : S0;        // big decoder only
 6369     MEM    : S3;        // any mem
 6370 %}
 6371 
 6372 // Integer Store to Memory
 6373 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6374 %{
 6375     single_instruction;
 6376     mem    : S3(read);
 6377     src    : S5(read);
 6378     D0     : S0;        // big decoder only
 6379     ALU    : S4;        // any alu
 6380     MEM    : S3;
 6381 %}
 6382 
 6383 // // Long Store to Memory
 6384 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6385 // %{
 6386 //     instruction_count(2);
 6387 //     mem    : S3(read);
 6388 //     src    : S5(read);
//     D0     : S0(2);     // big decoder only; twice
//     ALU    : S4(2);     // any 2 alus
//     MEM    : S3(2);     // both mems
 6392 // %}
 6393 
 6394 // Integer Store to Memory
 6395 pipe_class ialu_mem_imm(memory mem)
 6396 %{
 6397     single_instruction;
 6398     mem    : S3(read);
 6399     D0     : S0;        // big decoder only
 6400     ALU    : S4;        // any alu
 6401     MEM    : S3;
 6402 %}
 6403 
 6404 // Integer ALU0 reg-reg operation
 6405 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6406 %{
 6407     single_instruction;
 6408     dst    : S4(write);
 6409     src    : S3(read);
 6410     D0     : S0;        // Big decoder only
 6411     ALU0   : S3;        // only alu0
 6412 %}
 6413 
 6414 // Integer ALU0 reg-mem operation
 6415 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6416 %{
 6417     single_instruction;
 6418     dst    : S5(write);
 6419     mem    : S3(read);
 6420     D0     : S0;        // big decoder only
 6421     ALU0   : S4;        // ALU0 only
 6422     MEM    : S3;        // any mem
 6423 %}
 6424 
 6425 // Integer ALU reg-reg operation
 6426 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6427 %{
 6428     single_instruction;
 6429     cr     : S4(write);
 6430     src1   : S3(read);
 6431     src2   : S3(read);
 6432     DECODE : S0;        // any decoder
 6433     ALU    : S3;        // any alu
 6434 %}
 6435 
 6436 // Integer ALU reg-imm operation
 6437 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6438 %{
 6439     single_instruction;
 6440     cr     : S4(write);
 6441     src1   : S3(read);
 6442     DECODE : S0;        // any decoder
 6443     ALU    : S3;        // any alu
 6444 %}
 6445 
 6446 // Integer ALU reg-mem operation
 6447 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6448 %{
 6449     single_instruction;
 6450     cr     : S4(write);
 6451     src1   : S3(read);
 6452     src2   : S3(read);
 6453     D0     : S0;        // big decoder only
 6454     ALU    : S4;        // any alu
 6455     MEM    : S3;
 6456 %}
 6457 
 6458 // Conditional move reg-reg
 6459 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6460 %{
 6461     instruction_count(4);
 6462     y      : S4(read);
 6463     q      : S3(read);
 6464     p      : S3(read);
 6465     DECODE : S0(4);     // any decoder
 6466 %}
 6467 
 6468 // Conditional move reg-reg
 6469 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6470 %{
 6471     single_instruction;
 6472     dst    : S4(write);
 6473     src    : S3(read);
 6474     cr     : S3(read);
 6475     DECODE : S0;        // any decoder
 6476 %}
 6477 
 6478 // Conditional move reg-mem
 6479 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6480 %{
 6481     single_instruction;
 6482     dst    : S4(write);
 6483     src    : S3(read);
 6484     cr     : S3(read);
 6485     DECODE : S0;        // any decoder
 6486     MEM    : S3;
 6487 %}
 6488 
 6489 // Conditional move reg-reg long
 6490 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6491 %{
 6492     single_instruction;
 6493     dst    : S4(write);
 6494     src    : S3(read);
 6495     cr     : S3(read);
 6496     DECODE : S0(2);     // any 2 decoders
 6497 %}
 6498 
 6499 // Float reg-reg operation
 6500 pipe_class fpu_reg(regD dst)
 6501 %{
 6502     instruction_count(2);
 6503     dst    : S3(read);
 6504     DECODE : S0(2);     // any 2 decoders
 6505     FPU    : S3;
 6506 %}
 6507 
 6508 // Float reg-reg operation
 6509 pipe_class fpu_reg_reg(regD dst, regD src)
 6510 %{
 6511     instruction_count(2);
 6512     dst    : S4(write);
 6513     src    : S3(read);
 6514     DECODE : S0(2);     // any 2 decoders
 6515     FPU    : S3;
 6516 %}
 6517 
 6518 // Float reg-reg operation
 6519 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6520 %{
 6521     instruction_count(3);
 6522     dst    : S4(write);
 6523     src1   : S3(read);
 6524     src2   : S3(read);
 6525     DECODE : S0(3);     // any 3 decoders
 6526     FPU    : S3(2);
 6527 %}
 6528 
 6529 // Float reg-reg operation
 6530 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6531 %{
 6532     instruction_count(4);
 6533     dst    : S4(write);
 6534     src1   : S3(read);
 6535     src2   : S3(read);
 6536     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6538     FPU    : S3(2);
 6539 %}
 6540 
 6541 // Float reg-reg operation
 6542 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6543 %{
 6544     instruction_count(4);
 6545     dst    : S4(write);
 6546     src1   : S3(read);
 6547     src2   : S3(read);
 6548     src3   : S3(read);
 6549     DECODE : S1(3);     // any 3 decoders
 6550     D0     : S0;        // Big decoder only
 6551     FPU    : S3(2);
 6552     MEM    : S3;
 6553 %}
 6554 
 6555 // Float reg-mem operation
 6556 pipe_class fpu_reg_mem(regD dst, memory mem)
 6557 %{
 6558     instruction_count(2);
 6559     dst    : S5(write);
 6560     mem    : S3(read);
 6561     D0     : S0;        // big decoder only
 6562     DECODE : S1;        // any decoder for FPU POP
 6563     FPU    : S4;
 6564     MEM    : S3;        // any mem
 6565 %}
 6566 
 6567 // Float reg-mem operation
 6568 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6569 %{
 6570     instruction_count(3);
 6571     dst    : S5(write);
 6572     src1   : S3(read);
 6573     mem    : S3(read);
 6574     D0     : S0;        // big decoder only
 6575     DECODE : S1(2);     // any decoder for FPU POP
 6576     FPU    : S4;
 6577     MEM    : S3;        // any mem
 6578 %}
 6579 
 6580 // Float mem-reg operation
 6581 pipe_class fpu_mem_reg(memory mem, regD src)
 6582 %{
 6583     instruction_count(2);
 6584     src    : S5(read);
 6585     mem    : S3(read);
 6586     DECODE : S0;        // any decoder for FPU PUSH
 6587     D0     : S1;        // big decoder only
 6588     FPU    : S4;
 6589     MEM    : S3;        // any mem
 6590 %}
 6591 
 6592 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6593 %{
 6594     instruction_count(3);
 6595     src1   : S3(read);
 6596     src2   : S3(read);
 6597     mem    : S3(read);
 6598     DECODE : S0(2);     // any decoder for FPU PUSH
 6599     D0     : S1;        // big decoder only
 6600     FPU    : S4;
 6601     MEM    : S3;        // any mem
 6602 %}
 6603 
 6604 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6605 %{
 6606     instruction_count(3);
 6607     src1   : S3(read);
 6608     src2   : S3(read);
 6609     mem    : S4(read);
 6610     DECODE : S0;        // any decoder for FPU PUSH
 6611     D0     : S0(2);     // big decoder only
 6612     FPU    : S4;
 6613     MEM    : S3(2);     // any mem
 6614 %}
 6615 
 6616 pipe_class fpu_mem_mem(memory dst, memory src1)
 6617 %{
 6618     instruction_count(2);
 6619     src1   : S3(read);
 6620     dst    : S4(read);
 6621     D0     : S0(2);     // big decoder only
 6622     MEM    : S3(2);     // any mem
 6623 %}
 6624 
 6625 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6626 %{
 6627     instruction_count(3);
 6628     src1   : S3(read);
 6629     src2   : S3(read);
 6630     dst    : S4(read);
 6631     D0     : S0(3);     // big decoder only
 6632     FPU    : S4;
 6633     MEM    : S3(3);     // any mem
 6634 %}
 6635 
 6636 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6637 %{
 6638     instruction_count(3);
 6639     src1   : S4(read);
 6640     mem    : S4(read);
 6641     DECODE : S0;        // any decoder for FPU PUSH
 6642     D0     : S0(2);     // big decoder only
 6643     FPU    : S4;
 6644     MEM    : S3(2);     // any mem
 6645 %}
 6646 
 6647 // Float load constant
 6648 pipe_class fpu_reg_con(regD dst)
 6649 %{
 6650     instruction_count(2);
 6651     dst    : S5(write);
 6652     D0     : S0;        // big decoder only for the load
 6653     DECODE : S1;        // any decoder for FPU POP
 6654     FPU    : S4;
 6655     MEM    : S3;        // any mem
 6656 %}
 6657 
 6658 // Float load constant
 6659 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6660 %{
 6661     instruction_count(3);
 6662     dst    : S5(write);
 6663     src    : S3(read);
 6664     D0     : S0;        // big decoder only for the load
 6665     DECODE : S1(2);     // any decoder for FPU POP
 6666     FPU    : S4;
 6667     MEM    : S3;        // any mem
 6668 %}
 6669 
 6670 // UnConditional branch
 6671 pipe_class pipe_jmp(label labl)
 6672 %{
 6673     single_instruction;
 6674     BR   : S3;
 6675 %}
 6676 
 6677 // Conditional branch
 6678 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6679 %{
 6680     single_instruction;
 6681     cr    : S1(read);
 6682     BR    : S3;
 6683 %}
 6684 
 6685 // Allocation idiom
 6686 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6687 %{
 6688     instruction_count(1); force_serialization;
 6689     fixed_latency(6);
 6690     heap_ptr : S3(read);
 6691     DECODE   : S0(3);
 6692     D0       : S2;
 6693     MEM      : S3;
 6694     ALU      : S3(2);
 6695     dst      : S5(write);
 6696     BR       : S5;
 6697 %}
 6698 
 6699 // Generic big/slow expanded idiom
 6700 pipe_class pipe_slow()
 6701 %{
 6702     instruction_count(10); multiple_bundles; force_serialization;
 6703     fixed_latency(100);
 6704     D0  : S0(2);
 6705     MEM : S3(2);
 6706 %}
 6707 
 6708 // The real do-nothing guy
 6709 pipe_class empty()
 6710 %{
 6711     instruction_count(0);
 6712 %}
 6713 
 6714 // Define the class for the Nop node
 6715 define
 6716 %{
 6717    MachNop = empty;
 6718 %}
 6719 
 6720 %}
 6721 
 6722 //----------INSTRUCTIONS-------------------------------------------------------
 6723 //
 6724 // match      -- States which machine-independent subtree may be replaced
 6725 //               by this instruction.
 6726 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6727 //               selection to identify a minimum cost tree of machine
 6728 //               instructions that matches a tree of machine-independent
 6729 //               instructions.
 6730 // format     -- A string providing the disassembly for this instruction.
 6731 //               The value of an instruction's operand may be inserted
 6732 //               by referring to it with a '$' prefix.
 6733 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6734 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6736 //               indicate the type of machine instruction, while secondary
 6737 //               and tertiary are often used for prefix options or addressing
 6738 //               modes.
 6739 // ins_encode -- A list of encode classes with parameters. The encode class
 6740 //               name must have been defined in an 'enc_class' specification
 6741 //               in the encode section of the architecture description.
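//
// For illustration only -- a hedged sketch of a complete instruction
// definition combining these attributes (the live add rules appear later in
// this file):
//
// instruct addI_rReg_sketch(rRegI dst, rRegI src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));  // replaces a matching AddI subtree
//   effect(KILL cr);                // clobbers the flags register
//   ins_cost(150);                  // relative cost used during selection
//   format %{ "addl    $dst, $src" %}
//   ins_encode %{
//     __ addl($dst$$Register, $src$$Register);
//   %}
//   ins_pipe(ialu_reg_reg);
// %}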
 6742 
 6743 // ============================================================================
 6744 
 6745 instruct ShouldNotReachHere() %{
 6746   match(Halt);
 6747   format %{ "stop\t# ShouldNotReachHere" %}
 6748   ins_encode %{
 6749     if (is_reachable()) {
 6750       const char* str = __ code_string(_halt_reason);
 6751       __ stop(str);
 6752     }
 6753   %}
 6754   ins_pipe(pipe_slow);
 6755 %}
 6756 
 6757 // ============================================================================
 6758 
 6759 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6760 // Load Float
 6761 instruct MoveF2VL(vlRegF dst, regF src) %{
 6762   match(Set dst src);
 6763   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6764   ins_encode %{
 6765     ShouldNotReachHere();
 6766   %}
 6767   ins_pipe( fpu_reg_reg );
 6768 %}
 6769 
 6770 // Load Float
 6771 instruct MoveF2LEG(legRegF dst, regF src) %{
 6772   match(Set dst src);
 6773   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6774   ins_encode %{
 6775     ShouldNotReachHere();
 6776   %}
 6777   ins_pipe( fpu_reg_reg );
 6778 %}
 6779 
 6780 // Load Float
 6781 instruct MoveVL2F(regF dst, vlRegF src) %{
 6782   match(Set dst src);
 6783   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6784   ins_encode %{
 6785     ShouldNotReachHere();
 6786   %}
 6787   ins_pipe( fpu_reg_reg );
 6788 %}
 6789 
 6790 // Load Float
 6791 instruct MoveLEG2F(regF dst, legRegF src) %{
 6792   match(Set dst src);
 6793   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6794   ins_encode %{
 6795     ShouldNotReachHere();
 6796   %}
 6797   ins_pipe( fpu_reg_reg );
 6798 %}
 6799 
 6800 // Load Double
 6801 instruct MoveD2VL(vlRegD dst, regD src) %{
 6802   match(Set dst src);
 6803   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6804   ins_encode %{
 6805     ShouldNotReachHere();
 6806   %}
 6807   ins_pipe( fpu_reg_reg );
 6808 %}
 6809 
 6810 // Load Double
 6811 instruct MoveD2LEG(legRegD dst, regD src) %{
 6812   match(Set dst src);
 6813   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6814   ins_encode %{
 6815     ShouldNotReachHere();
 6816   %}
 6817   ins_pipe( fpu_reg_reg );
 6818 %}
 6819 
 6820 // Load Double
 6821 instruct MoveVL2D(regD dst, vlRegD src) %{
 6822   match(Set dst src);
 6823   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6824   ins_encode %{
 6825     ShouldNotReachHere();
 6826   %}
 6827   ins_pipe( fpu_reg_reg );
 6828 %}
 6829 
 6830 // Load Double
 6831 instruct MoveLEG2D(regD dst, legRegD src) %{
 6832   match(Set dst src);
 6833   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6834   ins_encode %{
 6835     ShouldNotReachHere();
 6836   %}
 6837   ins_pipe( fpu_reg_reg );
 6838 %}
 6839 
 6840 //----------Load/Store/Move Instructions---------------------------------------
 6841 //----------Load Instructions--------------------------------------------------
 6842 
 6843 // Load Byte (8 bit signed)
 6844 instruct loadB(rRegI dst, memory mem)
 6845 %{
 6846   match(Set dst (LoadB mem));
 6847 
 6848   ins_cost(125);
 6849   format %{ "movsbl  $dst, $mem\t# byte" %}
 6850 
 6851   ins_encode %{
 6852     __ movsbl($dst$$Register, $mem$$Address);
 6853   %}
 6854 
 6855   ins_pipe(ialu_reg_mem);
 6856 %}
 6857 
 6858 // Load Byte (8 bit signed) into Long Register
 6859 instruct loadB2L(rRegL dst, memory mem)
 6860 %{
 6861   match(Set dst (ConvI2L (LoadB mem)));
 6862 
 6863   ins_cost(125);
 6864   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6865 
 6866   ins_encode %{
 6867     __ movsbq($dst$$Register, $mem$$Address);
 6868   %}
 6869 
 6870   ins_pipe(ialu_reg_mem);
 6871 %}
 6872 
 6873 // Load Unsigned Byte (8 bit UNsigned)
 6874 instruct loadUB(rRegI dst, memory mem)
 6875 %{
 6876   match(Set dst (LoadUB mem));
 6877 
 6878   ins_cost(125);
 6879   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6880 
 6881   ins_encode %{
 6882     __ movzbl($dst$$Register, $mem$$Address);
 6883   %}
 6884 
 6885   ins_pipe(ialu_reg_mem);
 6886 %}
 6887 
 6888 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6889 instruct loadUB2L(rRegL dst, memory mem)
 6890 %{
 6891   match(Set dst (ConvI2L (LoadUB mem)));
 6892 
 6893   ins_cost(125);
 6894   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6895 
 6896   ins_encode %{
 6897     __ movzbq($dst$$Register, $mem$$Address);
 6898   %}
 6899 
 6900   ins_pipe(ialu_reg_mem);
 6901 %}
 6902 
 6903 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6904 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6905   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6906   effect(KILL cr);
 6907 
 6908   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6909             "andl    $dst, right_n_bits($mask, 8)" %}
 6910   ins_encode %{
 6911     Register Rdst = $dst$$Register;
 6912     __ movzbq(Rdst, $mem$$Address);
 6913     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6914   %}
 6915   ins_pipe(ialu_reg_mem);
 6916 %}
 6917 
 6918 // Load Short (16 bit signed)
 6919 instruct loadS(rRegI dst, memory mem)
 6920 %{
 6921   match(Set dst (LoadS mem));
 6922 
 6923   ins_cost(125);
 6924   format %{ "movswl $dst, $mem\t# short" %}
 6925 
 6926   ins_encode %{
 6927     __ movswl($dst$$Register, $mem$$Address);
 6928   %}
 6929 
 6930   ins_pipe(ialu_reg_mem);
 6931 %}
 6932 
 6933 // Load Short (16 bit signed) to Byte (8 bit signed)
 6934 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6935   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6936 
 6937   ins_cost(125);
 6938   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6939   ins_encode %{
 6940     __ movsbl($dst$$Register, $mem$$Address);
 6941   %}
 6942   ins_pipe(ialu_reg_mem);
 6943 %}
 6944 
 6945 // Load Short (16 bit signed) into Long Register
 6946 instruct loadS2L(rRegL dst, memory mem)
 6947 %{
 6948   match(Set dst (ConvI2L (LoadS mem)));
 6949 
 6950   ins_cost(125);
 6951   format %{ "movswq $dst, $mem\t# short -> long" %}
 6952 
 6953   ins_encode %{
 6954     __ movswq($dst$$Register, $mem$$Address);
 6955   %}
 6956 
 6957   ins_pipe(ialu_reg_mem);
 6958 %}
 6959 
 6960 // Load Unsigned Short/Char (16 bit UNsigned)
 6961 instruct loadUS(rRegI dst, memory mem)
 6962 %{
 6963   match(Set dst (LoadUS mem));
 6964 
 6965   ins_cost(125);
 6966   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 6967 
 6968   ins_encode %{
 6969     __ movzwl($dst$$Register, $mem$$Address);
 6970   %}
 6971 
 6972   ins_pipe(ialu_reg_mem);
 6973 %}
 6974 
 6975 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 6976 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6977   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 6978 
 6979   ins_cost(125);
 6980   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 6981   ins_encode %{
 6982     __ movsbl($dst$$Register, $mem$$Address);
 6983   %}
 6984   ins_pipe(ialu_reg_mem);
 6985 %}
 6986 
 6987 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 6988 instruct loadUS2L(rRegL dst, memory mem)
 6989 %{
 6990   match(Set dst (ConvI2L (LoadUS mem)));
 6991 
 6992   ins_cost(125);
 6993   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 6994 
 6995   ins_encode %{
 6996     __ movzwq($dst$$Register, $mem$$Address);
 6997   %}
 6998 
 6999   ins_pipe(ialu_reg_mem);
 7000 %}
 7001 
 7002 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7003 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7004   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7005 
 7006   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7007   ins_encode %{
 7008     __ movzbq($dst$$Register, $mem$$Address);
 7009   %}
 7010   ins_pipe(ialu_reg_mem);
 7011 %}
 7012 
 7013 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7014 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7015   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7016   effect(KILL cr);
 7017 
 7018   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7019             "andl    $dst, right_n_bits($mask, 16)" %}
 7020   ins_encode %{
 7021     Register Rdst = $dst$$Register;
 7022     __ movzwq(Rdst, $mem$$Address);
 7023     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7024   %}
 7025   ins_pipe(ialu_reg_mem);
 7026 %}
 7027 
 7028 // Load Integer
 7029 instruct loadI(rRegI dst, memory mem)
 7030 %{
 7031   match(Set dst (LoadI mem));
 7032 
 7033   ins_cost(125);
 7034   format %{ "movl    $dst, $mem\t# int" %}
 7035 
 7036   ins_encode %{
 7037     __ movl($dst$$Register, $mem$$Address);
 7038   %}
 7039 
 7040   ins_pipe(ialu_reg_mem);
 7041 %}
 7042 
 7043 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7044 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7045   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7046 
 7047   ins_cost(125);
 7048   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7049   ins_encode %{
 7050     __ movsbl($dst$$Register, $mem$$Address);
 7051   %}
 7052   ins_pipe(ialu_reg_mem);
 7053 %}
 7054 
 7055 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7056 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7057   match(Set dst (AndI (LoadI mem) mask));
 7058 
 7059   ins_cost(125);
 7060   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7061   ins_encode %{
 7062     __ movzbl($dst$$Register, $mem$$Address);
 7063   %}
 7064   ins_pipe(ialu_reg_mem);
 7065 %}
 7066 
 7067 // Load Integer (32 bit signed) to Short (16 bit signed)
 7068 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7069   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7070 
 7071   ins_cost(125);
 7072   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7073   ins_encode %{
 7074     __ movswl($dst$$Register, $mem$$Address);
 7075   %}
 7076   ins_pipe(ialu_reg_mem);
 7077 %}
 7078 
 7079 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7080 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7081   match(Set dst (AndI (LoadI mem) mask));
 7082 
 7083   ins_cost(125);
 7084   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7085   ins_encode %{
 7086     __ movzwl($dst$$Register, $mem$$Address);
 7087   %}
 7088   ins_pipe(ialu_reg_mem);
 7089 %}
 7090 
 7091 // Load Integer into Long Register
 7092 instruct loadI2L(rRegL dst, memory mem)
 7093 %{
 7094   match(Set dst (ConvI2L (LoadI mem)));
 7095 
 7096   ins_cost(125);
 7097   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7098 
 7099   ins_encode %{
 7100     __ movslq($dst$$Register, $mem$$Address);
 7101   %}
 7102 
 7103   ins_pipe(ialu_reg_mem);
 7104 %}
 7105 
 7106 // Load Integer with mask 0xFF into Long Register
 7107 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7108   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7109 
 7110   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7111   ins_encode %{
 7112     __ movzbq($dst$$Register, $mem$$Address);
 7113   %}
 7114   ins_pipe(ialu_reg_mem);
 7115 %}
 7116 
 7117 // Load Integer with mask 0xFFFF into Long Register
 7118 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7119   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7120 
 7121   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7122   ins_encode %{
 7123     __ movzwq($dst$$Register, $mem$$Address);
 7124   %}
 7125   ins_pipe(ialu_reg_mem);
 7126 %}
 7127 
 7128 // Load Integer with a 31-bit mask into Long Register
 7129 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7130   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7131   effect(KILL cr);
 7132 
 7133   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7134             "andl    $dst, $mask" %}
 7135   ins_encode %{
 7136     Register Rdst = $dst$$Register;
 7137     __ movl(Rdst, $mem$$Address);
 7138     __ andl(Rdst, $mask$$constant);
 7139   %}
 7140   ins_pipe(ialu_reg_mem);
 7141 %}
 7142 
 7143 // Load Unsigned Integer into Long Register
 7144 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7145 %{
 7146   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7147 
 7148   ins_cost(125);
 7149   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7150 
 7151   ins_encode %{
 7152     __ movl($dst$$Register, $mem$$Address);
 7153   %}
 7154 
 7155   ins_pipe(ialu_reg_mem);
 7156 %}
 7157 
 7158 // Load Long
 7159 instruct loadL(rRegL dst, memory mem)
 7160 %{
 7161   match(Set dst (LoadL mem));
 7162 
 7163   ins_cost(125);
 7164   format %{ "movq    $dst, $mem\t# long" %}
 7165 
 7166   ins_encode %{
 7167     __ movq($dst$$Register, $mem$$Address);
 7168   %}
 7169 
 7170   ins_pipe(ialu_reg_mem); // XXX
 7171 %}
 7172 
 7173 // Load Range
 7174 instruct loadRange(rRegI dst, memory mem)
 7175 %{
 7176   match(Set dst (LoadRange mem));
 7177 
 7178   ins_cost(125); // XXX
 7179   format %{ "movl    $dst, $mem\t# range" %}
 7180   ins_encode %{
 7181     __ movl($dst$$Register, $mem$$Address);
 7182   %}
 7183   ins_pipe(ialu_reg_mem);
 7184 %}
 7185 
 7186 // Load Pointer
 7187 instruct loadP(rRegP dst, memory mem)
 7188 %{
 7189   match(Set dst (LoadP mem));
 7190   predicate(n->as_Load()->barrier_data() == 0);
 7191 
 7192   ins_cost(125); // XXX
 7193   format %{ "movq    $dst, $mem\t# ptr" %}
 7194   ins_encode %{
 7195     __ movq($dst$$Register, $mem$$Address);
 7196   %}
 7197   ins_pipe(ialu_reg_mem); // XXX
 7198 %}
 7199 
 7200 // Load Compressed Pointer
 7201 instruct loadN(rRegN dst, memory mem)
 7202 %{
  predicate(n->as_Load()->barrier_data() == 0);
  match(Set dst (LoadN mem));

  ins_cost(125); // XXX
  format %{ "movl    $dst, $mem\t# compressed ptr" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
 7212 %}
 7213 
 7214 
 7215 // Load Klass Pointer
 7216 instruct loadKlass(rRegP dst, memory mem)
 7217 %{
 7218   match(Set dst (LoadKlass mem));
 7219 
 7220   ins_cost(125); // XXX
 7221   format %{ "movq    $dst, $mem\t# class" %}
 7222   ins_encode %{
 7223     __ movq($dst$$Register, $mem$$Address);
 7224   %}
 7225   ins_pipe(ialu_reg_mem); // XXX
 7226 %}
 7227 
 7228 // Load narrow Klass Pointer
 7229 instruct loadNKlass(rRegN dst, memory mem)
 7230 %{
 7231   predicate(!UseCompactObjectHeaders);
 7232   match(Set dst (LoadNKlass mem));
 7233 
 7234   ins_cost(125); // XXX
 7235   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7236   ins_encode %{
 7237     __ movl($dst$$Register, $mem$$Address);
 7238   %}
 7239   ins_pipe(ialu_reg_mem); // XXX
 7240 %}
 7241 
 7242 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7243 %{
 7244   predicate(UseCompactObjectHeaders);
 7245   match(Set dst (LoadNKlass mem));
 7246   effect(KILL cr);
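  // With compact object headers the narrow klass id occupies the upper bits of
  // the mark word, so the loaded value must be shifted down. On APX-capable
  // CPUs, eshrl folds the load and the shift into a single instruction.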
 7247   ins_cost(125);
 7248   format %{
 7249     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7250     "shrl    $dst, markWord::klass_shift_at_offset"
 7251   %}
 7252   ins_encode %{
 7253     if (UseAPX) {
 7254       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
 7255     }
 7256     else {
 7257       __ movl($dst$$Register, $mem$$Address);
 7258       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7259     }
 7260   %}
 7261   ins_pipe(ialu_reg_mem);
 7262 %}
 7263 
 7264 // Load Float
 7265 instruct loadF(regF dst, memory mem)
 7266 %{
 7267   match(Set dst (LoadF mem));
 7268 
 7269   ins_cost(145); // XXX
 7270   format %{ "movss   $dst, $mem\t# float" %}
 7271   ins_encode %{
 7272     __ movflt($dst$$XMMRegister, $mem$$Address);
 7273   %}
 7274   ins_pipe(pipe_slow); // XXX
 7275 %}
 7276 
 7277 // Load Double
 7278 instruct loadD_partial(regD dst, memory mem)
 7279 %{
 7280   predicate(!UseXmmLoadAndClearUpper);
 7281   match(Set dst (LoadD mem));
 7282 
 7283   ins_cost(145); // XXX
 7284   format %{ "movlpd  $dst, $mem\t# double" %}
 7285   ins_encode %{
 7286     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7287   %}
 7288   ins_pipe(pipe_slow); // XXX
 7289 %}
 7290 
 7291 instruct loadD(regD dst, memory mem)
 7292 %{
 7293   predicate(UseXmmLoadAndClearUpper);
 7294   match(Set dst (LoadD mem));
 7295 
 7296   ins_cost(145); // XXX
 7297   format %{ "movsd   $dst, $mem\t# double" %}
 7298   ins_encode %{
 7299     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7300   %}
 7301   ins_pipe(pipe_slow); // XXX
 7302 %}
 7303 
 7304 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7305 %{
 7306   match(Set dst con);
 7307 
 7308   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7309 
 7310   ins_encode %{
 7311     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7312   %}
 7313 
 7314   ins_pipe(ialu_reg_fat);
 7315 %}
 7316 
 7317 // max = java.lang.Math.max(float a, float b)
 7318 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
 7319   predicate(VM_Version::supports_avx10_2());
 7320   match(Set dst (MaxF a b));
 7321   format %{ "maxF $dst, $a, $b" %}
 7322   ins_encode %{
 7323     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7324   %}
 7325   ins_pipe( pipe_slow );
 7326 %}
 7327 
 7328 // max = java.lang.Math.max(float a, float b)
 7329 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7330   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7331   match(Set dst (MaxF a b));
 7332   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
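  // Plain (v)maxss does not give Java semantics: Math.max must propagate NaN
  // and order -0.0f below +0.0f, hence the compare/blend sequence using temps.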
 7333   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7334   ins_encode %{
 7335     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7336   %}
 7337   ins_pipe( pipe_slow );
 7338 %}
 7339 
 7340 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7341   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7342   match(Set dst (MaxF a b));
 7343   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7344 
 7345   format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7346   ins_encode %{
 7347     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7348                     false /*min*/, true /*single*/);
 7349   %}
 7350   ins_pipe( pipe_slow );
 7351 %}
 7352 
 7353 // max = java.lang.Math.max(double a, double b)
 7354 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
 7355   predicate(VM_Version::supports_avx10_2());
 7356   match(Set dst (MaxD a b));
 7357   format %{ "maxD $dst, $a, $b" %}
 7358   ins_encode %{
 7359     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7360   %}
 7361   ins_pipe( pipe_slow );
 7362 %}
 7363 
 7364 // max = java.lang.Math.max(double a, double b)
 7365 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7366   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7367   match(Set dst (MaxD a b));
 7368   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7369   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7370   ins_encode %{
 7371     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7372   %}
 7373   ins_pipe( pipe_slow );
 7374 %}
 7375 
 7376 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7377   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7378   match(Set dst (MaxD a b));
 7379   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7380 
 7381   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7382   ins_encode %{
 7383     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7384                     false /*min*/, false /*single*/);
 7385   %}
 7386   ins_pipe( pipe_slow );
 7387 %}
 7388 
// min = java.lang.Math.min(float a, float b)
 7390 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
 7391   predicate(VM_Version::supports_avx10_2());
 7392   match(Set dst (MinF a b));
 7393   format %{ "minF $dst, $a, $b" %}
 7394   ins_encode %{
 7395     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7396   %}
 7397   ins_pipe( pipe_slow );
 7398 %}
 7399 
 7400 // min = java.lang.Math.min(float a, float b)
 7401 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7402   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7403   match(Set dst (MinF a b));
 7404   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7405   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7406   ins_encode %{
 7407     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7408   %}
 7409   ins_pipe( pipe_slow );
 7410 %}
 7411 
 7412 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7413   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7414   match(Set dst (MinF a b));
 7415   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7416 
 7417   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7418   ins_encode %{
 7419     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7420                     true /*min*/, true /*single*/);
 7421   %}
 7422   ins_pipe( pipe_slow );
 7423 %}
 7424 
// min = java.lang.Math.min(double a, double b)
 7426 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
 7427   predicate(VM_Version::supports_avx10_2());
 7428   match(Set dst (MinD a b));
 7429   format %{ "minD $dst, $a, $b" %}
 7430   ins_encode %{
 7431     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7432   %}
 7433   ins_pipe( pipe_slow );
 7434 %}
 7435 
 7436 // min = java.lang.Math.min(double a, double b)
 7437 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7438   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7439   match(Set dst (MinD a b));
 7440   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7441     format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7442   ins_encode %{
 7443     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7444   %}
 7445   ins_pipe( pipe_slow );
 7446 %}
 7447 
 7448 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7449   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7450   match(Set dst (MinD a b));
 7451   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7452 
 7453   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7454   ins_encode %{
 7455     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7456                     true /*min*/, false /*single*/);
 7457   %}
 7458   ins_pipe( pipe_slow );
 7459 %}
 7460 
 7461 // Load Effective Address
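// lea materializes base + index*scale + disp in a register without touching
// memory, so pointer arithmetic costs a single ALU operation.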
 7462 instruct leaP8(rRegP dst, indOffset8 mem)
 7463 %{
 7464   match(Set dst mem);
 7465 
 7466   ins_cost(110); // XXX
 7467   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7468   ins_encode %{
 7469     __ leaq($dst$$Register, $mem$$Address);
 7470   %}
 7471   ins_pipe(ialu_reg_reg_fat);
 7472 %}
 7473 
 7474 instruct leaP32(rRegP dst, indOffset32 mem)
 7475 %{
 7476   match(Set dst mem);
 7477 
 7478   ins_cost(110);
 7479   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7480   ins_encode %{
 7481     __ leaq($dst$$Register, $mem$$Address);
 7482   %}
 7483   ins_pipe(ialu_reg_reg_fat);
 7484 %}
 7485 
 7486 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7487 %{
 7488   match(Set dst mem);
 7489 
 7490   ins_cost(110);
 7491   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7492   ins_encode %{
 7493     __ leaq($dst$$Register, $mem$$Address);
 7494   %}
 7495   ins_pipe(ialu_reg_reg_fat);
 7496 %}
 7497 
 7498 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7499 %{
 7500   match(Set dst mem);
 7501 
 7502   ins_cost(110);
 7503   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7504   ins_encode %{
 7505     __ leaq($dst$$Register, $mem$$Address);
 7506   %}
 7507   ins_pipe(ialu_reg_reg_fat);
 7508 %}
 7509 
 7510 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7511 %{
 7512   match(Set dst mem);
 7513 
 7514   ins_cost(110);
 7515   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7516   ins_encode %{
 7517     __ leaq($dst$$Register, $mem$$Address);
 7518   %}
 7519   ins_pipe(ialu_reg_reg_fat);
 7520 %}
 7521 
 7522 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7523 %{
 7524   match(Set dst mem);
 7525 
 7526   ins_cost(110);
 7527   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7528   ins_encode %{
 7529     __ leaq($dst$$Register, $mem$$Address);
 7530   %}
 7531   ins_pipe(ialu_reg_reg_fat);
 7532 %}
 7533 
 7534 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7535 %{
 7536   match(Set dst mem);
 7537 
 7538   ins_cost(110);
 7539   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7540   ins_encode %{
 7541     __ leaq($dst$$Register, $mem$$Address);
 7542   %}
 7543   ins_pipe(ialu_reg_reg_fat);
 7544 %}
 7545 
 7546 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7547 %{
 7548   match(Set dst mem);
 7549 
 7550   ins_cost(110);
 7551   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7552   ins_encode %{
 7553     __ leaq($dst$$Register, $mem$$Address);
 7554   %}
 7555   ins_pipe(ialu_reg_reg_fat);
 7556 %}
 7557 
// Load Effective Address which uses a narrow (32-bit) oop
 7559 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7560 %{
 7561   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7562   match(Set dst mem);
 7563 
 7564   ins_cost(110);
 7565   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7566   ins_encode %{
 7567     __ leaq($dst$$Register, $mem$$Address);
 7568   %}
 7569   ins_pipe(ialu_reg_reg_fat);
 7570 %}
 7571 
 7572 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7573 %{
 7574   predicate(CompressedOops::shift() == 0);
 7575   match(Set dst mem);
 7576 
 7577   ins_cost(110); // XXX
 7578   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7579   ins_encode %{
 7580     __ leaq($dst$$Register, $mem$$Address);
 7581   %}
 7582   ins_pipe(ialu_reg_reg_fat);
 7583 %}
 7584 
 7585 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7586 %{
 7587   predicate(CompressedOops::shift() == 0);
 7588   match(Set dst mem);
 7589 
 7590   ins_cost(110);
 7591   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7592   ins_encode %{
 7593     __ leaq($dst$$Register, $mem$$Address);
 7594   %}
 7595   ins_pipe(ialu_reg_reg_fat);
 7596 %}
 7597 
 7598 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7599 %{
 7600   predicate(CompressedOops::shift() == 0);
 7601   match(Set dst mem);
 7602 
 7603   ins_cost(110);
 7604   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7605   ins_encode %{
 7606     __ leaq($dst$$Register, $mem$$Address);
 7607   %}
 7608   ins_pipe(ialu_reg_reg_fat);
 7609 %}
 7610 
 7611 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7612 %{
 7613   predicate(CompressedOops::shift() == 0);
 7614   match(Set dst mem);
 7615 
 7616   ins_cost(110);
 7617   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7618   ins_encode %{
 7619     __ leaq($dst$$Register, $mem$$Address);
 7620   %}
 7621   ins_pipe(ialu_reg_reg_fat);
 7622 %}
 7623 
 7624 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7625 %{
 7626   predicate(CompressedOops::shift() == 0);
 7627   match(Set dst mem);
 7628 
 7629   ins_cost(110);
 7630   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7631   ins_encode %{
 7632     __ leaq($dst$$Register, $mem$$Address);
 7633   %}
 7634   ins_pipe(ialu_reg_reg_fat);
 7635 %}
 7636 
 7637 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7638 %{
 7639   predicate(CompressedOops::shift() == 0);
 7640   match(Set dst mem);
 7641 
 7642   ins_cost(110);
 7643   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7644   ins_encode %{
 7645     __ leaq($dst$$Register, $mem$$Address);
 7646   %}
 7647   ins_pipe(ialu_reg_reg_fat);
 7648 %}
 7649 
 7650 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7651 %{
 7652   predicate(CompressedOops::shift() == 0);
 7653   match(Set dst mem);
 7654 
 7655   ins_cost(110);
 7656   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7657   ins_encode %{
 7658     __ leaq($dst$$Register, $mem$$Address);
 7659   %}
 7660   ins_pipe(ialu_reg_reg_fat);
 7661 %}
 7662 
 7663 instruct loadConI(rRegI dst, immI src)
 7664 %{
 7665   match(Set dst src);
 7666 
 7667   format %{ "movl    $dst, $src\t# int" %}
 7668   ins_encode %{
 7669     __ movl($dst$$Register, $src$$constant);
 7670   %}
 7671   ins_pipe(ialu_reg_fat); // XXX
 7672 %}
 7673 
 7674 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7675 %{
 7676   match(Set dst src);
 7677   effect(KILL cr);
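  // xor-zeroing is shorter than moving an immediate and is recognized by the
  // hardware as dependency-breaking, but it clobbers EFLAGS, hence KILL cr.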
 7678 
 7679   ins_cost(50);
 7680   format %{ "xorl    $dst, $dst\t# int" %}
 7681   ins_encode %{
 7682     __ xorl($dst$$Register, $dst$$Register);
 7683   %}
 7684   ins_pipe(ialu_reg);
 7685 %}
 7686 
 7687 instruct loadConL(rRegL dst, immL src)
 7688 %{
 7689   match(Set dst src);
 7690 
 7691   ins_cost(150);
 7692   format %{ "movq    $dst, $src\t# long" %}
 7693   ins_encode %{
 7694     __ mov64($dst$$Register, $src$$constant);
 7695   %}
 7696   ins_pipe(ialu_reg);
 7697 %}
 7698 
 7699 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7700 %{
 7701   match(Set dst src);
 7702   effect(KILL cr);
 7703 
 7704   ins_cost(50);
 7705   format %{ "xorl    $dst, $dst\t# long" %}
 7706   ins_encode %{
 7707     __ xorl($dst$$Register, $dst$$Register);
 7708   %}
 7709   ins_pipe(ialu_reg); // XXX
 7710 %}
 7711 
 7712 instruct loadConUL32(rRegL dst, immUL32 src)
 7713 %{
 7714   match(Set dst src);
 7715 
 7716   ins_cost(60);
 7717   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7718   ins_encode %{
 7719     __ movl($dst$$Register, $src$$constant);
 7720   %}
 7721   ins_pipe(ialu_reg);
 7722 %}
 7723 
 7724 instruct loadConL32(rRegL dst, immL32 src)
 7725 %{
 7726   match(Set dst src);
 7727 
 7728   ins_cost(70);
 7729   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7730   ins_encode %{
 7731     __ movq($dst$$Register, $src$$constant);
 7732   %}
 7733   ins_pipe(ialu_reg);
 7734 %}
 7735 
 7736 instruct loadConP(rRegP dst, immP con) %{
 7737   match(Set dst con);
 7738 
 7739   format %{ "movq    $dst, $con\t# ptr" %}
 7740   ins_encode %{
 7741     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7742   %}
 7743   ins_pipe(ialu_reg_fat); // XXX
 7744 %}
 7745 
 7746 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7747 %{
 7748   match(Set dst src);
 7749   effect(KILL cr);
 7750 
 7751   ins_cost(50);
 7752   format %{ "xorl    $dst, $dst\t# ptr" %}
 7753   ins_encode %{
 7754     __ xorl($dst$$Register, $dst$$Register);
 7755   %}
 7756   ins_pipe(ialu_reg);
 7757 %}
 7758 
 7759 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7760 %{
 7761   match(Set dst src);
 7762   effect(KILL cr);
 7763 
 7764   ins_cost(60);
 7765   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7766   ins_encode %{
 7767     __ movl($dst$$Register, $src$$constant);
 7768   %}
 7769   ins_pipe(ialu_reg);
 7770 %}
 7771 
 7772 instruct loadConF(regF dst, immF con) %{
 7773   match(Set dst con);
 7774   ins_cost(125);
 7775   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7776   ins_encode %{
 7777     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7778   %}
 7779   ins_pipe(pipe_slow);
 7780 %}
 7781 
 7782 instruct loadConH(regF dst, immH con) %{
 7783   match(Set dst con);
 7784   ins_cost(125);
 7785   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7786   ins_encode %{
 7787     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7788   %}
 7789   ins_pipe(pipe_slow);
 7790 %}
 7791 
 7792 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7793   match(Set dst src);
 7794   effect(KILL cr);
 7795   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7796   ins_encode %{
 7797     __ xorq($dst$$Register, $dst$$Register);
 7798   %}
 7799   ins_pipe(ialu_reg);
 7800 %}
 7801 
 7802 instruct loadConN(rRegN dst, immN src) %{
 7803   match(Set dst src);
 7804 
 7805   ins_cost(125);
 7806   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7807   ins_encode %{
 7808     address con = (address)$src$$constant;
 7809     if (con == nullptr) {
 7810       ShouldNotReachHere();
 7811     } else {
 7812       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7813     }
 7814   %}
 7815   ins_pipe(ialu_reg_fat); // XXX
 7816 %}
 7817 
 7818 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7819   match(Set dst src);
 7820 
 7821   ins_cost(125);
 7822   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7823   ins_encode %{
 7824     address con = (address)$src$$constant;
 7825     if (con == nullptr) {
 7826       ShouldNotReachHere();
 7827     } else {
 7828       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7829     }
 7830   %}
 7831   ins_pipe(ialu_reg_fat); // XXX
 7832 %}
 7833 
 7834 instruct loadConF0(regF dst, immF0 src)
 7835 %{
 7836   match(Set dst src);
 7837   ins_cost(100);
 7838 
 7839   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7840   ins_encode %{
 7841     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7842   %}
 7843   ins_pipe(pipe_slow);
 7844 %}
 7845 
// Use the same format since predicate() cannot be used here.
 7847 instruct loadConD(regD dst, immD con) %{
 7848   match(Set dst con);
 7849   ins_cost(125);
 7850   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7851   ins_encode %{
 7852     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7853   %}
 7854   ins_pipe(pipe_slow);
 7855 %}
 7856 
 7857 instruct loadConD0(regD dst, immD0 src)
 7858 %{
 7859   match(Set dst src);
 7860   ins_cost(100);
 7861 
 7862   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7863   ins_encode %{
 7864     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7865   %}
 7866   ins_pipe(pipe_slow);
 7867 %}
 7868 
 7869 instruct loadSSI(rRegI dst, stackSlotI src)
 7870 %{
 7871   match(Set dst src);
 7872 
 7873   ins_cost(125);
 7874   format %{ "movl    $dst, $src\t# int stk" %}
 7875   ins_encode %{
 7876     __ movl($dst$$Register, $src$$Address);
 7877   %}
 7878   ins_pipe(ialu_reg_mem);
 7879 %}
 7880 
 7881 instruct loadSSL(rRegL dst, stackSlotL src)
 7882 %{
 7883   match(Set dst src);
 7884 
 7885   ins_cost(125);
 7886   format %{ "movq    $dst, $src\t# long stk" %}
 7887   ins_encode %{
 7888     __ movq($dst$$Register, $src$$Address);
 7889   %}
 7890   ins_pipe(ialu_reg_mem);
 7891 %}
 7892 
 7893 instruct loadSSP(rRegP dst, stackSlotP src)
 7894 %{
 7895   match(Set dst src);
 7896 
 7897   ins_cost(125);
 7898   format %{ "movq    $dst, $src\t# ptr stk" %}
 7899   ins_encode %{
 7900     __ movq($dst$$Register, $src$$Address);
 7901   %}
 7902   ins_pipe(ialu_reg_mem);
 7903 %}
 7904 
 7905 instruct loadSSF(regF dst, stackSlotF src)
 7906 %{
 7907   match(Set dst src);
 7908 
 7909   ins_cost(125);
 7910   format %{ "movss   $dst, $src\t# float stk" %}
 7911   ins_encode %{
 7912     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7913   %}
 7914   ins_pipe(pipe_slow); // XXX
 7915 %}
 7916 
// Use the same format since predicate() cannot be used here.
 7918 instruct loadSSD(regD dst, stackSlotD src)
 7919 %{
 7920   match(Set dst src);
 7921 
 7922   ins_cost(125);
 7923   format %{ "movsd   $dst, $src\t# double stk" %}
 7924   ins_encode  %{
 7925     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7926   %}
 7927   ins_pipe(pipe_slow); // XXX
 7928 %}
 7929 
 7930 // Prefetch instructions for allocation.
 7931 // Must be safe to execute with invalid address (cannot fault).
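// x86 PREFETCH* instructions are hints and never fault, even on unmapped
// addresses, so they can safely run ahead of the allocation pointer.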
 7932 
 7933 instruct prefetchAlloc( memory mem ) %{
 7934   predicate(AllocatePrefetchInstr==3);
 7935   match(PrefetchAllocation mem);
 7936   ins_cost(125);
 7937 
 7938   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7939   ins_encode %{
 7940     __ prefetchw($mem$$Address);
 7941   %}
 7942   ins_pipe(ialu_mem);
 7943 %}
 7944 
 7945 instruct prefetchAllocNTA( memory mem ) %{
 7946   predicate(AllocatePrefetchInstr==0);
 7947   match(PrefetchAllocation mem);
 7948   ins_cost(125);
 7949 
 7950   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7951   ins_encode %{
 7952     __ prefetchnta($mem$$Address);
 7953   %}
 7954   ins_pipe(ialu_mem);
 7955 %}
 7956 
 7957 instruct prefetchAllocT0( memory mem ) %{
 7958   predicate(AllocatePrefetchInstr==1);
 7959   match(PrefetchAllocation mem);
 7960   ins_cost(125);
 7961 
 7962   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7963   ins_encode %{
 7964     __ prefetcht0($mem$$Address);
 7965   %}
 7966   ins_pipe(ialu_mem);
 7967 %}
 7968 
 7969 instruct prefetchAllocT2( memory mem ) %{
 7970   predicate(AllocatePrefetchInstr==2);
 7971   match(PrefetchAllocation mem);
 7972   ins_cost(125);
 7973 
 7974   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 7975   ins_encode %{
 7976     __ prefetcht2($mem$$Address);
 7977   %}
 7978   ins_pipe(ialu_mem);
 7979 %}
 7980 
 7981 //----------Store Instructions-------------------------------------------------
 7982 
 7983 // Store Byte
 7984 instruct storeB(memory mem, rRegI src)
 7985 %{
 7986   match(Set mem (StoreB mem src));
 7987 
 7988   ins_cost(125); // XXX
 7989   format %{ "movb    $mem, $src\t# byte" %}
 7990   ins_encode %{
 7991     __ movb($mem$$Address, $src$$Register);
 7992   %}
 7993   ins_pipe(ialu_mem_reg);
 7994 %}
 7995 
 7996 // Store Char/Short
 7997 instruct storeC(memory mem, rRegI src)
 7998 %{
 7999   match(Set mem (StoreC mem src));
 8000 
 8001   ins_cost(125); // XXX
 8002   format %{ "movw    $mem, $src\t# char/short" %}
 8003   ins_encode %{
 8004     __ movw($mem$$Address, $src$$Register);
 8005   %}
 8006   ins_pipe(ialu_mem_reg);
 8007 %}
 8008 
 8009 // Store Integer
 8010 instruct storeI(memory mem, rRegI src)
 8011 %{
 8012   match(Set mem (StoreI mem src));
 8013 
 8014   ins_cost(125); // XXX
 8015   format %{ "movl    $mem, $src\t# int" %}
 8016   ins_encode %{
 8017     __ movl($mem$$Address, $src$$Register);
 8018   %}
 8019   ins_pipe(ialu_mem_reg);
 8020 %}
 8021 
 8022 // Store Long
 8023 instruct storeL(memory mem, rRegL src)
 8024 %{
 8025   match(Set mem (StoreL mem src));
 8026 
 8027   ins_cost(125); // XXX
 8028   format %{ "movq    $mem, $src\t# long" %}
 8029   ins_encode %{
 8030     __ movq($mem$$Address, $src$$Register);
 8031   %}
 8032   ins_pipe(ialu_mem_reg); // XXX
 8033 %}
 8034 
 8035 // Store Pointer
 8036 instruct storeP(memory mem, any_RegP src)
 8037 %{
 8038   predicate(n->as_Store()->barrier_data() == 0);
 8039   match(Set mem (StoreP mem src));
 8040 
 8041   ins_cost(125); // XXX
 8042   format %{ "movq    $mem, $src\t# ptr" %}
 8043   ins_encode %{
 8044     __ movq($mem$$Address, $src$$Register);
 8045   %}
 8046   ins_pipe(ialu_mem_reg);
 8047 %}
 8048 
 8049 instruct storeImmP0(memory mem, immP0 zero)
 8050 %{
 8051   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8052   match(Set mem (StoreP mem zero));
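  // With a null compressed-oop base, r12 (the heap base register) is known to
  // hold zero, so storing r12 writes a null without encoding an immediate.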
 8053 
 8054   ins_cost(125); // XXX
 8055   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8056   ins_encode %{
 8057     __ movq($mem$$Address, r12);
 8058   %}
 8059   ins_pipe(ialu_mem_reg);
 8060 %}
 8061 
 8062 // Store Null Pointer, mark word, or other simple pointer constant.
 8063 instruct storeImmP(memory mem, immP31 src)
 8064 %{
 8065   predicate(n->as_Store()->barrier_data() == 0);
 8066   match(Set mem (StoreP mem src));
 8067 
 8068   ins_cost(150); // XXX
 8069   format %{ "movq    $mem, $src\t# ptr" %}
 8070   ins_encode %{
 8071     __ movq($mem$$Address, $src$$constant);
 8072   %}
 8073   ins_pipe(ialu_mem_imm);
 8074 %}
 8075 
 8076 // Store Compressed Pointer
 8077 instruct storeN(memory mem, rRegN src)
 8078 %{
 8079   predicate(n->as_Store()->barrier_data() == 0);
 8080   match(Set mem (StoreN mem src));
 8081 
 8082   ins_cost(125); // XXX
 8083   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8084   ins_encode %{
 8085     __ movl($mem$$Address, $src$$Register);
 8086   %}
 8087   ins_pipe(ialu_mem_reg);
 8088 %}
 8089 
 8090 instruct storeNKlass(memory mem, rRegN src)
 8091 %{
 8092   match(Set mem (StoreNKlass mem src));
 8093 
 8094   ins_cost(125); // XXX
 8095   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8096   ins_encode %{
 8097     __ movl($mem$$Address, $src$$Register);
 8098   %}
 8099   ins_pipe(ialu_mem_reg);
 8100 %}
 8101 
 8102 instruct storeImmN0(memory mem, immN0 zero)
 8103 %{
 8104   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8105   match(Set mem (StoreN mem zero));
 8106 
 8107   ins_cost(125); // XXX
 8108   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8109   ins_encode %{
 8110     __ movl($mem$$Address, r12);
 8111   %}
 8112   ins_pipe(ialu_mem_reg);
 8113 %}
 8114 
 8115 instruct storeImmN(memory mem, immN src)
 8116 %{
 8117   predicate(n->as_Store()->barrier_data() == 0);
 8118   match(Set mem (StoreN mem src));
 8119 
 8120   ins_cost(150); // XXX
 8121   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8122   ins_encode %{
 8123     address con = (address)$src$$constant;
 8124     if (con == nullptr) {
 8125       __ movl($mem$$Address, 0);
 8126     } else {
 8127       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8128     }
 8129   %}
 8130   ins_pipe(ialu_mem_imm);
 8131 %}
 8132 
 8133 instruct storeImmNKlass(memory mem, immNKlass src)
 8134 %{
 8135   match(Set mem (StoreNKlass mem src));
 8136 
 8137   ins_cost(150); // XXX
 8138   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8139   ins_encode %{
 8140     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8141   %}
 8142   ins_pipe(ialu_mem_imm);
 8143 %}
 8144 
 8145 // Store Integer Immediate
 8146 instruct storeImmI0(memory mem, immI_0 zero)
 8147 %{
 8148   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8149   match(Set mem (StoreI mem zero));
 8150 
 8151   ins_cost(125); // XXX
 8152   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8153   ins_encode %{
 8154     __ movl($mem$$Address, r12);
 8155   %}
 8156   ins_pipe(ialu_mem_reg);
 8157 %}
 8158 
 8159 instruct storeImmI(memory mem, immI src)
 8160 %{
 8161   match(Set mem (StoreI mem src));
 8162 
 8163   ins_cost(150);
 8164   format %{ "movl    $mem, $src\t# int" %}
 8165   ins_encode %{
 8166     __ movl($mem$$Address, $src$$constant);
 8167   %}
 8168   ins_pipe(ialu_mem_imm);
 8169 %}
 8170 
 8171 // Store Long Immediate
 8172 instruct storeImmL0(memory mem, immL0 zero)
 8173 %{
 8174   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8175   match(Set mem (StoreL mem zero));
 8176 
 8177   ins_cost(125); // XXX
 8178   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8179   ins_encode %{
 8180     __ movq($mem$$Address, r12);
 8181   %}
 8182   ins_pipe(ialu_mem_reg);
 8183 %}
 8184 
 8185 instruct storeImmL(memory mem, immL32 src)
 8186 %{
 8187   match(Set mem (StoreL mem src));
 8188 
 8189   ins_cost(150);
 8190   format %{ "movq    $mem, $src\t# long" %}
 8191   ins_encode %{
 8192     __ movq($mem$$Address, $src$$constant);
 8193   %}
 8194   ins_pipe(ialu_mem_imm);
 8195 %}
 8196 
 8197 // Store Short/Char Immediate
 8198 instruct storeImmC0(memory mem, immI_0 zero)
 8199 %{
 8200   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8201   match(Set mem (StoreC mem zero));
 8202 
 8203   ins_cost(125); // XXX
 8204   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8205   ins_encode %{
 8206     __ movw($mem$$Address, r12);
 8207   %}
 8208   ins_pipe(ialu_mem_reg);
 8209 %}
 8210 
 8211 instruct storeImmI16(memory mem, immI16 src)
 8212 %{
 8213   predicate(UseStoreImmI16);
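  // A movw with an immediate carries a length-changing prefix (LCP) that can
  // stall instruction decode on some Intel cores, hence the UseStoreImmI16 gate.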
 8214   match(Set mem (StoreC mem src));
 8215 
 8216   ins_cost(150);
 8217   format %{ "movw    $mem, $src\t# short/char" %}
 8218   ins_encode %{
 8219     __ movw($mem$$Address, $src$$constant);
 8220   %}
 8221   ins_pipe(ialu_mem_imm);
 8222 %}
 8223 
 8224 // Store Byte Immediate
 8225 instruct storeImmB0(memory mem, immI_0 zero)
 8226 %{
 8227   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8228   match(Set mem (StoreB mem zero));
 8229 
 8230   ins_cost(125); // XXX
 8231   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8232   ins_encode %{
 8233     __ movb($mem$$Address, r12);
 8234   %}
 8235   ins_pipe(ialu_mem_reg);
 8236 %}
 8237 
 8238 instruct storeImmB(memory mem, immI8 src)
 8239 %{
 8240   match(Set mem (StoreB mem src));
 8241 
 8242   ins_cost(150); // XXX
 8243   format %{ "movb    $mem, $src\t# byte" %}
 8244   ins_encode %{
 8245     __ movb($mem$$Address, $src$$constant);
 8246   %}
 8247   ins_pipe(ialu_mem_imm);
 8248 %}
 8249 
 8250 // Store Float
 8251 instruct storeF(memory mem, regF src)
 8252 %{
 8253   match(Set mem (StoreF mem src));
 8254 
 8255   ins_cost(95); // XXX
 8256   format %{ "movss   $mem, $src\t# float" %}
 8257   ins_encode %{
 8258     __ movflt($mem$$Address, $src$$XMMRegister);
 8259   %}
 8260   ins_pipe(pipe_slow); // XXX
 8261 %}
 8262 
 8263 // Store immediate Float value (it is faster than store from XMM register)
 8264 instruct storeF0(memory mem, immF0 zero)
 8265 %{
 8266   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8267   match(Set mem (StoreF mem zero));
 8268 
 8269   ins_cost(25); // XXX
 8270   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8271   ins_encode %{
 8272     __ movl($mem$$Address, r12);
 8273   %}
 8274   ins_pipe(ialu_mem_reg);
 8275 %}
 8276 
 8277 instruct storeF_imm(memory mem, immF src)
 8278 %{
 8279   match(Set mem (StoreF mem src));
 8280 
 8281   ins_cost(50);
 8282   format %{ "movl    $mem, $src\t# float" %}
 8283   ins_encode %{
 8284     __ movl($mem$$Address, jint_cast($src$$constant));
 8285   %}
 8286   ins_pipe(ialu_mem_imm);
 8287 %}
 8288 
 8289 // Store Double
 8290 instruct storeD(memory mem, regD src)
 8291 %{
 8292   match(Set mem (StoreD mem src));
 8293 
 8294   ins_cost(95); // XXX
 8295   format %{ "movsd   $mem, $src\t# double" %}
 8296   ins_encode %{
 8297     __ movdbl($mem$$Address, $src$$XMMRegister);
 8298   %}
 8299   ins_pipe(pipe_slow); // XXX
 8300 %}
 8301 
 8302 // Store immediate double 0.0 (it is faster than store from XMM register)
 8303 instruct storeD0_imm(memory mem, immD0 src)
 8304 %{
 8305   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8306   match(Set mem (StoreD mem src));
 8307 
 8308   ins_cost(50);
 8309   format %{ "movq    $mem, $src\t# double 0." %}
 8310   ins_encode %{
 8311     __ movq($mem$$Address, $src$$constant);
 8312   %}
 8313   ins_pipe(ialu_mem_imm);
 8314 %}
 8315 
 8316 instruct storeD0(memory mem, immD0 zero)
 8317 %{
 8318   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8319   match(Set mem (StoreD mem zero));
 8320 
 8321   ins_cost(25); // XXX
 8322   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8323   ins_encode %{
 8324     __ movq($mem$$Address, r12);
 8325   %}
 8326   ins_pipe(ialu_mem_reg);
 8327 %}
 8328 
 8329 instruct storeSSI(stackSlotI dst, rRegI src)
 8330 %{
 8331   match(Set dst src);
 8332 
 8333   ins_cost(100);
 8334   format %{ "movl    $dst, $src\t# int stk" %}
 8335   ins_encode %{
 8336     __ movl($dst$$Address, $src$$Register);
 8337   %}
 8338   ins_pipe( ialu_mem_reg );
 8339 %}
 8340 
 8341 instruct storeSSL(stackSlotL dst, rRegL src)
 8342 %{
 8343   match(Set dst src);
 8344 
 8345   ins_cost(100);
 8346   format %{ "movq    $dst, $src\t# long stk" %}
 8347   ins_encode %{
 8348     __ movq($dst$$Address, $src$$Register);
 8349   %}
 8350   ins_pipe(ialu_mem_reg);
 8351 %}
 8352 
 8353 instruct storeSSP(stackSlotP dst, rRegP src)
 8354 %{
 8355   match(Set dst src);
 8356 
 8357   ins_cost(100);
 8358   format %{ "movq    $dst, $src\t# ptr stk" %}
 8359   ins_encode %{
 8360     __ movq($dst$$Address, $src$$Register);
 8361   %}
 8362   ins_pipe(ialu_mem_reg);
 8363 %}
 8364 
 8365 instruct storeSSF(stackSlotF dst, regF src)
 8366 %{
 8367   match(Set dst src);
 8368 
 8369   ins_cost(95); // XXX
 8370   format %{ "movss   $dst, $src\t# float stk" %}
 8371   ins_encode %{
 8372     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8373   %}
 8374   ins_pipe(pipe_slow); // XXX
 8375 %}
 8376 
 8377 instruct storeSSD(stackSlotD dst, regD src)
 8378 %{
 8379   match(Set dst src);
 8380 
 8381   ins_cost(95); // XXX
 8382   format %{ "movsd   $dst, $src\t# double stk" %}
 8383   ins_encode %{
 8384     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8385   %}
 8386   ins_pipe(pipe_slow); // XXX
 8387 %}
 8388 
 8389 instruct cacheWB(indirect addr)
 8390 %{
 8391   predicate(VM_Version::supports_data_cache_line_flush());
 8392   match(CacheWB addr);
 8393 
 8394   ins_cost(100);
 8395   format %{"cache wb $addr" %}
 8396   ins_encode %{
 8397     assert($addr->index_position() < 0, "should be");
 8398     assert($addr$$disp == 0, "should be");
 8399     __ cache_wb(Address($addr$$base$$Register, 0));
 8400   %}
 8401   ins_pipe(pipe_slow); // XXX
 8402 %}
 8403 
 8404 instruct cacheWBPreSync()
 8405 %{
 8406   predicate(VM_Version::supports_data_cache_line_flush());
 8407   match(CacheWBPreSync);
 8408 
 8409   ins_cost(100);
 8410   format %{"cache wb presync" %}
 8411   ins_encode %{
 8412     __ cache_wbsync(true);
 8413   %}
 8414   ins_pipe(pipe_slow); // XXX
 8415 %}
 8416 
 8417 instruct cacheWBPostSync()
 8418 %{
 8419   predicate(VM_Version::supports_data_cache_line_flush());
 8420   match(CacheWBPostSync);
 8421 
 8422   ins_cost(100);
 8423   format %{"cache wb postsync" %}
 8424   ins_encode %{
 8425     __ cache_wbsync(false);
 8426   %}
 8427   ins_pipe(pipe_slow); // XXX
 8428 %}
 8429 
 8430 //----------BSWAP Instructions-------------------------------------------------
 8431 instruct bytes_reverse_int(rRegI dst) %{
 8432   match(Set dst (ReverseBytesI dst));
 8433 
 8434   format %{ "bswapl  $dst" %}
 8435   ins_encode %{
 8436     __ bswapl($dst$$Register);
 8437   %}
 8438   ins_pipe( ialu_reg );
 8439 %}
 8440 
 8441 instruct bytes_reverse_long(rRegL dst) %{
 8442   match(Set dst (ReverseBytesL dst));
 8443 
 8444   format %{ "bswapq  $dst" %}
 8445   ins_encode %{
 8446     __ bswapq($dst$$Register);
 8447   %}
 8448   ins_pipe( ialu_reg);
 8449 %}
 8450 
 8451 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8452   match(Set dst (ReverseBytesUS dst));
 8453   effect(KILL cr);
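  // bswapl reverses all four bytes, leaving the interesting pair in the top
  // half; the logical shift brings it back down zero-extended.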
 8454 
 8455   format %{ "bswapl  $dst\n\t"
 8456             "shrl    $dst,16\n\t" %}
 8457   ins_encode %{
 8458     __ bswapl($dst$$Register);
 8459     __ shrl($dst$$Register, 16);
 8460   %}
 8461   ins_pipe( ialu_reg );
 8462 %}
 8463 
 8464 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8465   match(Set dst (ReverseBytesS dst));
 8466   effect(KILL cr);
 8467 
 8468   format %{ "bswapl  $dst\n\t"
 8469             "sar     $dst,16\n\t" %}
 8470   ins_encode %{
 8471     __ bswapl($dst$$Register);
 8472     __ sarl($dst$$Register, 16);
 8473   %}
 8474   ins_pipe( ialu_reg );
 8475 %}
 8476 
 8477 //---------- Zeros Count Instructions ------------------------------------------
 8478 
 8479 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8480   predicate(UseCountLeadingZerosInstruction);
 8481   match(Set dst (CountLeadingZerosI src));
 8482   effect(KILL cr);
 8483 
 8484   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8485   ins_encode %{
 8486     __ lzcntl($dst$$Register, $src$$Register);
 8487   %}
 8488   ins_pipe(ialu_reg);
 8489 %}
 8490 
 8491 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8492   predicate(UseCountLeadingZerosInstruction);
 8493   match(Set dst (CountLeadingZerosI (LoadI src)));
 8494   effect(KILL cr);
 8495   ins_cost(175);
 8496   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8497   ins_encode %{
 8498     __ lzcntl($dst$$Register, $src$$Address);
 8499   %}
 8500   ins_pipe(ialu_reg_mem);
 8501 %}
 8502 
 8503 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8504   predicate(!UseCountLeadingZerosInstruction);
 8505   match(Set dst (CountLeadingZerosI src));
 8506   effect(KILL cr);
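  // bsr yields the index of the highest set bit, so clz = 31 - index (neg then
  // add 31). A zero source sets ZF, and the -1 fixup makes the result 32.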
 8507 
 8508   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8509             "jnz     skip\n\t"
 8510             "movl    $dst, -1\n"
 8511       "skip:\n\t"
 8512             "negl    $dst\n\t"
 8513             "addl    $dst, 31" %}
 8514   ins_encode %{
 8515     Register Rdst = $dst$$Register;
 8516     Register Rsrc = $src$$Register;
 8517     Label skip;
 8518     __ bsrl(Rdst, Rsrc);
 8519     __ jccb(Assembler::notZero, skip);
 8520     __ movl(Rdst, -1);
 8521     __ bind(skip);
 8522     __ negl(Rdst);
 8523     __ addl(Rdst, BitsPerInt - 1);
 8524   %}
 8525   ins_pipe(ialu_reg);
 8526 %}
 8527 
 8528 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8529   predicate(UseCountLeadingZerosInstruction);
 8530   match(Set dst (CountLeadingZerosL src));
 8531   effect(KILL cr);
 8532 
 8533   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8534   ins_encode %{
 8535     __ lzcntq($dst$$Register, $src$$Register);
 8536   %}
 8537   ins_pipe(ialu_reg);
 8538 %}
 8539 
 8540 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8541   predicate(UseCountLeadingZerosInstruction);
 8542   match(Set dst (CountLeadingZerosL (LoadL src)));
 8543   effect(KILL cr);
 8544   ins_cost(175);
 8545   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8546   ins_encode %{
 8547     __ lzcntq($dst$$Register, $src$$Address);
 8548   %}
 8549   ins_pipe(ialu_reg_mem);
 8550 %}
 8551 
 8552 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8553   predicate(!UseCountLeadingZerosInstruction);
 8554   match(Set dst (CountLeadingZerosL src));
 8555   effect(KILL cr);
 8556 
 8557   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8558             "jnz     skip\n\t"
 8559             "movl    $dst, -1\n"
 8560       "skip:\n\t"
 8561             "negl    $dst\n\t"
 8562             "addl    $dst, 63" %}
 8563   ins_encode %{
 8564     Register Rdst = $dst$$Register;
 8565     Register Rsrc = $src$$Register;
 8566     Label skip;
 8567     __ bsrq(Rdst, Rsrc);
 8568     __ jccb(Assembler::notZero, skip);
 8569     __ movl(Rdst, -1);
 8570     __ bind(skip);
 8571     __ negl(Rdst);
 8572     __ addl(Rdst, BitsPerLong - 1);
 8573   %}
 8574   ins_pipe(ialu_reg);
 8575 %}
 8576 
 8577 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8578   predicate(UseCountTrailingZerosInstruction);
 8579   match(Set dst (CountTrailingZerosI src));
 8580   effect(KILL cr);
 8581 
 8582   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8583   ins_encode %{
 8584     __ tzcntl($dst$$Register, $src$$Register);
 8585   %}
 8586   ins_pipe(ialu_reg);
 8587 %}
 8588 
 8589 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8590   predicate(UseCountTrailingZerosInstruction);
 8591   match(Set dst (CountTrailingZerosI (LoadI src)));
 8592   effect(KILL cr);
 8593   ins_cost(175);
 8594   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8595   ins_encode %{
 8596     __ tzcntl($dst$$Register, $src$$Address);
 8597   %}
 8598   ins_pipe(ialu_reg_mem);
 8599 %}
 8600 
 8601 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8602   predicate(!UseCountTrailingZerosInstruction);
 8603   match(Set dst (CountTrailingZerosI src));
 8604   effect(KILL cr);
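  // bsf leaves the destination undefined when the source is zero (ZF=1), so 32
  // (BitsPerInt) is substituted explicitly on that path.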
 8605 
 8606   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8607             "jnz     done\n\t"
 8608             "movl    $dst, 32\n"
 8609       "done:" %}
 8610   ins_encode %{
 8611     Register Rdst = $dst$$Register;
 8612     Label done;
 8613     __ bsfl(Rdst, $src$$Register);
 8614     __ jccb(Assembler::notZero, done);
 8615     __ movl(Rdst, BitsPerInt);
 8616     __ bind(done);
 8617   %}
 8618   ins_pipe(ialu_reg);
 8619 %}
 8620 
 8621 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8622   predicate(UseCountTrailingZerosInstruction);
 8623   match(Set dst (CountTrailingZerosL src));
 8624   effect(KILL cr);
 8625 
 8626   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8627   ins_encode %{
 8628     __ tzcntq($dst$$Register, $src$$Register);
 8629   %}
 8630   ins_pipe(ialu_reg);
 8631 %}
 8632 
 8633 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8634   predicate(UseCountTrailingZerosInstruction);
 8635   match(Set dst (CountTrailingZerosL (LoadL src)));
 8636   effect(KILL cr);
 8637   ins_cost(175);
 8638   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8639   ins_encode %{
 8640     __ tzcntq($dst$$Register, $src$$Address);
 8641   %}
 8642   ins_pipe(ialu_reg_mem);
 8643 %}
 8644 
 8645 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8646   predicate(!UseCountTrailingZerosInstruction);
 8647   match(Set dst (CountTrailingZerosL src));
 8648   effect(KILL cr);
 8649 
 8650   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8651             "jnz     done\n\t"
 8652             "movl    $dst, 64\n"
 8653       "done:" %}
 8654   ins_encode %{
 8655     Register Rdst = $dst$$Register;
 8656     Label done;
 8657     __ bsfq(Rdst, $src$$Register);
 8658     __ jccb(Assembler::notZero, done);
 8659     __ movl(Rdst, BitsPerLong);
 8660     __ bind(done);
 8661   %}
 8662   ins_pipe(ialu_reg);
 8663 %}
 8664 
 8665 //--------------- Reverse Operation Instructions ----------------
 8666 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8667   predicate(!VM_Version::supports_gfni());
 8668   match(Set dst (ReverseI src));
 8669   effect(TEMP dst, TEMP rtmp, KILL cr);
 8670   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8671   ins_encode %{
 8672     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8673   %}
 8674   ins_pipe( ialu_reg );
 8675 %}
 8676 
 8677 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8678   predicate(VM_Version::supports_gfni());
 8679   match(Set dst (ReverseI src));
 8680   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8681   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8682   ins_encode %{
 8683     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8684   %}
 8685   ins_pipe( ialu_reg );
 8686 %}
 8687 
 8688 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8689   predicate(!VM_Version::supports_gfni());
 8690   match(Set dst (ReverseL src));
 8691   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8692   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8693   ins_encode %{
 8694     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8695   %}
 8696   ins_pipe( ialu_reg );
 8697 %}
 8698 
 8699 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8700   predicate(VM_Version::supports_gfni());
 8701   match(Set dst (ReverseL src));
 8702   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8703   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8704   ins_encode %{
 8705     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8706   %}
 8707   ins_pipe( ialu_reg );
 8708 %}
 8709 
 8710 //---------- Population Count Instructions -------------------------------------
 8711 
 8712 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8713   predicate(UsePopCountInstruction);
 8714   match(Set dst (PopCountI src));
 8715   effect(KILL cr);
 8716 
 8717   format %{ "popcnt  $dst, $src" %}
 8718   ins_encode %{
 8719     __ popcntl($dst$$Register, $src$$Register);
 8720   %}
 8721   ins_pipe(ialu_reg);
 8722 %}
 8723 
 8724 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8725   predicate(UsePopCountInstruction);
 8726   match(Set dst (PopCountI (LoadI mem)));
 8727   effect(KILL cr);
 8728 
 8729   format %{ "popcnt  $dst, $mem" %}
 8730   ins_encode %{
 8731     __ popcntl($dst$$Register, $mem$$Address);
 8732   %}
 8733   ins_pipe(ialu_reg);
 8734 %}
 8735 
 8736 // Note: Long.bitCount(long) returns an int.
 8737 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8738   predicate(UsePopCountInstruction);
 8739   match(Set dst (PopCountL src));
 8740   effect(KILL cr);
 8741 
 8742   format %{ "popcnt  $dst, $src" %}
 8743   ins_encode %{
 8744     __ popcntq($dst$$Register, $src$$Register);
 8745   %}
 8746   ins_pipe(ialu_reg);
 8747 %}
 8748 
 8749 // Note: Long.bitCount(long) returns an int.
 8750 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8751   predicate(UsePopCountInstruction);
 8752   match(Set dst (PopCountL (LoadL mem)));
 8753   effect(KILL cr);
 8754 
 8755   format %{ "popcnt  $dst, $mem" %}
 8756   ins_encode %{
 8757     __ popcntq($dst$$Register, $mem$$Address);
 8758   %}
 8759   ins_pipe(ialu_reg);
 8760 %}
 8761 
 8762 
 8763 //----------MemBar Instructions-----------------------------------------------
 8764 // Memory barrier flavors
 8765 
 8766 instruct membar_acquire()
 8767 %{
 8768   match(MemBarAcquire);
 8769   match(LoadFence);
 8770   ins_cost(0);
 8771 
 8772   size(0);
 8773   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8774   ins_encode();
 8775   ins_pipe(empty);
 8776 %}
 8777 
 8778 instruct membar_acquire_lock()
 8779 %{
 8780   match(MemBarAcquireLock);
 8781   ins_cost(0);
 8782 
 8783   size(0);
 8784   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8785   ins_encode();
 8786   ins_pipe(empty);
 8787 %}
 8788 
 8789 instruct membar_release()
 8790 %{
 8791   match(MemBarRelease);
 8792   match(StoreFence);
 8793   ins_cost(0);
 8794 
 8795   size(0);
 8796   format %{ "MEMBAR-release ! (empty encoding)" %}
 8797   ins_encode();
 8798   ins_pipe(empty);
 8799 %}
 8800 
 8801 instruct membar_release_lock()
 8802 %{
 8803   match(MemBarReleaseLock);
 8804   ins_cost(0);
 8805 
 8806   size(0);
 8807   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8808   ins_encode();
 8809   ins_pipe(empty);
 8810 %}
 8811 
 8812 instruct membar_volatile(rFlagsReg cr) %{
 8813   match(MemBarVolatile);
 8814   effect(KILL cr);
 8815   ins_cost(400);
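  // A locked read-modify-write of the top-of-stack word acts as a full
  // StoreLoad fence on x86 and is generally cheaper than mfence.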
 8816 
 8817   format %{
 8818     $$template
 8819     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8820   %}
 8821   ins_encode %{
 8822     __ membar(Assembler::StoreLoad);
 8823   %}
 8824   ins_pipe(pipe_slow);
 8825 %}
 8826 
 8827 instruct unnecessary_membar_volatile()
 8828 %{
 8829   match(MemBarVolatile);
 8830   predicate(Matcher::post_store_load_barrier(n));
 8831   ins_cost(0);
 8832 
 8833   size(0);
 8834   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8835   ins_encode();
 8836   ins_pipe(empty);
 8837 %}
 8838 
 8839 instruct membar_storestore() %{
 8840   match(MemBarStoreStore);
 8841   match(StoreStoreFence);
 8842   ins_cost(0);
 8843 
 8844   size(0);
 8845   format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
 8847   ins_pipe(empty);
 8848 %}
 8849 
 8850 //----------Move Instructions--------------------------------------------------
 8851 
 8852 instruct castX2P(rRegP dst, rRegL src)
 8853 %{
 8854   match(Set dst (CastX2P src));
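  // CastX2P is a bit-for-bit reinterpretation; the move is elided when the
  // allocator already placed src and dst in the same register.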
 8855 
 8856   format %{ "movq    $dst, $src\t# long->ptr" %}
 8857   ins_encode %{
 8858     if ($dst$$reg != $src$$reg) {
 8859       __ movptr($dst$$Register, $src$$Register);
 8860     }
 8861   %}
 8862   ins_pipe(ialu_reg_reg); // XXX
 8863 %}
 8864 
 8865 instruct castP2X(rRegL dst, rRegP src)
 8866 %{
 8867   match(Set dst (CastP2X src));
 8868 
 8869   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8870   ins_encode %{
 8871     if ($dst$$reg != $src$$reg) {
 8872       __ movptr($dst$$Register, $src$$Register);
 8873     }
 8874   %}
 8875   ins_pipe(ialu_reg_reg); // XXX
 8876 %}
 8877 
// Convert oop into int for vector alignment masking
 8879 instruct convP2I(rRegI dst, rRegP src)
 8880 %{
 8881   match(Set dst (ConvL2I (CastP2X src)));
 8882 
 8883   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8884   ins_encode %{
 8885     __ movl($dst$$Register, $src$$Register);
 8886   %}
 8887   ins_pipe(ialu_reg_reg); // XXX
 8888 %}
 8889 
// Convert compressed oop into int for vector alignment masking
// in the case of 32-bit oops (heap < 4GB).
 8892 instruct convN2I(rRegI dst, rRegN src)
 8893 %{
 8894   predicate(CompressedOops::shift() == 0);
 8895   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8896 
 8897   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8898   ins_encode %{
 8899     __ movl($dst$$Register, $src$$Register);
 8900   %}
 8901   ins_pipe(ialu_reg_reg); // XXX
 8902 %}
 8903 
 8904 // Convert oop pointer into compressed form
 8905 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8906   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8907   match(Set dst (EncodeP src));
 8908   effect(KILL cr);
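  // Encoding computes (oop - heap_base) >> shift; this general form also maps
  // null to zero, which the _not_null variant below assumes cannot occur.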
 8909   format %{ "encode_heap_oop $dst,$src" %}
 8910   ins_encode %{
 8911     Register s = $src$$Register;
 8912     Register d = $dst$$Register;
 8913     if (s != d) {
 8914       __ movq(d, s);
 8915     }
 8916     __ encode_heap_oop(d);
 8917   %}
 8918   ins_pipe(ialu_reg_long);
 8919 %}
 8920 
 8921 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8922   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8923   match(Set dst (EncodeP src));
 8924   effect(KILL cr);
 8925   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8926   ins_encode %{
 8927     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8928   %}
 8929   ins_pipe(ialu_reg_long);
 8930 %}
 8931 
 8932 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8933   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8934             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8935   match(Set dst (DecodeN src));
 8936   effect(KILL cr);
 8937   format %{ "decode_heap_oop $dst,$src" %}
 8938   ins_encode %{
 8939     Register s = $src$$Register;
 8940     Register d = $dst$$Register;
 8941     if (s != d) {
 8942       __ movq(d, s);
 8943     }
 8944     __ decode_heap_oop(d);
 8945   %}
 8946   ins_pipe(ialu_reg_long);
 8947 %}
 8948 
 8949 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8950   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 8951             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 8952   match(Set dst (DecodeN src));
 8953   effect(KILL cr);
 8954   format %{ "decode_heap_oop_not_null $dst,$src" %}
 8955   ins_encode %{
 8956     Register s = $src$$Register;
 8957     Register d = $dst$$Register;
 8958     if (s != d) {
 8959       __ decode_heap_oop_not_null(d, s);
 8960     } else {
 8961       __ decode_heap_oop_not_null(d);
 8962     }
 8963   %}
 8964   ins_pipe(ialu_reg_long);
 8965 %}
 8966 
 8967 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8968   match(Set dst (EncodePKlass src));
 8969   effect(TEMP dst, KILL cr);
 8970   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 8971   ins_encode %{
 8972     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8973   %}
 8974   ins_pipe(ialu_reg_long);
 8975 %}
 8976 
 8977 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8978   match(Set dst (DecodeNKlass src));
 8979   effect(TEMP dst, KILL cr);
 8980   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 8981   ins_encode %{
 8982     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8983   %}
 8984   ins_pipe(ialu_reg_long);
 8985 %}
 8986 
 8987 //----------Conditional Move---------------------------------------------------
// Jump (switch-table dispatch)
 8989 // dummy instruction for generating temp registers
 8990 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 8991   match(Jump (LShiftL switch_val shift));
 8992   ins_cost(350);
 8993   predicate(false);
 8994   effect(TEMP dest);
 8995 
 8996   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8997             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 8998   ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs r10
    // to do that, and the compiler treats r10 as an allocatable register.
    // So we build the dispatch address by hand.
 9002     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9003     // ArrayAddress dispatch(table, index);
 9004     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9005     __ lea($dest$$Register, $constantaddress);
 9006     __ jmp(dispatch);
 9007   %}
 9008   ins_pipe(pipe_jmp);
 9009 %}
 9010 
 9011 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9012   match(Jump (AddL (LShiftL switch_val shift) offset));
 9013   ins_cost(350);
 9014   effect(TEMP dest);
 9015 
 9016   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9017             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9018   ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs r10
    // to do that, and the compiler treats r10 as an allocatable register.
    // So we build the dispatch address by hand.
 9022     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9023     // ArrayAddress dispatch(table, index);
 9024     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9025     __ lea($dest$$Register, $constantaddress);
 9026     __ jmp(dispatch);
 9027   %}
 9028   ins_pipe(pipe_jmp);
 9029 %}
 9030 
 9031 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9032   match(Jump switch_val);
 9033   ins_cost(350);
 9034   effect(TEMP dest);
 9035 
 9036   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9037             "jmp     [$dest + $switch_val]\n\t" %}
 9038   ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs r10
    // to do that, and the compiler treats r10 as an allocatable register.
    // So we build the dispatch address by hand.
 9042     // Address index(noreg, switch_reg, Address::times_1);
 9043     // ArrayAddress dispatch(table, index);
 9044     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9045     __ lea($dest$$Register, $constantaddress);
 9046     __ jmp(dispatch);
 9047   %}
 9048   ins_pipe(pipe_jmp);
 9049 %}
 9050 
 9051 // Conditional move
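// The two-operand forms below match (CMoveX (Binary cop cr) (Binary dst src)),
// i.e. dst doubles as the false value, so a single cmov$cop moves src into dst
// only when the condition holds. The setb variants handle the special shape
// CMove(cond, 1, 0), which is simply !cond materialized with a negated setcc.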
 9052 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9053 %{
 9054   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9055   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9056 
 9057   ins_cost(100); // XXX
 9058   format %{ "setbn$cop $dst\t# signed, int" %}
 9059   ins_encode %{
 9060     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9061     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9062   %}
 9063   ins_pipe(ialu_reg);
 9064 %}
 9065 
 9066 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9067 %{
 9068   predicate(!UseAPX);
 9069   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9070 
 9071   ins_cost(200); // XXX
 9072   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9073   ins_encode %{
 9074     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9075   %}
 9076   ins_pipe(pipe_cmov_reg);
 9077 %}
 9078 
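// APX NDD (new data destination) variants take three operands; per the match
// rule (Binary src1 src2), $dst receives $src2 when $cop holds and $src1
// otherwise, leaving both sources unmodified.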
 9079 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9080 %{
 9081   predicate(UseAPX);
 9082   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9083 
 9084   ins_cost(200);
 9085   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9086   ins_encode %{
 9087     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9088   %}
 9089   ins_pipe(pipe_cmov_reg);
 9090 %}
 9091 
 9092 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9093 %{
 9094   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9095   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9096 
 9097   ins_cost(100); // XXX
 9098   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9099   ins_encode %{
 9100     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9101     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9102   %}
 9103   ins_pipe(ialu_reg);
 9104 %}
 9105 
 9106 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9107   predicate(!UseAPX);
 9108   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9109 
 9110   ins_cost(200); // XXX
 9111   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9112   ins_encode %{
 9113     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9114   %}
 9115   ins_pipe(pipe_cmov_reg);
 9116 %}
 9117 
 9118 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9119   predicate(UseAPX);
 9120   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9121 
 9122   ins_cost(200);
 9123   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9124   ins_encode %{
 9125     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9126   %}
 9127   ins_pipe(pipe_cmov_reg);
 9128 %}
 9129 
 9130 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9131 %{
 9132   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9133   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9134 
 9135   ins_cost(100); // XXX
 9136   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9137   ins_encode %{
 9138     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9139     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9140   %}
 9141   ins_pipe(ialu_reg);
 9142 %}
 9143 
 9144 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9145   predicate(!UseAPX);
 9146   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9147   ins_cost(200);
 9148   expand %{
 9149     cmovI_regU(cop, cr, dst, src);
 9150   %}
 9151 %}
 9152 
 9153 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
 9154   predicate(UseAPX);
 9155   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9156   ins_cost(200);
 9157   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9158   ins_encode %{
 9159     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9160   %}
 9161   ins_pipe(pipe_cmov_reg);
 9162 %}
 9163 
 9164 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9165   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9166   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9167 
 9168   ins_cost(200); // XXX
 9169   format %{ "cmovpl  $dst, $src\n\t"
 9170             "cmovnel $dst, $src" %}
 9171   ins_encode %{
 9172     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9173     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9174   %}
 9175   ins_pipe(pipe_cmov_reg);
 9176 %}
 9177 
 9178 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9179   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9180   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9181   effect(TEMP dst);
 9182 
 9183   ins_cost(200);
 9184   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9185             "cmovnel  $dst, $src2" %}
 9186   ins_encode %{
 9187     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9188     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9189   %}
 9190   ins_pipe(pipe_cmov_reg);
 9191 %}
 9192 
 9193 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9194 // inputs of the CMove
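// e.g. dst = (x == y) ? a : b becomes dst = (x != y) ? b : a, so the same
// cmovp/cmovne sequence works with the operands swapped.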
 9195 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9196   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9197   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9198   effect(TEMP dst);
 9199 
 9200   ins_cost(200); // XXX
 9201   format %{ "cmovpl  $dst, $src\n\t"
 9202             "cmovnel $dst, $src" %}
 9203   ins_encode %{
 9204     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9205     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9206   %}
 9207   ins_pipe(pipe_cmov_reg);
 9208 %}
 9209 
// This special handling is needed only for eq/ne comparisons, since NaN == NaN
// is false and the parity flag is set if either operand is NaN.
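// (ucomiss/ucomisd signal an unordered comparison by setting ZF, PF and CF,
// so PF identifies the NaN case here.)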
 9212 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9213   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9214   match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
 9215   effect(TEMP dst);
 9216 
 9217   ins_cost(200);
 9218   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9219             "cmovnel  $dst, $src2" %}
 9220   ins_encode %{
 9221     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9222     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9223   %}
 9224   ins_pipe(pipe_cmov_reg);
 9225 %}
 9226 
 9227 // Conditional move
 9228 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9229   predicate(!UseAPX);
 9230   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9231 
 9232   ins_cost(250); // XXX
 9233   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9234   ins_encode %{
 9235     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9236   %}
 9237   ins_pipe(pipe_cmov_mem);
 9238 %}
 9239 
 9240 // Conditional move
 9241 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9242 %{
 9243   predicate(UseAPX);
 9244   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9245 
 9246   ins_cost(250);
 9247   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9248   ins_encode %{
 9249     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9250   %}
 9251   ins_pipe(pipe_cmov_mem);
 9252 %}
 9253 
 9254 // Conditional move
 9255 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9256 %{
 9257   predicate(!UseAPX);
 9258   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9259 
 9260   ins_cost(250); // XXX
 9261   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9262   ins_encode %{
 9263     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9264   %}
 9265   ins_pipe(pipe_cmov_mem);
 9266 %}
 9267 
 9268 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9269   predicate(!UseAPX);
 9270   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9271   ins_cost(250);
 9272   expand %{
 9273     cmovI_memU(cop, cr, dst, src);
 9274   %}
 9275 %}
 9276 
 9277 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9278 %{
 9279   predicate(UseAPX);
 9280   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9281 
 9282   ins_cost(250);
 9283   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9284   ins_encode %{
 9285     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9286   %}
 9287   ins_pipe(pipe_cmov_mem);
 9288 %}
 9289 
 9290 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
 9291 %{
 9292   predicate(UseAPX);
 9293   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9294   ins_cost(250);
 9295   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9296   ins_encode %{
 9297     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9298   %}
 9299   ins_pipe(pipe_cmov_mem);
 9300 %}
 9301 
 9302 // Conditional move
 9303 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9304 %{
 9305   predicate(!UseAPX);
 9306   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9307 
 9308   ins_cost(200); // XXX
 9309   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9310   ins_encode %{
 9311     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9312   %}
 9313   ins_pipe(pipe_cmov_reg);
 9314 %}
 9315 
 9316 // Conditional move ndd
 9317 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9318 %{
 9319   predicate(UseAPX);
 9320   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9321 
 9322   ins_cost(200);
 9323   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9324   ins_encode %{
 9325     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9326   %}
 9327   ins_pipe(pipe_cmov_reg);
 9328 %}
 9329 
 9330 // Conditional move
 9331 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9332 %{
 9333   predicate(!UseAPX);
 9334   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9335 
 9336   ins_cost(200); // XXX
 9337   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9338   ins_encode %{
 9339     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9340   %}
 9341   ins_pipe(pipe_cmov_reg);
 9342 %}
 9343 
 9344 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9345   predicate(!UseAPX);
 9346   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9347   ins_cost(200);
 9348   expand %{
 9349     cmovN_regU(cop, cr, dst, src);
 9350   %}
 9351 %}
 9352 
 9353 // Conditional move ndd
 9354 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9355 %{
 9356   predicate(UseAPX);
 9357   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9358 
 9359   ins_cost(200);
 9360   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9361   ins_encode %{
 9362     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9363   %}
 9364   ins_pipe(pipe_cmov_reg);
 9365 %}
 9366 
 9367 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
 9368   predicate(UseAPX);
 9369   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9370   ins_cost(200);
 9371   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9372   ins_encode %{
 9373     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9374   %}
 9375   ins_pipe(pipe_cmov_reg);
 9376 %}
 9377 
 9378 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9379   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9380   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9381 
 9382   ins_cost(200); // XXX
 9383   format %{ "cmovpl  $dst, $src\n\t"
 9384             "cmovnel $dst, $src" %}
 9385   ins_encode %{
 9386     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9387     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9388   %}
 9389   ins_pipe(pipe_cmov_reg);
 9390 %}
 9391 
 9392 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9393 // inputs of the CMove
 9394 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9395   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9396   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9397 
 9398   ins_cost(200); // XXX
 9399   format %{ "cmovpl  $dst, $src\n\t"
 9400             "cmovnel $dst, $src" %}
 9401   ins_encode %{
 9402     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9403     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9404   %}
 9405   ins_pipe(pipe_cmov_reg);
 9406 %}
 9407 
 9408 // Conditional move
 9409 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9410 %{
 9411   predicate(!UseAPX);
 9412   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9413 
 9414   ins_cost(200); // XXX
 9415   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9416   ins_encode %{
 9417     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9418   %}
 9419   ins_pipe(pipe_cmov_reg);  // XXX
 9420 %}
 9421 
 9422 // Conditional move ndd
 9423 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9424 %{
 9425   predicate(UseAPX);
 9426   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9427 
 9428   ins_cost(200);
 9429   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9430   ins_encode %{
 9431     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9432   %}
 9433   ins_pipe(pipe_cmov_reg);
 9434 %}
 9435 
 9436 // Conditional move
 9437 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9438 %{
 9439   predicate(!UseAPX);
 9440   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9441 
 9442   ins_cost(200); // XXX
 9443   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9444   ins_encode %{
 9445     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9446   %}
 9447   ins_pipe(pipe_cmov_reg); // XXX
 9448 %}
 9449 
 9450 // Conditional move ndd
 9451 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9452 %{
 9453   predicate(UseAPX);
 9454   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9455 
 9456   ins_cost(200);
 9457   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9458   ins_encode %{
 9459     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9460   %}
 9461   ins_pipe(pipe_cmov_reg);
 9462 %}
 9463 
 9464 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9465   predicate(!UseAPX);
 9466   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9467   ins_cost(200);
 9468   expand %{
 9469     cmovP_regU(cop, cr, dst, src);
 9470   %}
 9471 %}
 9472 
 9473 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
 9474   predicate(UseAPX);
 9475   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9476   ins_cost(200);
 9477   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9478   ins_encode %{
 9479     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9480   %}
 9481   ins_pipe(pipe_cmov_reg);
 9482 %}
 9483 
 9484 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9485   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9486   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9487 
 9488   ins_cost(200); // XXX
 9489   format %{ "cmovpq  $dst, $src\n\t"
 9490             "cmovneq $dst, $src" %}
 9491   ins_encode %{
 9492     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9493     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9494   %}
 9495   ins_pipe(pipe_cmov_reg);
 9496 %}
 9497 
 9498 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9499   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9500   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9501   effect(TEMP dst);
 9502 
 9503   ins_cost(200);
 9504   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9505             "cmovneq  $dst, $src2" %}
 9506   ins_encode %{
 9507     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9508     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9509   %}
 9510   ins_pipe(pipe_cmov_reg);
 9511 %}
 9512 
 9513 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9514 // inputs of the CMove
 9515 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9516   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9517   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9518 
 9519   ins_cost(200); // XXX
 9520   format %{ "cmovpq  $dst, $src\n\t"
 9521             "cmovneq $dst, $src" %}
 9522   ins_encode %{
 9523     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9524     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9525   %}
 9526   ins_pipe(pipe_cmov_reg);
 9527 %}
 9528 
 9529 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9530   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9531   match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
 9532   effect(TEMP dst);
 9533 
 9534   ins_cost(200);
 9535   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9536             "cmovneq  $dst, $src2" %}
 9537   ins_encode %{
 9538     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9539     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9540   %}
 9541   ins_pipe(pipe_cmov_reg);
 9542 %}
 9543 
 9544 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9545 %{
 9546   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9547   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9548 
 9549   ins_cost(100); // XXX
 9550   format %{ "setbn$cop $dst\t# signed, long" %}
 9551   ins_encode %{
 9552     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9553     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9554   %}
 9555   ins_pipe(ialu_reg);
 9556 %}
 9557 
 9558 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9559 %{
 9560   predicate(!UseAPX);
 9561   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9562 
 9563   ins_cost(200); // XXX
 9564   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9565   ins_encode %{
 9566     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9567   %}
 9568   ins_pipe(pipe_cmov_reg);  // XXX
 9569 %}
 9570 
 9571 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9572 %{
 9573   predicate(UseAPX);
 9574   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9575 
 9576   ins_cost(200);
 9577   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9578   ins_encode %{
 9579     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9580   %}
 9581   ins_pipe(pipe_cmov_reg);
 9582 %}
 9583 
 9584 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9585 %{
 9586   predicate(!UseAPX);
 9587   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9588 
 9589   ins_cost(200); // XXX
 9590   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9591   ins_encode %{
 9592     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9593   %}
 9594   ins_pipe(pipe_cmov_mem);  // XXX
 9595 %}
 9596 
 9597 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9598 %{
 9599   predicate(UseAPX);
 9600   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9601 
 9602   ins_cost(200);
 9603   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9604   ins_encode %{
 9605     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9606   %}
 9607   ins_pipe(pipe_cmov_mem);
 9608 %}
 9609 
 9610 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9611 %{
 9612   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9613   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9614 
 9615   ins_cost(100); // XXX
 9616   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9617   ins_encode %{
 9618     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9619     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9620   %}
 9621   ins_pipe(ialu_reg);
 9622 %}
 9623 
 9624 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9625 %{
 9626   predicate(!UseAPX);
 9627   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9628 
 9629   ins_cost(200); // XXX
 9630   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9631   ins_encode %{
 9632     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9633   %}
 9634   ins_pipe(pipe_cmov_reg); // XXX
 9635 %}
 9636 
 9637 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9638 %{
 9639   predicate(UseAPX);
 9640   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9641 
 9642   ins_cost(200);
 9643   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9644   ins_encode %{
 9645     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9646   %}
 9647   ins_pipe(pipe_cmov_reg);
 9648 %}
 9649 
 9650 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9651 %{
 9652   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9653   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9654 
 9655   ins_cost(100); // XXX
 9656   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9657   ins_encode %{
 9658     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9659     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9660   %}
 9661   ins_pipe(ialu_reg);
 9662 %}
 9663 
 9664 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9665   predicate(!UseAPX);
 9666   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9667   ins_cost(200);
 9668   expand %{
 9669     cmovL_regU(cop, cr, dst, src);
 9670   %}
 9671 %}
 9672 
 9673 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
 9674 %{
 9675   predicate(UseAPX);
 9676   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9677   ins_cost(200);
 9678   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9679   ins_encode %{
 9680     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9681   %}
 9682   ins_pipe(pipe_cmov_reg);
 9683 %}
 9684 
 9685 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9686   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9687   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9688 
 9689   ins_cost(200); // XXX
 9690   format %{ "cmovpq  $dst, $src\n\t"
 9691             "cmovneq $dst, $src" %}
 9692   ins_encode %{
 9693     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9694     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9695   %}
 9696   ins_pipe(pipe_cmov_reg);
 9697 %}
 9698 
 9699 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9700   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9701   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9702   effect(TEMP dst);
 9703 
 9704   ins_cost(200);
 9705   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9706             "cmovneq  $dst, $src2" %}
 9707   ins_encode %{
 9708     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9709     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9710   %}
 9711   ins_pipe(pipe_cmov_reg);
 9712 %}
 9713 
 9714 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9715 // inputs of the CMove
 9716 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9717   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9718   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9719 
 9720   ins_cost(200); // XXX
 9721   format %{ "cmovpq  $dst, $src\n\t"
 9722             "cmovneq $dst, $src" %}
 9723   ins_encode %{
 9724     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9725     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9726   %}
 9727   ins_pipe(pipe_cmov_reg);
 9728 %}
 9729 
 9730 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9731   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9732   match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
 9733   effect(TEMP dst);
 9734 
 9735   ins_cost(200);
 9736   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9737             "cmovneq $dst, $src2" %}
 9738   ins_encode %{
 9739     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9740     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9741   %}
 9742   ins_pipe(pipe_cmov_reg);
 9743 %}
 9744 
 9745 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9746 %{
 9747   predicate(!UseAPX);
 9748   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9749 
 9750   ins_cost(200); // XXX
 9751   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9752   ins_encode %{
 9753     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9754   %}
 9755   ins_pipe(pipe_cmov_mem); // XXX
 9756 %}
 9757 
 9758 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9759   predicate(!UseAPX);
 9760   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9761   ins_cost(200);
 9762   expand %{
 9763     cmovL_memU(cop, cr, dst, src);
 9764   %}
 9765 %}
 9766 
 9767 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9768 %{
 9769   predicate(UseAPX);
 9770   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9771 
 9772   ins_cost(200);
 9773   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9774   ins_encode %{
 9775     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9776   %}
 9777   ins_pipe(pipe_cmov_mem);
 9778 %}
 9779 
 9780 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
 9781 %{
 9782   predicate(UseAPX);
 9783   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9784   ins_cost(200);
 9785   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9786   ins_encode %{
 9787     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9788   %}
 9789   ins_pipe(pipe_cmov_mem);
 9790 %}
 9791 
 9792 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9793 %{
 9794   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9795 
 9796   ins_cost(200); // XXX
 9797   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9798             "movss     $dst, $src\n"
 9799     "skip:" %}
 9800   ins_encode %{
 9801     Label Lskip;
 9802     // Invert sense of branch from sense of CMOV
 9803     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9804     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9805     __ bind(Lskip);
 9806   %}
 9807   ins_pipe(pipe_slow);
 9808 %}
 9809 
 9810 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9811 %{
 9812   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9813 
 9814   ins_cost(200); // XXX
 9815   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9816             "movss     $dst, $src\n"
 9817     "skip:" %}
 9818   ins_encode %{
 9819     Label Lskip;
 9820     // Invert sense of branch from sense of CMOV
 9821     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9822     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9823     __ bind(Lskip);
 9824   %}
 9825   ins_pipe(pipe_slow);
 9826 %}
 9827 
 9828 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9829   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9830   ins_cost(200);
 9831   expand %{
 9832     cmovF_regU(cop, cr, dst, src);
 9833   %}
 9834 %}
 9835 
 9836 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9837 %{
 9838   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9839 
 9840   ins_cost(200); // XXX
 9841   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9842             "movsd     $dst, $src\n"
 9843     "skip:" %}
 9844   ins_encode %{
 9845     Label Lskip;
 9846     // Invert sense of branch from sense of CMOV
 9847     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9848     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9849     __ bind(Lskip);
 9850   %}
 9851   ins_pipe(pipe_slow);
 9852 %}
 9853 
 9854 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9855 %{
 9856   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9857 
 9858   ins_cost(200); // XXX
 9859   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9860             "movsd     $dst, $src\n"
 9861     "skip:" %}
 9862   ins_encode %{
 9863     Label Lskip;
 9864     // Invert sense of branch from sense of CMOV
 9865     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9866     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9867     __ bind(Lskip);
 9868   %}
 9869   ins_pipe(pipe_slow);
 9870 %}
 9871 
 9872 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9873   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9874   ins_cost(200);
 9875   expand %{
 9876     cmovD_regU(cop, cr, dst, src);
 9877   %}
 9878 %}
 9879 
 9880 //----------Arithmetic Instructions--------------------------------------------
 9881 //----------Addition Instructions----------------------------------------------
 9882 
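// The flag(PD::Flag_sets_*) annotations record which condition-code bits an
// instruction defines, so later nodes may reuse EFLAGS instead of re-comparing.
// The Flag_ndd_demotable_opr* bits mark APX NDD forms that can presumably be
// demoted to the shorter legacy two-operand encoding when the destination
// coincides with that source.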
 9883 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9884 %{
 9885   predicate(!UseAPX);
 9886   match(Set dst (AddI dst src));
 9887   effect(KILL cr);
 9888   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9889   format %{ "addl    $dst, $src\t# int" %}
 9890   ins_encode %{
 9891     __ addl($dst$$Register, $src$$Register);
 9892   %}
 9893   ins_pipe(ialu_reg_reg);
 9894 %}
 9895 
 9896 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9897 %{
 9898   predicate(UseAPX);
 9899   match(Set dst (AddI src1 src2));
 9900   effect(KILL cr);
 9901   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9902 
 9903   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9904   ins_encode %{
 9905     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9906   %}
 9907   ins_pipe(ialu_reg_reg);
 9908 %}
 9909 
 9910 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9911 %{
 9912   predicate(!UseAPX);
 9913   match(Set dst (AddI dst src));
 9914   effect(KILL cr);
 9915   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9916 
 9917   format %{ "addl    $dst, $src\t# int" %}
 9918   ins_encode %{
 9919     __ addl($dst$$Register, $src$$constant);
 9920   %}
 9921   ins_pipe( ialu_reg );
 9922 %}
 9923 
 9924 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9925 %{
 9926   predicate(UseAPX);
 9927   match(Set dst (AddI src1 src2));
 9928   effect(KILL cr);
 9929   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9930 
 9931   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9932   ins_encode %{
 9933     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9934   %}
 9935   ins_pipe( ialu_reg );
 9936 %}
 9937 
 9938 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9939 %{
 9940   predicate(UseAPX);
 9941   match(Set dst (AddI (LoadI src1) src2));
 9942   effect(KILL cr);
 9943   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9944 
 9945   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9946   ins_encode %{
 9947     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9948   %}
 9949   ins_pipe( ialu_reg );
 9950 %}
 9951 
 9952 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9953 %{
 9954   predicate(!UseAPX);
 9955   match(Set dst (AddI dst (LoadI src)));
 9956   effect(KILL cr);
 9957   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9958 
 9959   ins_cost(150); // XXX
 9960   format %{ "addl    $dst, $src\t# int" %}
 9961   ins_encode %{
 9962     __ addl($dst$$Register, $src$$Address);
 9963   %}
 9964   ins_pipe(ialu_reg_mem);
 9965 %}
 9966 
 9967 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
 9968 %{
 9969   predicate(UseAPX);
 9970   match(Set dst (AddI src1 (LoadI src2)));
 9971   effect(KILL cr);
 9972   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9973 
 9974   ins_cost(150);
 9975   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9976   ins_encode %{
 9977     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
 9978   %}
 9979   ins_pipe(ialu_reg_mem);
 9980 %}
 9981 
 9982 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9983 %{
 9984   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9985   effect(KILL cr);
 9986   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9987 
 9988   ins_cost(150); // XXX
 9989   format %{ "addl    $dst, $src\t# int" %}
 9990   ins_encode %{
 9991     __ addl($dst$$Address, $src$$Register);
 9992   %}
 9993   ins_pipe(ialu_mem_reg);
 9994 %}
 9995 
 9996 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9997 %{
 9998   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9999   effect(KILL cr);
10000   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10003   ins_cost(125); // XXX
10004   format %{ "addl    $dst, $src\t# int" %}
10005   ins_encode %{
10006     __ addl($dst$$Address, $src$$constant);
10007   %}
10008   ins_pipe(ialu_mem_imm);
10009 %}
10010 
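// incl/decl update OF, SF, ZF, AF and PF but leave CF untouched, which is why
// no carry flag is advertised here and why these forms are gated on UseIncDec
// (the partial flags update can be slower on some pipelines).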
10011 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10012 %{
10013   predicate(!UseAPX && UseIncDec);
10014   match(Set dst (AddI dst src));
10015   effect(KILL cr);
10016 
10017   format %{ "incl    $dst\t# int" %}
10018   ins_encode %{
10019     __ incrementl($dst$$Register);
10020   %}
10021   ins_pipe(ialu_reg);
10022 %}
10023 
10024 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10025 %{
10026   predicate(UseAPX && UseIncDec);
10027   match(Set dst (AddI src val));
10028   effect(KILL cr);
10029   flag(PD::Flag_ndd_demotable_opr1);
10030 
10031   format %{ "eincl    $dst, $src\t# int ndd" %}
10032   ins_encode %{
10033     __ eincl($dst$$Register, $src$$Register, false);
10034   %}
10035   ins_pipe(ialu_reg);
10036 %}
10037 
10038 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10039 %{
10040   predicate(UseAPX && UseIncDec);
10041   match(Set dst (AddI (LoadI src) val));
10042   effect(KILL cr);
10043 
10044   format %{ "eincl    $dst, $src\t# int ndd" %}
10045   ins_encode %{
10046     __ eincl($dst$$Register, $src$$Address, false);
10047   %}
10048   ins_pipe(ialu_reg);
10049 %}
10050 
10051 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10052 %{
10053   predicate(UseIncDec);
10054   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10055   effect(KILL cr);
10056 
10057   ins_cost(125); // XXX
10058   format %{ "incl    $dst\t# int" %}
10059   ins_encode %{
10060     __ incrementl($dst$$Address);
10061   %}
10062   ins_pipe(ialu_mem_imm);
10063 %}
10064 
10065 // XXX why does that use AddI
10066 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10067 %{
10068   predicate(!UseAPX && UseIncDec);
10069   match(Set dst (AddI dst src));
10070   effect(KILL cr);
10071 
10072   format %{ "decl    $dst\t# int" %}
10073   ins_encode %{
10074     __ decrementl($dst$$Register);
10075   %}
10076   ins_pipe(ialu_reg);
10077 %}
10078 
10079 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10080 %{
10081   predicate(UseAPX && UseIncDec);
10082   match(Set dst (AddI src val));
10083   effect(KILL cr);
10084   flag(PD::Flag_ndd_demotable_opr1);
10085 
10086   format %{ "edecl    $dst, $src\t# int ndd" %}
10087   ins_encode %{
10088     __ edecl($dst$$Register, $src$$Register, false);
10089   %}
10090   ins_pipe(ialu_reg);
10091 %}
10092 
10093 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10094 %{
10095   predicate(UseAPX && UseIncDec);
10096   match(Set dst (AddI (LoadI src) val));
10097   effect(KILL cr);
10098 
10099   format %{ "edecl    $dst, $src\t# int ndd" %}
10100   ins_encode %{
10101     __ edecl($dst$$Register, $src$$Address, false);
10102   %}
10103   ins_pipe(ialu_reg);
10104 %}
10105 
10106 // XXX why does that use AddI
10107 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10108 %{
10109   predicate(UseIncDec);
10110   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10111   effect(KILL cr);
10112 
10113   ins_cost(125); // XXX
10114   format %{ "decl    $dst\t# int" %}
10115   ins_encode %{
10116     __ decrementl($dst$$Address);
10117   %}
10118   ins_pipe(ialu_mem_imm);
10119 %}
10120 
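// Where VM_Version reports fast two-/three-component LEA, shift-and-add
// shapes fold into a single leal/leaq using base, index, scale and
// displacement, avoiding the intermediate add/shift instructions.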
10121 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10122 %{
10123   predicate(VM_Version::supports_fast_2op_lea());
10124   match(Set dst (AddI (LShiftI index scale) disp));
10125 
10126   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10127   ins_encode %{
10128     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10129     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10130   %}
10131   ins_pipe(ialu_reg_reg);
10132 %}
10133 
10134 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10135 %{
10136   predicate(VM_Version::supports_fast_3op_lea());
10137   match(Set dst (AddI (AddI base index) disp));
10138 
10139   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10140   ins_encode %{
10141     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10142   %}
10143   ins_pipe(ialu_reg_reg);
10144 %}
10145 
10146 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10147 %{
10148   predicate(VM_Version::supports_fast_2op_lea());
10149   match(Set dst (AddI base (LShiftI index scale)));
10150 
10151   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10152   ins_encode %{
10153     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10154     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10155   %}
10156   ins_pipe(ialu_reg_reg);
10157 %}
10158 
10159 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10160 %{
10161   predicate(VM_Version::supports_fast_3op_lea());
10162   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10163 
10164   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10165   ins_encode %{
10166     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10167     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10168   %}
10169   ins_pipe(ialu_reg_reg);
10170 %}
10171 
10172 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10173 %{
10174   predicate(!UseAPX);
10175   match(Set dst (AddL dst src));
10176   effect(KILL cr);
10177   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10178 
10179   format %{ "addq    $dst, $src\t# long" %}
10180   ins_encode %{
10181     __ addq($dst$$Register, $src$$Register);
10182   %}
10183   ins_pipe(ialu_reg_reg);
10184 %}
10185 
10186 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10187 %{
10188   predicate(UseAPX);
10189   match(Set dst (AddL src1 src2));
10190   effect(KILL cr);
10191   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10192 
10193   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10194   ins_encode %{
10195     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10196   %}
10197   ins_pipe(ialu_reg_reg);
10198 %}
10199 
10200 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10201 %{
10202   predicate(!UseAPX);
10203   match(Set dst (AddL dst src));
10204   effect(KILL cr);
10205   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10206 
10207   format %{ "addq    $dst, $src\t# long" %}
10208   ins_encode %{
10209     __ addq($dst$$Register, $src$$constant);
10210   %}
10211   ins_pipe( ialu_reg );
10212 %}
10213 
10214 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10215 %{
10216   predicate(UseAPX);
10217   match(Set dst (AddL src1 src2));
10218   effect(KILL cr);
10219   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10220 
10221   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10222   ins_encode %{
10223     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10224   %}
10225   ins_pipe( ialu_reg );
10226 %}
10227 
10228 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10229 %{
10230   predicate(UseAPX);
10231   match(Set dst (AddL (LoadL src1) src2));
10232   effect(KILL cr);
10233   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10234 
10235   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10236   ins_encode %{
10237     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10238   %}
10239   ins_pipe( ialu_reg );
10240 %}
10241 
10242 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10243 %{
10244   predicate(!UseAPX);
10245   match(Set dst (AddL dst (LoadL src)));
10246   effect(KILL cr);
10247   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10248 
10249   ins_cost(150); // XXX
10250   format %{ "addq    $dst, $src\t# long" %}
10251   ins_encode %{
10252     __ addq($dst$$Register, $src$$Address);
10253   %}
10254   ins_pipe(ialu_reg_mem);
10255 %}
10256 
10257 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10258 %{
10259   predicate(UseAPX);
10260   match(Set dst (AddL src1 (LoadL src2)));
10261   effect(KILL cr);
10262   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10263 
10264   ins_cost(150);
10265   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10266   ins_encode %{
10267     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10268   %}
10269   ins_pipe(ialu_reg_mem);
10270 %}
10271 
10272 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10273 %{
10274   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10275   effect(KILL cr);
10276   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10277 
10278   ins_cost(150); // XXX
10279   format %{ "addq    $dst, $src\t# long" %}
10280   ins_encode %{
10281     __ addq($dst$$Address, $src$$Register);
10282   %}
10283   ins_pipe(ialu_mem_reg);
10284 %}
10285 
10286 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10287 %{
10288   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10289   effect(KILL cr);
10290   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10291 
10292   ins_cost(125); // XXX
10293   format %{ "addq    $dst, $src\t# long" %}
10294   ins_encode %{
10295     __ addq($dst$$Address, $src$$constant);
10296   %}
10297   ins_pipe(ialu_mem_imm);
10298 %}
10299 
10300 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10301 %{
10302   predicate(!UseAPX && UseIncDec);
10303   match(Set dst (AddL dst src));
10304   effect(KILL cr);
10305 
10306   format %{ "incq    $dst\t# long" %}
10307   ins_encode %{
10308     __ incrementq($dst$$Register);
10309   %}
10310   ins_pipe(ialu_reg);
10311 %}
10312 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10314 %{
10315   predicate(UseAPX && UseIncDec);
10316   match(Set dst (AddL src val));
10317   effect(KILL cr);
10318   flag(PD::Flag_ndd_demotable_opr1);
10319 
10320   format %{ "eincq    $dst, $src\t# long ndd" %}
10321   ins_encode %{
10322     __ eincq($dst$$Register, $src$$Register, false);
10323   %}
10324   ins_pipe(ialu_reg);
10325 %}
10326 
10327 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10328 %{
10329   predicate(UseAPX && UseIncDec);
10330   match(Set dst (AddL (LoadL src) val));
10331   effect(KILL cr);
10332 
10333   format %{ "eincq    $dst, $src\t# long ndd" %}
10334   ins_encode %{
10335     __ eincq($dst$$Register, $src$$Address, false);
10336   %}
10337   ins_pipe(ialu_reg);
10338 %}
10339 
10340 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10341 %{
10342   predicate(UseIncDec);
10343   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10344   effect(KILL cr);
10345 
10346   ins_cost(125); // XXX
10347   format %{ "incq    $dst\t# long" %}
10348   ins_encode %{
10349     __ incrementq($dst$$Address);
10350   %}
10351   ins_pipe(ialu_mem_imm);
10352 %}
10353 
10354 // XXX why does that use AddL
10355 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10356 %{
10357   predicate(!UseAPX && UseIncDec);
10358   match(Set dst (AddL dst src));
10359   effect(KILL cr);
10360 
10361   format %{ "decq    $dst\t# long" %}
10362   ins_encode %{
10363     __ decrementq($dst$$Register);
10364   %}
10365   ins_pipe(ialu_reg);
10366 %}
10367 
10368 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10369 %{
10370   predicate(UseAPX && UseIncDec);
10371   match(Set dst (AddL src val));
10372   effect(KILL cr);
10373   flag(PD::Flag_ndd_demotable_opr1);
10374 
10375   format %{ "edecq    $dst, $src\t# long ndd" %}
10376   ins_encode %{
10377     __ edecq($dst$$Register, $src$$Register, false);
10378   %}
10379   ins_pipe(ialu_reg);
10380 %}
10381 
10382 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10383 %{
10384   predicate(UseAPX && UseIncDec);
10385   match(Set dst (AddL (LoadL src) val));
10386   effect(KILL cr);
10387 
10388   format %{ "edecq    $dst, $src\t# long ndd" %}
10389   ins_encode %{
10390     __ edecq($dst$$Register, $src$$Address, false);
10391   %}
10392   ins_pipe(ialu_reg);
10393 %}
10394 
10395 // XXX why does that use AddL
10396 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10397 %{
10398   predicate(UseIncDec);
10399   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10400   effect(KILL cr);
10401 
10402   ins_cost(125); // XXX
10403   format %{ "decq    $dst\t# long" %}
10404   ins_encode %{
10405     __ decrementq($dst$$Address);
10406   %}
10407   ins_pipe(ialu_mem_imm);
10408 %}
10409 
10410 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10411 %{
10412   predicate(VM_Version::supports_fast_2op_lea());
10413   match(Set dst (AddL (LShiftL index scale) disp));
10414 
10415   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10416   ins_encode %{
10417     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10418     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10419   %}
10420   ins_pipe(ialu_reg_reg);
10421 %}
10422 
10423 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10424 %{
10425   predicate(VM_Version::supports_fast_3op_lea());
10426   match(Set dst (AddL (AddL base index) disp));
10427 
10428   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10429   ins_encode %{
10430     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10431   %}
10432   ins_pipe(ialu_reg_reg);
10433 %}
10434 
10435 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10436 %{
10437   predicate(VM_Version::supports_fast_2op_lea());
10438   match(Set dst (AddL base (LShiftL index scale)));
10439 
10440   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10441   ins_encode %{
10442     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10443     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10444   %}
10445   ins_pipe(ialu_reg_reg);
10446 %}
10447 
10448 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10449 %{
10450   predicate(VM_Version::supports_fast_3op_lea());
10451   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10452 
10453   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10454   ins_encode %{
10455     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10456     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10457   %}
10458   ins_pipe(ialu_reg_reg);
10459 %}
10460 
10461 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10462 %{
10463   match(Set dst (AddP dst src));
10464   effect(KILL cr);
10465   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10466 
10467   format %{ "addq    $dst, $src\t# ptr" %}
10468   ins_encode %{
10469     __ addq($dst$$Register, $src$$Register);
10470   %}
10471   ins_pipe(ialu_reg_reg);
10472 %}
10473 
10474 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10475 %{
10476   match(Set dst (AddP dst src));
10477   effect(KILL cr);
10478   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10479 
10480   format %{ "addq    $dst, $src\t# ptr" %}
10481   ins_encode %{
10482     __ addq($dst$$Register, $src$$constant);
10483   %}
10484   ins_pipe( ialu_reg );
10485 %}
10486 
10487 // XXX addP mem ops ????
10488 
10489 instruct checkCastPP(rRegP dst)
10490 %{
10491   match(Set dst (CheckCastPP dst));
10492 
10493   size(0);
10494   format %{ "# checkcastPP of $dst" %}
10495   ins_encode(/* empty encoding */);
10496   ins_pipe(empty);
10497 %}
10498 
10499 instruct castPP(rRegP dst)
10500 %{
10501   match(Set dst (CastPP dst));
10502 
10503   size(0);
10504   format %{ "# castPP of $dst" %}
10505   ins_encode(/* empty encoding */);
10506   ins_pipe(empty);
10507 %}
10508 
10509 instruct castII(rRegI dst)
10510 %{
10511   predicate(VerifyConstraintCasts == 0);
10512   match(Set dst (CastII dst));
10513 
10514   size(0);
10515   format %{ "# castII of $dst" %}
10516   ins_encode(/* empty encoding */);
10517   ins_cost(0);
10518   ins_pipe(empty);
10519 %}
10520 
10521 instruct castII_checked(rRegI dst, rFlagsReg cr)
10522 %{
10523   predicate(VerifyConstraintCasts > 0);
10524   match(Set dst (CastII dst));
10525 
10526   effect(KILL cr);
10527   format %{ "# cast_checked_II $dst" %}
10528   ins_encode %{
10529     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10530   %}
10531   ins_pipe(pipe_slow);
10532 %}
10533 
10534 instruct castLL(rRegL dst)
10535 %{
10536   predicate(VerifyConstraintCasts == 0);
10537   match(Set dst (CastLL dst));
10538 
10539   size(0);
10540   format %{ "# castLL of $dst" %}
10541   ins_encode(/* empty encoding */);
10542   ins_cost(0);
10543   ins_pipe(empty);
10544 %}
10545 
10546 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10547 %{
10548   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10549   match(Set dst (CastLL dst));
10550 
10551   effect(KILL cr);
10552   format %{ "# cast_checked_LL $dst" %}
10553   ins_encode %{
10554     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10555   %}
10556   ins_pipe(pipe_slow);
10557 %}
10558 
10559 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10560 %{
10561   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10562   match(Set dst (CastLL dst));
10563 
10564   effect(KILL cr, TEMP tmp);
10565   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10566   ins_encode %{
10567     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10568   %}
10569   ins_pipe(pipe_slow);
10570 %}
10571 
10572 instruct castFF(regF dst)
10573 %{
10574   match(Set dst (CastFF dst));
10575 
10576   size(0);
10577   format %{ "# castFF of $dst" %}
10578   ins_encode(/* empty encoding */);
10579   ins_cost(0);
10580   ins_pipe(empty);
10581 %}
10582 
10583 instruct castHH(regF dst)
10584 %{
10585   match(Set dst (CastHH dst));
10586 
10587   size(0);
10588   format %{ "# castHH of $dst" %}
10589   ins_encode(/* empty encoding */);
10590   ins_cost(0);
10591   ins_pipe(empty);
10592 %}
10593 
10594 instruct castDD(regD dst)
10595 %{
10596   match(Set dst (CastDD dst));
10597 
10598   size(0);
10599   format %{ "# castDD of $dst" %}
10600   ins_encode(/* empty encoding */);
10601   ins_cost(0);
10602   ins_pipe(empty);
10603 %}
10604 
10605 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
10606 instruct compareAndSwapP(rRegI res,
10607                          memory mem_ptr,
10608                          rax_RegP oldval, rRegP newval,
10609                          rFlagsReg cr)
10610 %{
10611   predicate(n->as_LoadStore()->barrier_data() == 0);
10612   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10613   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10614   effect(KILL cr, KILL oldval);
10615 
10616   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10617             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10618             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10619   ins_encode %{
10620     __ lock();
10621     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10622     __ setcc(Assembler::equal, $res$$Register);
10623   %}
10624   ins_pipe( pipe_cmpxchg );
10625 %}
10626 
10627 instruct compareAndSwapL(rRegI res,
10628                          memory mem_ptr,
10629                          rax_RegL oldval, rRegL newval,
10630                          rFlagsReg cr)
10631 %{
10632   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10633   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10634   effect(KILL cr, KILL oldval);
10635 
10636   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10637             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10638             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10639   ins_encode %{
10640     __ lock();
10641     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10642     __ setcc(Assembler::equal, $res$$Register);
10643   %}
10644   ins_pipe( pipe_cmpxchg );
10645 %}
10646 
10647 instruct compareAndSwapI(rRegI res,
10648                          memory mem_ptr,
10649                          rax_RegI oldval, rRegI newval,
10650                          rFlagsReg cr)
10651 %{
10652   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10653   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10654   effect(KILL cr, KILL oldval);
10655 
10656   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10657             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10658             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10659   ins_encode %{
10660     __ lock();
10661     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10662     __ setcc(Assembler::equal, $res$$Register);
10663   %}
10664   ins_pipe( pipe_cmpxchg );
10665 %}
10666 
10667 instruct compareAndSwapB(rRegI res,
10668                          memory mem_ptr,
10669                          rax_RegI oldval, rRegI newval,
10670                          rFlagsReg cr)
10671 %{
10672   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10673   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10674   effect(KILL cr, KILL oldval);
10675 
10676   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10677             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10678             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10679   ins_encode %{
10680     __ lock();
10681     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10682     __ setcc(Assembler::equal, $res$$Register);
10683   %}
10684   ins_pipe( pipe_cmpxchg );
10685 %}
10686 
10687 instruct compareAndSwapS(rRegI res,
10688                          memory mem_ptr,
10689                          rax_RegI oldval, rRegI newval,
10690                          rFlagsReg cr)
10691 %{
10692   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10693   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10694   effect(KILL cr, KILL oldval);
10695 
10696   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10697             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10698             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10699   ins_encode %{
10700     __ lock();
10701     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10702     __ setcc(Assembler::equal, $res$$Register);
10703   %}
10704   ins_pipe( pipe_cmpxchg );
10705 %}
10706 
10707 instruct compareAndSwapN(rRegI res,
10708                           memory mem_ptr,
10709                           rax_RegN oldval, rRegN newval,
10710                           rFlagsReg cr) %{
10711   predicate(n->as_LoadStore()->barrier_data() == 0);
10712   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10713   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10714   effect(KILL cr, KILL oldval);
10715 
10716   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10717             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10718             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10719   ins_encode %{
10720     __ lock();
10721     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10722     __ setcc(Assembler::equal, $res$$Register);
10723   %}
10724   ins_pipe( pipe_cmpxchg );
10725 %}
10726 
10727 instruct compareAndExchangeB(
10728                          memory mem_ptr,
10729                          rax_RegI oldval, rRegI newval,
10730                          rFlagsReg cr)
10731 %{
10732   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10733   effect(KILL cr);
10734 
10735   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10736             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10737   ins_encode %{
10738     __ lock();
10739     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10740   %}
10741   ins_pipe( pipe_cmpxchg );
10742 %}
10743 
10744 instruct compareAndExchangeS(
10745                          memory mem_ptr,
10746                          rax_RegI oldval, rRegI newval,
10747                          rFlagsReg cr)
10748 %{
10749   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10750   effect(KILL cr);
10751 
10752   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10753             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10754   ins_encode %{
10755     __ lock();
10756     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10757   %}
10758   ins_pipe( pipe_cmpxchg );
10759 %}
10760 
10761 instruct compareAndExchangeI(
10762                          memory mem_ptr,
10763                          rax_RegI oldval, rRegI newval,
10764                          rFlagsReg cr)
10765 %{
10766   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10767   effect(KILL cr);
10768 
10769   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10770             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10771   ins_encode %{
10772     __ lock();
10773     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10774   %}
10775   ins_pipe( pipe_cmpxchg );
10776 %}
10777 
10778 instruct compareAndExchangeL(
10779                          memory mem_ptr,
10780                          rax_RegL oldval, rRegL newval,
10781                          rFlagsReg cr)
10782 %{
10783   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10784   effect(KILL cr);
10785 
10786   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10787             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10788   ins_encode %{
10789     __ lock();
10790     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10791   %}
10792   ins_pipe( pipe_cmpxchg );
10793 %}
10794 
10795 instruct compareAndExchangeN(
10796                           memory mem_ptr,
10797                           rax_RegN oldval, rRegN newval,
10798                           rFlagsReg cr) %{
10799   predicate(n->as_LoadStore()->barrier_data() == 0);
10800   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10801   effect(KILL cr);
10802 
10803   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10804             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10805   ins_encode %{
10806     __ lock();
10807     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10808   %}
10809   ins_pipe( pipe_cmpxchg );
10810 %}
10811 
10812 instruct compareAndExchangeP(
10813                          memory mem_ptr,
10814                          rax_RegP oldval, rRegP newval,
10815                          rFlagsReg cr)
10816 %{
10817   predicate(n->as_LoadStore()->barrier_data() == 0);
10818   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10819   effect(KILL cr);
10820 
10821   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10822             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10823   ins_encode %{
10824     __ lock();
10825     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10826   %}
10827   ins_pipe( pipe_cmpxchg );
10828 %}
10829 
10830 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10831   predicate(n->as_LoadStore()->result_not_used());
10832   match(Set dummy (GetAndAddB mem add));
10833   effect(KILL cr);
10834   format %{ "addb_lock   $mem, $add" %}
10835   ins_encode %{
10836     __ lock();
10837     __ addb($mem$$Address, $add$$Register);
10838   %}
10839   ins_pipe(pipe_cmpxchg);
10840 %}
10841 
10842 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10843   predicate(n->as_LoadStore()->result_not_used());
10844   match(Set dummy (GetAndAddB mem add));
10845   effect(KILL cr);
10846   format %{ "addb_lock   $mem, $add" %}
10847   ins_encode %{
10848     __ lock();
10849     __ addb($mem$$Address, $add$$constant);
10850   %}
10851   ins_pipe(pipe_cmpxchg);
10852 %}
10853 
10854 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10855   predicate(!n->as_LoadStore()->result_not_used());
10856   match(Set newval (GetAndAddB mem newval));
10857   effect(KILL cr);
10858   format %{ "xaddb_lock  $mem, $newval" %}
10859   ins_encode %{
10860     __ lock();
10861     __ xaddb($mem$$Address, $newval$$Register);
10862   %}
10863   ins_pipe(pipe_cmpxchg);
10864 %}
10865 
10866 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10867   predicate(n->as_LoadStore()->result_not_used());
10868   match(Set dummy (GetAndAddS mem add));
10869   effect(KILL cr);
10870   format %{ "addw_lock   $mem, $add" %}
10871   ins_encode %{
10872     __ lock();
10873     __ addw($mem$$Address, $add$$Register);
10874   %}
10875   ins_pipe(pipe_cmpxchg);
10876 %}
10877 
10878 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10879   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10880   match(Set dummy (GetAndAddS mem add));
10881   effect(KILL cr);
10882   format %{ "addw_lock   $mem, $add" %}
10883   ins_encode %{
10884     __ lock();
10885     __ addw($mem$$Address, $add$$constant);
10886   %}
10887   ins_pipe(pipe_cmpxchg);
10888 %}
10889 
10890 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10891   predicate(!n->as_LoadStore()->result_not_used());
10892   match(Set newval (GetAndAddS mem newval));
10893   effect(KILL cr);
10894   format %{ "xaddw_lock  $mem, $newval" %}
10895   ins_encode %{
10896     __ lock();
10897     __ xaddw($mem$$Address, $newval$$Register);
10898   %}
10899   ins_pipe(pipe_cmpxchg);
10900 %}
10901 
10902 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10903   predicate(n->as_LoadStore()->result_not_used());
10904   match(Set dummy (GetAndAddI mem add));
10905   effect(KILL cr);
10906   format %{ "addl_lock   $mem, $add" %}
10907   ins_encode %{
10908     __ lock();
10909     __ addl($mem$$Address, $add$$Register);
10910   %}
10911   ins_pipe(pipe_cmpxchg);
10912 %}
10913 
10914 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10915   predicate(n->as_LoadStore()->result_not_used());
10916   match(Set dummy (GetAndAddI mem add));
10917   effect(KILL cr);
10918   format %{ "addl_lock   $mem, $add" %}
10919   ins_encode %{
10920     __ lock();
10921     __ addl($mem$$Address, $add$$constant);
10922   %}
10923   ins_pipe(pipe_cmpxchg);
10924 %}
10925 
10926 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10927   predicate(!n->as_LoadStore()->result_not_used());
10928   match(Set newval (GetAndAddI mem newval));
10929   effect(KILL cr);
10930   format %{ "xaddl_lock  $mem, $newval" %}
10931   ins_encode %{
10932     __ lock();
10933     __ xaddl($mem$$Address, $newval$$Register);
10934   %}
10935   ins_pipe(pipe_cmpxchg);
10936 %}
10937 
10938 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10939   predicate(n->as_LoadStore()->result_not_used());
10940   match(Set dummy (GetAndAddL mem add));
10941   effect(KILL cr);
10942   format %{ "addq_lock   $mem, $add" %}
10943   ins_encode %{
10944     __ lock();
10945     __ addq($mem$$Address, $add$$Register);
10946   %}
10947   ins_pipe(pipe_cmpxchg);
10948 %}
10949 
10950 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10951   predicate(n->as_LoadStore()->result_not_used());
10952   match(Set dummy (GetAndAddL mem add));
10953   effect(KILL cr);
10954   format %{ "addq_lock   $mem, $add" %}
10955   ins_encode %{
10956     __ lock();
10957     __ addq($mem$$Address, $add$$constant);
10958   %}
10959   ins_pipe(pipe_cmpxchg);
10960 %}
10961 
10962 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10963   predicate(!n->as_LoadStore()->result_not_used());
10964   match(Set newval (GetAndAddL mem newval));
10965   effect(KILL cr);
10966   format %{ "xaddq_lock  $mem, $newval" %}
10967   ins_encode %{
10968     __ lock();
10969     __ xaddq($mem$$Address, $newval$$Register);
10970   %}
10971   ins_pipe(pipe_cmpxchg);
10972 %}
10973 
10974 instruct xchgB( memory mem, rRegI newval) %{
10975   match(Set newval (GetAndSetB mem newval));
10976   format %{ "XCHGB  $newval,[$mem]" %}
10977   ins_encode %{
10978     __ xchgb($newval$$Register, $mem$$Address);
10979   %}
10980   ins_pipe( pipe_cmpxchg );
10981 %}
10982 
10983 instruct xchgS( memory mem, rRegI newval) %{
10984   match(Set newval (GetAndSetS mem newval));
10985   format %{ "XCHGW  $newval,[$mem]" %}
10986   ins_encode %{
10987     __ xchgw($newval$$Register, $mem$$Address);
10988   %}
10989   ins_pipe( pipe_cmpxchg );
10990 %}
10991 
10992 instruct xchgI( memory mem, rRegI newval) %{
10993   match(Set newval (GetAndSetI mem newval));
10994   format %{ "XCHGL  $newval,[$mem]" %}
10995   ins_encode %{
10996     __ xchgl($newval$$Register, $mem$$Address);
10997   %}
10998   ins_pipe( pipe_cmpxchg );
10999 %}
11000 
11001 instruct xchgL( memory mem, rRegL newval) %{
11002   match(Set newval (GetAndSetL mem newval));
11003   format %{ "XCHGL  $newval,[$mem]" %}
11004   ins_encode %{
11005     __ xchgq($newval$$Register, $mem$$Address);
11006   %}
11007   ins_pipe( pipe_cmpxchg );
11008 %}
11009 
11010 instruct xchgP( memory mem, rRegP newval) %{
11011   match(Set newval (GetAndSetP mem newval));
11012   predicate(n->as_LoadStore()->barrier_data() == 0);
11013   format %{ "XCHGQ  $newval,[$mem]" %}
11014   ins_encode %{
11015     __ xchgq($newval$$Register, $mem$$Address);
11016   %}
11017   ins_pipe( pipe_cmpxchg );
11018 %}
11019 
11020 instruct xchgN( memory mem, rRegN newval) %{
11021   predicate(n->as_LoadStore()->barrier_data() == 0);
11022   match(Set newval (GetAndSetN mem newval));
11023   format %{ "XCHGL  $newval,$mem]" %}
11024   ins_encode %{
11025     __ xchgl($newval$$Register, $mem$$Address);
11026   %}
11027   ins_pipe( pipe_cmpxchg );
11028 %}
11029 
11030 //----------Abs Instructions-------------------------------------------
11031 
11032 // Integer Absolute Instructions
11033 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11034 %{
11035   match(Set dst (AbsI src));
11036   effect(TEMP dst, KILL cr);
11037   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11038             "subl    $dst, $src\n\t"
11039             "cmovll  $dst, $src" %}
11040   ins_encode %{
11041     __ xorl($dst$$Register, $dst$$Register);
11042     __ subl($dst$$Register, $src$$Register);
11043     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11044   %}
11045 
11046   ins_pipe(ialu_reg_reg);
11047 %}
11048 
11049 // Long Absolute Instructions
11050 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11051 %{
11052   match(Set dst (AbsL src));
11053   effect(TEMP dst, KILL cr);
11054   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11055             "subq    $dst, $src\n\t"
11056             "cmovlq  $dst, $src" %}
11057   ins_encode %{
11058     __ xorl($dst$$Register, $dst$$Register);
11059     __ subq($dst$$Register, $src$$Register);
11060     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11061   %}
11062 
11063   ins_pipe(ialu_reg_reg);
11064 %}
11065 
11066 //----------Subtraction Instructions-------------------------------------------
11067 
11068 // Integer Subtraction Instructions
11069 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11070 %{
11071   predicate(!UseAPX);
11072   match(Set dst (SubI dst src));
11073   effect(KILL cr);
11074   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11075 
11076   format %{ "subl    $dst, $src\t# int" %}
11077   ins_encode %{
11078     __ subl($dst$$Register, $src$$Register);
11079   %}
11080   ins_pipe(ialu_reg_reg);
11081 %}
11082 
11083 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11084 %{
11085   predicate(UseAPX);
11086   match(Set dst (SubI src1 src2));
11087   effect(KILL cr);
11088   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11089 
11090   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11091   ins_encode %{
11092     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11093   %}
11094   ins_pipe(ialu_reg_reg);
11095 %}
11096 
11097 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11098 %{
11099   predicate(UseAPX);
11100   match(Set dst (SubI src1 src2));
11101   effect(KILL cr);
11102   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11103 
11104   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11105   ins_encode %{
11106     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11107   %}
11108   ins_pipe(ialu_reg_reg);
11109 %}
11110 
11111 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11112 %{
11113   predicate(UseAPX);
11114   match(Set dst (SubI (LoadI src1) src2));
11115   effect(KILL cr);
11116   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11117 
11118   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11119   ins_encode %{
11120     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11121   %}
11122   ins_pipe(ialu_reg_reg);
11123 %}
11124 
11125 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11126 %{
11127   predicate(!UseAPX);
11128   match(Set dst (SubI dst (LoadI src)));
11129   effect(KILL cr);
11130   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11131 
11132   ins_cost(150);
11133   format %{ "subl    $dst, $src\t# int" %}
11134   ins_encode %{
11135     __ subl($dst$$Register, $src$$Address);
11136   %}
11137   ins_pipe(ialu_reg_mem);
11138 %}
11139 
11140 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11141 %{
11142   predicate(UseAPX);
11143   match(Set dst (SubI src1 (LoadI src2)));
11144   effect(KILL cr);
11145   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11146 
11147   ins_cost(150);
11148   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11149   ins_encode %{
11150     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11151   %}
11152   ins_pipe(ialu_reg_mem);
11153 %}
11154 
11155 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11156 %{
11157   predicate(UseAPX);
11158   match(Set dst (SubI (LoadI src1) src2));
11159   effect(KILL cr);
11160   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11161 
11162   ins_cost(150);
11163   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11164   ins_encode %{
11165     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11166   %}
11167   ins_pipe(ialu_reg_mem);
11168 %}
11169 
11170 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11171 %{
11172   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11173   effect(KILL cr);
11174   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11175 
11176   ins_cost(150);
11177   format %{ "subl    $dst, $src\t# int" %}
11178   ins_encode %{
11179     __ subl($dst$$Address, $src$$Register);
11180   %}
11181   ins_pipe(ialu_mem_reg);
11182 %}
11183 
11184 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11185 %{
11186   predicate(!UseAPX);
11187   match(Set dst (SubL dst src));
11188   effect(KILL cr);
11189   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11190 
11191   format %{ "subq    $dst, $src\t# long" %}
11192   ins_encode %{
11193     __ subq($dst$$Register, $src$$Register);
11194   %}
11195   ins_pipe(ialu_reg_reg);
11196 %}
11197 
11198 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11199 %{
11200   predicate(UseAPX);
11201   match(Set dst (SubL src1 src2));
11202   effect(KILL cr);
11203   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11204 
11205   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11206   ins_encode %{
11207     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11208   %}
11209   ins_pipe(ialu_reg_reg);
11210 %}
11211 
11212 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11213 %{
11214   predicate(UseAPX);
11215   match(Set dst (SubL src1 src2));
11216   effect(KILL cr);
11217   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11218 
11219   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11220   ins_encode %{
11221     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11222   %}
11223   ins_pipe(ialu_reg_reg);
11224 %}
11225 
11226 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11227 %{
11228   predicate(UseAPX);
11229   match(Set dst (SubL (LoadL src1) src2));
11230   effect(KILL cr);
11231   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11232 
11233   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11234   ins_encode %{
11235     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11236   %}
11237   ins_pipe(ialu_reg_reg);
11238 %}
11239 
11240 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11241 %{
11242   predicate(!UseAPX);
11243   match(Set dst (SubL dst (LoadL src)));
11244   effect(KILL cr);
11245   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11246 
11247   ins_cost(150);
11248   format %{ "subq    $dst, $src\t# long" %}
11249   ins_encode %{
11250     __ subq($dst$$Register, $src$$Address);
11251   %}
11252   ins_pipe(ialu_reg_mem);
11253 %}
11254 
11255 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11256 %{
11257   predicate(UseAPX);
11258   match(Set dst (SubL src1 (LoadL src2)));
11259   effect(KILL cr);
11260   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11261 
11262   ins_cost(150);
11263   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11264   ins_encode %{
11265     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11266   %}
11267   ins_pipe(ialu_reg_mem);
11268 %}
11269 
11270 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11271 %{
11272   predicate(UseAPX);
11273   match(Set dst (SubL (LoadL src1) src2));
11274   effect(KILL cr);
11275   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11276 
11277   ins_cost(150);
11278   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11279   ins_encode %{
11280     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11281   %}
11282   ins_pipe(ialu_reg_mem);
11283 %}
11284 
11285 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11286 %{
11287   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11288   effect(KILL cr);
11289   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11290 
11291   ins_cost(150);
11292   format %{ "subq    $dst, $src\t# long" %}
11293   ins_encode %{
11294     __ subq($dst$$Address, $src$$Register);
11295   %}
11296   ins_pipe(ialu_mem_reg);
11297 %}
11298 
11299 // Subtract from a pointer
11300 // XXX hmpf???
11301 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11302 %{
11303   match(Set dst (AddP dst (SubI zero src)));
11304   effect(KILL cr);
11305 
11306   format %{ "subq    $dst, $src\t# ptr - int" %}
11307   ins_encode %{
11308     __ subq($dst$$Register, $src$$Register);
11309   %}
11310   ins_pipe(ialu_reg_reg);
11311 %}
11312 
11313 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11314 %{
11315   predicate(!UseAPX);
11316   match(Set dst (SubI zero dst));
11317   effect(KILL cr);
11318   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11319 
11320   format %{ "negl    $dst\t# int" %}
11321   ins_encode %{
11322     __ negl($dst$$Register);
11323   %}
11324   ins_pipe(ialu_reg);
11325 %}
11326 
11327 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11328 %{
11329   predicate(UseAPX);
11330   match(Set dst (SubI zero src));
11331   effect(KILL cr);
11332   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11333 
11334   format %{ "enegl    $dst, $src\t# int ndd" %}
11335   ins_encode %{
11336     __ enegl($dst$$Register, $src$$Register, false);
11337   %}
11338   ins_pipe(ialu_reg);
11339 %}
11340 
11341 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11342 %{
11343   predicate(!UseAPX);
11344   match(Set dst (NegI dst));
11345   effect(KILL cr);
11346   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11347 
11348   format %{ "negl    $dst\t# int" %}
11349   ins_encode %{
11350     __ negl($dst$$Register);
11351   %}
11352   ins_pipe(ialu_reg);
11353 %}
11354 
11355 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11356 %{
11357   predicate(UseAPX);
11358   match(Set dst (NegI src));
11359   effect(KILL cr);
11360   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11361 
11362   format %{ "enegl    $dst, $src\t# int ndd" %}
11363   ins_encode %{
11364     __ enegl($dst$$Register, $src$$Register, false);
11365   %}
11366   ins_pipe(ialu_reg);
11367 %}
11368 
11369 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11370 %{
11371   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11372   effect(KILL cr);
11373   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11374 
11375   format %{ "negl    $dst\t# int" %}
11376   ins_encode %{
11377     __ negl($dst$$Address);
11378   %}
11379   ins_pipe(ialu_reg);
11380 %}
11381 
11382 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11383 %{
11384   predicate(!UseAPX);
11385   match(Set dst (SubL zero dst));
11386   effect(KILL cr);
11387   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11388 
11389   format %{ "negq    $dst\t# long" %}
11390   ins_encode %{
11391     __ negq($dst$$Register);
11392   %}
11393   ins_pipe(ialu_reg);
11394 %}
11395 
11396 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11397 %{
11398   predicate(UseAPX);
11399   match(Set dst (SubL zero src));
11400   effect(KILL cr);
11401   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11402 
11403   format %{ "enegq    $dst, $src\t# long ndd" %}
11404   ins_encode %{
11405     __ enegq($dst$$Register, $src$$Register, false);
11406   %}
11407   ins_pipe(ialu_reg);
11408 %}
11409 
11410 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11411 %{
11412   predicate(!UseAPX);
11413   match(Set dst (NegL dst));
11414   effect(KILL cr);
11415   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11416 
11417   format %{ "negq    $dst\t# int" %}
11418   ins_encode %{
11419     __ negq($dst$$Register);
11420   %}
11421   ins_pipe(ialu_reg);
11422 %}
11423 
11424 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11425 %{
11426   predicate(UseAPX);
11427   match(Set dst (NegL src));
11428   effect(KILL cr);
11429   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11430 
11431   format %{ "enegq    $dst, $src\t# long ndd" %}
11432   ins_encode %{
11433     __ enegq($dst$$Register, $src$$Register, false);
11434   %}
11435   ins_pipe(ialu_reg);
11436 %}
11437 
11438 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11439 %{
11440   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11441   effect(KILL cr);
11442   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11443 
11444   format %{ "negq    $dst\t# long" %}
11445   ins_encode %{
11446     __ negq($dst$$Address);
11447   %}
11448   ins_pipe(ialu_reg);
11449 %}
11450 
11451 //----------Multiplication/Division Instructions-------------------------------
11452 // Integer Multiplication Instructions
11453 // Multiply Register
11454 
11455 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11456 %{
11457   predicate(!UseAPX);
11458   match(Set dst (MulI dst src));
11459   effect(KILL cr);
11460 
11461   ins_cost(300);
11462   format %{ "imull   $dst, $src\t# int" %}
11463   ins_encode %{
11464     __ imull($dst$$Register, $src$$Register);
11465   %}
11466   ins_pipe(ialu_reg_reg_alu0);
11467 %}
11468 
11469 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11470 %{
11471   predicate(UseAPX);
11472   match(Set dst (MulI src1 src2));
11473   effect(KILL cr);
11474   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11475 
11476   ins_cost(300);
11477   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11478   ins_encode %{
11479     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11480   %}
11481   ins_pipe(ialu_reg_reg_alu0);
11482 %}
11483 
11484 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11485 %{
11486   match(Set dst (MulI src imm));
11487   effect(KILL cr);
11488 
11489   ins_cost(300);
11490   format %{ "imull   $dst, $src, $imm\t# int" %}
11491   ins_encode %{
11492     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11493   %}
11494   ins_pipe(ialu_reg_reg_alu0);
11495 %}
11496 
11497 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11498 %{
11499   predicate(!UseAPX);
11500   match(Set dst (MulI dst (LoadI src)));
11501   effect(KILL cr);
11502 
11503   ins_cost(350);
11504   format %{ "imull   $dst, $src\t# int" %}
11505   ins_encode %{
11506     __ imull($dst$$Register, $src$$Address);
11507   %}
11508   ins_pipe(ialu_reg_mem_alu0);
11509 %}
11510 
11511 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11512 %{
11513   predicate(UseAPX);
11514   match(Set dst (MulI src1 (LoadI src2)));
11515   effect(KILL cr);
11516   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11517 
11518   ins_cost(350);
11519   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11520   ins_encode %{
11521     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11522   %}
11523   ins_pipe(ialu_reg_mem_alu0);
11524 %}
11525 
11526 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11527 %{
11528   match(Set dst (MulI (LoadI src) imm));
11529   effect(KILL cr);
11530 
11531   ins_cost(300);
11532   format %{ "imull   $dst, $src, $imm\t# int" %}
11533   ins_encode %{
11534     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11535   %}
11536   ins_pipe(ialu_reg_mem_alu0);
11537 %}
11538 
11539 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11540 %{
11541   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11542   effect(KILL cr, KILL src2);
11543 
  expand %{ mulI_rReg(dst, src1, cr);
            mulI_rReg(src2, src3, cr);
            addI_rReg(dst, src2, cr); %}
11547 %}
11548 
11549 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11550 %{
11551   predicate(!UseAPX);
11552   match(Set dst (MulL dst src));
11553   effect(KILL cr);
11554 
11555   ins_cost(300);
11556   format %{ "imulq   $dst, $src\t# long" %}
11557   ins_encode %{
11558     __ imulq($dst$$Register, $src$$Register);
11559   %}
11560   ins_pipe(ialu_reg_reg_alu0);
11561 %}
11562 
11563 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11564 %{
11565   predicate(UseAPX);
11566   match(Set dst (MulL src1 src2));
11567   effect(KILL cr);
11568   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11569 
11570   ins_cost(300);
11571   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11572   ins_encode %{
11573     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11574   %}
11575   ins_pipe(ialu_reg_reg_alu0);
11576 %}
11577 
11578 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11579 %{
11580   match(Set dst (MulL src imm));
11581   effect(KILL cr);
11582 
11583   ins_cost(300);
11584   format %{ "imulq   $dst, $src, $imm\t# long" %}
11585   ins_encode %{
11586     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11587   %}
11588   ins_pipe(ialu_reg_reg_alu0);
11589 %}
11590 
11591 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11592 %{
11593   predicate(!UseAPX);
11594   match(Set dst (MulL dst (LoadL src)));
11595   effect(KILL cr);
11596 
11597   ins_cost(350);
11598   format %{ "imulq   $dst, $src\t# long" %}
11599   ins_encode %{
11600     __ imulq($dst$$Register, $src$$Address);
11601   %}
11602   ins_pipe(ialu_reg_mem_alu0);
11603 %}
11604 
11605 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11606 %{
11607   predicate(UseAPX);
11608   match(Set dst (MulL src1 (LoadL src2)));
11609   effect(KILL cr);
11610   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11611 
11612   ins_cost(350);
11613   format %{ "eimulq   $dst, $src1, $src2 \t# long" %}
11614   ins_encode %{
11615     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11616   %}
11617   ins_pipe(ialu_reg_mem_alu0);
11618 %}
11619 
11620 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11621 %{
11622   match(Set dst (MulL (LoadL src) imm));
11623   effect(KILL cr);
11624 
11625   ins_cost(300);
11626   format %{ "imulq   $dst, $src, $imm\t# long" %}
11627   ins_encode %{
11628     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11629   %}
11630   ins_pipe(ialu_reg_mem_alu0);
11631 %}
11632 
11633 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11634 %{
11635   match(Set dst (MulHiL src rax));
11636   effect(USE_KILL rax, KILL cr);
11637 
11638   ins_cost(300);
11639   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11640   ins_encode %{
11641     __ imulq($src$$Register);
11642   %}
11643   ins_pipe(ialu_reg_reg_alu0);
11644 %}
11645 
11646 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11647 %{
11648   match(Set dst (UMulHiL src rax));
11649   effect(USE_KILL rax, KILL cr);
11650 
11651   ins_cost(300);
11652   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11653   ins_encode %{
11654     __ mulq($src$$Register);
11655   %}
11656   ins_pipe(ialu_reg_reg_alu0);
11657 %}
11658 
11659 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11660                    rFlagsReg cr)
11661 %{
11662   match(Set rax (DivI rax div));
11663   effect(KILL rdx, KILL cr);
11664 
11665   ins_cost(30*100+10*100); // XXX
11666   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11667             "jne,s   normal\n\t"
11668             "xorl    rdx, rdx\n\t"
11669             "cmpl    $div, -1\n\t"
11670             "je,s    done\n"
11671     "normal: cdql\n\t"
11672             "idivl   $div\n"
11673     "done:"        %}
11674   ins_encode(cdql_enc(div));
11675   ins_pipe(ialu_reg_reg_alu0);
11676 %}
11677 
11678 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11679                    rFlagsReg cr)
11680 %{
11681   match(Set rax (DivL rax div));
11682   effect(KILL rdx, KILL cr);
11683 
11684   ins_cost(30*100+10*100); // XXX
11685   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11686             "cmpq    rax, rdx\n\t"
11687             "jne,s   normal\n\t"
11688             "xorl    rdx, rdx\n\t"
11689             "cmpq    $div, -1\n\t"
11690             "je,s    done\n"
11691     "normal: cdqq\n\t"
11692             "idivq   $div\n"
11693     "done:"        %}
11694   ins_encode(cdqq_enc(div));
11695   ins_pipe(ialu_reg_reg_alu0);
11696 %}
11697 
11698 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11699 %{
11700   match(Set rax (UDivI rax div));
11701   effect(KILL rdx, KILL cr);
11702 
11703   ins_cost(300);
11704   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11705   ins_encode %{
11706     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11707   %}
11708   ins_pipe(ialu_reg_reg_alu0);
11709 %}
11710 
11711 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11712 %{
11713   match(Set rax (UDivL rax div));
11714   effect(KILL rdx, KILL cr);
11715 
11716   ins_cost(300);
11717   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11718   ins_encode %{
    __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11720   %}
11721   ins_pipe(ialu_reg_reg_alu0);
11722 %}
11723 
11724 // Integer DIVMOD with Register, both quotient and mod results
11725 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11726                              rFlagsReg cr)
11727 %{
11728   match(DivModI rax div);
11729   effect(KILL cr);
11730 
11731   ins_cost(30*100+10*100); // XXX
11732   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11733             "jne,s   normal\n\t"
11734             "xorl    rdx, rdx\n\t"
11735             "cmpl    $div, -1\n\t"
11736             "je,s    done\n"
11737     "normal: cdql\n\t"
11738             "idivl   $div\n"
11739     "done:"        %}
11740   ins_encode(cdql_enc(div));
11741   ins_pipe(pipe_slow);
11742 %}
11743 
11744 // Long DIVMOD with Register, both quotient and mod results
11745 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11746                              rFlagsReg cr)
11747 %{
11748   match(DivModL rax div);
11749   effect(KILL cr);
11750 
11751   ins_cost(30*100+10*100); // XXX
11752   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11753             "cmpq    rax, rdx\n\t"
11754             "jne,s   normal\n\t"
11755             "xorl    rdx, rdx\n\t"
11756             "cmpq    $div, -1\n\t"
11757             "je,s    done\n"
11758     "normal: cdqq\n\t"
11759             "idivq   $div\n"
11760     "done:"        %}
11761   ins_encode(cdqq_enc(div));
11762   ins_pipe(pipe_slow);
11763 %}
11764 
11765 // Unsigned integer DIVMOD with Register, both quotient and mod results
11766 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11767                               no_rax_rdx_RegI div, rFlagsReg cr)
11768 %{
11769   match(UDivModI rax div);
11770   effect(TEMP tmp, KILL cr);
11771 
11772   ins_cost(300);
11773   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11774             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11775           %}
11776   ins_encode %{
11777     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11778   %}
11779   ins_pipe(pipe_slow);
11780 %}
11781 
11782 // Unsigned long DIVMOD with Register, both quotient and mod results
11783 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11784                               no_rax_rdx_RegL div, rFlagsReg cr)
11785 %{
11786   match(UDivModL rax div);
11787   effect(TEMP tmp, KILL cr);
11788 
11789   ins_cost(300);
11790   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11791             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11792           %}
11793   ins_encode %{
11794     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11795   %}
11796   ins_pipe(pipe_slow);
11797 %}
11798 
11799 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11800                    rFlagsReg cr)
11801 %{
11802   match(Set rdx (ModI rax div));
11803   effect(KILL rax, KILL cr);
11804 
11805   ins_cost(300); // XXX
11806   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11807             "jne,s   normal\n\t"
11808             "xorl    rdx, rdx\n\t"
11809             "cmpl    $div, -1\n\t"
11810             "je,s    done\n"
11811     "normal: cdql\n\t"
11812             "idivl   $div\n"
11813     "done:"        %}
11814   ins_encode(cdql_enc(div));
11815   ins_pipe(ialu_reg_reg_alu0);
11816 %}
11817 
11818 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11819                    rFlagsReg cr)
11820 %{
11821   match(Set rdx (ModL rax div));
11822   effect(KILL rax, KILL cr);
11823 
11824   ins_cost(300); // XXX
11825   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11826             "cmpq    rax, rdx\n\t"
11827             "jne,s   normal\n\t"
11828             "xorl    rdx, rdx\n\t"
11829             "cmpq    $div, -1\n\t"
11830             "je,s    done\n"
11831     "normal: cdqq\n\t"
11832             "idivq   $div\n"
11833     "done:"        %}
11834   ins_encode(cdqq_enc(div));
11835   ins_pipe(ialu_reg_reg_alu0);
11836 %}
11837 
11838 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11839 %{
11840   match(Set rdx (UModI rax div));
11841   effect(KILL rax, KILL cr);
11842 
11843   ins_cost(300);
11844   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11845   ins_encode %{
11846     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11847   %}
11848   ins_pipe(ialu_reg_reg_alu0);
11849 %}
11850 
11851 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11852 %{
11853   match(Set rdx (UModL rax div));
11854   effect(KILL rax, KILL cr);
11855 
11856   ins_cost(300);
11857   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11858   ins_encode %{
11859     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11860   %}
11861   ins_pipe(ialu_reg_reg_alu0);
11862 %}
11863 
11864 // Integer Shift Instructions
11865 // Shift Left by one, two, three
11866 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11867 %{
11868   predicate(!UseAPX);
11869   match(Set dst (LShiftI dst shift));
11870   effect(KILL cr);
11871 
11872   format %{ "sall    $dst, $shift" %}
11873   ins_encode %{
11874     __ sall($dst$$Register, $shift$$constant);
11875   %}
11876   ins_pipe(ialu_reg);
11877 %}
11878 
11879 // Shift Left by one, two, three
11880 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11881 %{
11882   predicate(UseAPX);
11883   match(Set dst (LShiftI src shift));
11884   effect(KILL cr);
11885   flag(PD::Flag_ndd_demotable_opr1);
11886 
11887   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
11888   ins_encode %{
11889     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11890   %}
11891   ins_pipe(ialu_reg);
11892 %}
11893 
11894 // Shift Left by 8-bit immediate
11895 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11896 %{
11897   predicate(!UseAPX);
11898   match(Set dst (LShiftI dst shift));
11899   effect(KILL cr);
11900 
11901   format %{ "sall    $dst, $shift" %}
11902   ins_encode %{
11903     __ sall($dst$$Register, $shift$$constant);
11904   %}
11905   ins_pipe(ialu_reg);
11906 %}
11907 
11908 // Shift Left by 8-bit immediate
11909 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11910 %{
11911   predicate(UseAPX);
11912   match(Set dst (LShiftI src shift));
11913   effect(KILL cr);
11914   flag(PD::Flag_ndd_demotable_opr1);
11915 
11916   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11917   ins_encode %{
11918     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11919   %}
11920   ins_pipe(ialu_reg);
11921 %}
11922 
11923 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11924 %{
11925   predicate(UseAPX);
11926   match(Set dst (LShiftI (LoadI src) shift));
11927   effect(KILL cr);
11928 
11929   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11930   ins_encode %{
11931     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11932   %}
11933   ins_pipe(ialu_reg);
11934 %}
11935 
11936 // Shift Left by 8-bit immediate
11937 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11938 %{
11939   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11940   effect(KILL cr);
11941 
11942   format %{ "sall    $dst, $shift" %}
11943   ins_encode %{
11944     __ sall($dst$$Address, $shift$$constant);
11945   %}
11946   ins_pipe(ialu_mem_imm);
11947 %}
11948 
11949 // Shift Left by variable
11950 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11951 %{
11952   predicate(!VM_Version::supports_bmi2());
11953   match(Set dst (LShiftI dst shift));
11954   effect(KILL cr);
11955 
11956   format %{ "sall    $dst, $shift" %}
11957   ins_encode %{
11958     __ sall($dst$$Register);
11959   %}
11960   ins_pipe(ialu_reg_reg);
11961 %}
11962 
11963 // Shift Left by variable
11964 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11965 %{
11966   predicate(!VM_Version::supports_bmi2());
11967   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11968   effect(KILL cr);
11969 
11970   format %{ "sall    $dst, $shift" %}
11971   ins_encode %{
11972     __ sall($dst$$Address);
11973   %}
11974   ins_pipe(ialu_mem_reg);
11975 %}
11976 
11977 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11978 %{
11979   predicate(VM_Version::supports_bmi2());
11980   match(Set dst (LShiftI src shift));
11981 
11982   format %{ "shlxl   $dst, $src, $shift" %}
11983   ins_encode %{
11984     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11985   %}
11986   ins_pipe(ialu_reg_reg);
11987 %}
11988 
11989 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11990 %{
11991   predicate(VM_Version::supports_bmi2());
11992   match(Set dst (LShiftI (LoadI src) shift));
11993   ins_cost(175);
11994   format %{ "shlxl   $dst, $src, $shift" %}
11995   ins_encode %{
11996     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11997   %}
11998   ins_pipe(ialu_reg_mem);
11999 %}
12000 
12001 // Arithmetic Shift Right by 8-bit immediate
12002 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12003 %{
12004   predicate(!UseAPX);
12005   match(Set dst (RShiftI dst shift));
12006   effect(KILL cr);
12007 
12008   format %{ "sarl    $dst, $shift" %}
12009   ins_encode %{
12010     __ sarl($dst$$Register, $shift$$constant);
12011   %}
12012   ins_pipe(ialu_mem_imm);
12013 %}
12014 
12015 // Arithmetic Shift Right by 8-bit immediate
12016 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12017 %{
12018   predicate(UseAPX);
12019   match(Set dst (RShiftI src shift));
12020   effect(KILL cr);
12021   flag(PD::Flag_ndd_demotable_opr1);
12022 
12023   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12024   ins_encode %{
12025     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12026   %}
12027   ins_pipe(ialu_mem_imm);
12028 %}
12029 
12030 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12031 %{
12032   predicate(UseAPX);
12033   match(Set dst (RShiftI (LoadI src) shift));
12034   effect(KILL cr);
12035 
12036   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12037   ins_encode %{
12038     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12039   %}
12040   ins_pipe(ialu_mem_imm);
12041 %}
12042 
12043 // Arithmetic Shift Right by 8-bit immediate
12044 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12045 %{
12046   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12047   effect(KILL cr);
12048 
12049   format %{ "sarl    $dst, $shift" %}
12050   ins_encode %{
12051     __ sarl($dst$$Address, $shift$$constant);
12052   %}
12053   ins_pipe(ialu_mem_imm);
12054 %}
12055 
12056 // Arithmetic Shift Right by variable
12057 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12058 %{
12059   predicate(!VM_Version::supports_bmi2());
12060   match(Set dst (RShiftI dst shift));
12061   effect(KILL cr);
12062 
12063   format %{ "sarl    $dst, $shift" %}
12064   ins_encode %{
12065     __ sarl($dst$$Register);
12066   %}
12067   ins_pipe(ialu_reg_reg);
12068 %}
12069 
12070 // Arithmetic Shift Right by variable
12071 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12072 %{
12073   predicate(!VM_Version::supports_bmi2());
12074   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12075   effect(KILL cr);
12076 
12077   format %{ "sarl    $dst, $shift" %}
12078   ins_encode %{
12079     __ sarl($dst$$Address);
12080   %}
12081   ins_pipe(ialu_mem_reg);
12082 %}
12083 
12084 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12085 %{
12086   predicate(VM_Version::supports_bmi2());
12087   match(Set dst (RShiftI src shift));
12088 
12089   format %{ "sarxl   $dst, $src, $shift" %}
12090   ins_encode %{
12091     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12092   %}
12093   ins_pipe(ialu_reg_reg);
12094 %}
12095 
12096 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12097 %{
12098   predicate(VM_Version::supports_bmi2());
12099   match(Set dst (RShiftI (LoadI src) shift));
12100   ins_cost(175);
12101   format %{ "sarxl   $dst, $src, $shift" %}
12102   ins_encode %{
12103     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12104   %}
12105   ins_pipe(ialu_reg_mem);
12106 %}
12107 
12108 // Logical Shift Right by 8-bit immediate
12109 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12110 %{
12111   predicate(!UseAPX);
12112   match(Set dst (URShiftI dst shift));
12113   effect(KILL cr);
12114 
12115   format %{ "shrl    $dst, $shift" %}
12116   ins_encode %{
12117     __ shrl($dst$$Register, $shift$$constant);
12118   %}
12119   ins_pipe(ialu_reg);
12120 %}
12121 
12122 // Logical Shift Right by 8-bit immediate
12123 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12124 %{
12125   predicate(UseAPX);
12126   match(Set dst (URShiftI src shift));
12127   effect(KILL cr);
12128   flag(PD::Flag_ndd_demotable_opr1);
12129 
12130   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12131   ins_encode %{
12132     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12133   %}
12134   ins_pipe(ialu_reg);
12135 %}
12136 
12137 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12138 %{
12139   predicate(UseAPX);
12140   match(Set dst (URShiftI (LoadI src) shift));
12141   effect(KILL cr);
12142 
12143   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12144   ins_encode %{
12145     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12146   %}
12147   ins_pipe(ialu_reg);
12148 %}
12149 
12150 // Logical Shift Right by 8-bit immediate
12151 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12152 %{
12153   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12154   effect(KILL cr);
12155 
12156   format %{ "shrl    $dst, $shift" %}
12157   ins_encode %{
12158     __ shrl($dst$$Address, $shift$$constant);
12159   %}
12160   ins_pipe(ialu_mem_imm);
12161 %}
12162 
12163 // Logical Shift Right by variable
12164 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12165 %{
12166   predicate(!VM_Version::supports_bmi2());
12167   match(Set dst (URShiftI dst shift));
12168   effect(KILL cr);
12169 
12170   format %{ "shrl    $dst, $shift" %}
12171   ins_encode %{
12172     __ shrl($dst$$Register);
12173   %}
12174   ins_pipe(ialu_reg_reg);
12175 %}
12176 
12177 // Logical Shift Right by variable
12178 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12179 %{
12180   predicate(!VM_Version::supports_bmi2());
12181   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12182   effect(KILL cr);
12183 
12184   format %{ "shrl    $dst, $shift" %}
12185   ins_encode %{
12186     __ shrl($dst$$Address);
12187   %}
12188   ins_pipe(ialu_mem_reg);
12189 %}
12190 
12191 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12192 %{
12193   predicate(VM_Version::supports_bmi2());
12194   match(Set dst (URShiftI src shift));
12195 
12196   format %{ "shrxl   $dst, $src, $shift" %}
12197   ins_encode %{
12198     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12199   %}
12200   ins_pipe(ialu_reg_reg);
12201 %}
12202 
12203 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12204 %{
12205   predicate(VM_Version::supports_bmi2());
12206   match(Set dst (URShiftI (LoadI src) shift));
12207   ins_cost(175);
12208   format %{ "shrxl   $dst, $src, $shift" %}
12209   ins_encode %{
12210     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12211   %}
12212   ins_pipe(ialu_reg_mem);
12213 %}
12214 
12215 // Long Shift Instructions
12216 // Shift Left by one, two, three
12217 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12218 %{
12219   predicate(!UseAPX);
12220   match(Set dst (LShiftL dst shift));
12221   effect(KILL cr);
12222 
12223   format %{ "salq    $dst, $shift" %}
12224   ins_encode %{
12225     __ salq($dst$$Register, $shift$$constant);
12226   %}
12227   ins_pipe(ialu_reg);
12228 %}
12229 
12230 // Shift Left by one, two, three
12231 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12232 %{
12233   predicate(UseAPX);
12234   match(Set dst (LShiftL src shift));
12235   effect(KILL cr);
12236   flag(PD::Flag_ndd_demotable_opr1);
12237 
12238   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12239   ins_encode %{
12240     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12241   %}
12242   ins_pipe(ialu_reg);
12243 %}
12244 
12245 // Shift Left by 8-bit immediate
12246 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12247 %{
12248   predicate(!UseAPX);
12249   match(Set dst (LShiftL dst shift));
12250   effect(KILL cr);
12251 
12252   format %{ "salq    $dst, $shift" %}
12253   ins_encode %{
12254     __ salq($dst$$Register, $shift$$constant);
12255   %}
12256   ins_pipe(ialu_reg);
12257 %}
12258 
12259 // Shift Left by 8-bit immediate
12260 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12261 %{
12262   predicate(UseAPX);
12263   match(Set dst (LShiftL src shift));
12264   effect(KILL cr);
12265   flag(PD::Flag_ndd_demotable_opr1);
12266 
12267   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12268   ins_encode %{
12269     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12270   %}
12271   ins_pipe(ialu_reg);
12272 %}
12273 
12274 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12275 %{
12276   predicate(UseAPX);
12277   match(Set dst (LShiftL (LoadL src) shift));
12278   effect(KILL cr);
12279 
12280   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12281   ins_encode %{
12282     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12283   %}
12284   ins_pipe(ialu_reg);
12285 %}
12286 
12287 // Shift Left by 8-bit immediate
12288 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12289 %{
12290   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12291   effect(KILL cr);
12292 
12293   format %{ "salq    $dst, $shift" %}
12294   ins_encode %{
12295     __ salq($dst$$Address, $shift$$constant);
12296   %}
12297   ins_pipe(ialu_mem_imm);
12298 %}
12299 
12300 // Shift Left by variable
12301 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12302 %{
12303   predicate(!VM_Version::supports_bmi2());
12304   match(Set dst (LShiftL dst shift));
12305   effect(KILL cr);
12306 
12307   format %{ "salq    $dst, $shift" %}
12308   ins_encode %{
12309     __ salq($dst$$Register);
12310   %}
12311   ins_pipe(ialu_reg_reg);
12312 %}
12313 
12314 // Shift Left by variable
12315 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12316 %{
12317   predicate(!VM_Version::supports_bmi2());
12318   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12319   effect(KILL cr);
12320 
12321   format %{ "salq    $dst, $shift" %}
12322   ins_encode %{
12323     __ salq($dst$$Address);
12324   %}
12325   ins_pipe(ialu_mem_reg);
12326 %}
12327 
12328 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12329 %{
12330   predicate(VM_Version::supports_bmi2());
12331   match(Set dst (LShiftL src shift));
12332 
12333   format %{ "shlxq   $dst, $src, $shift" %}
12334   ins_encode %{
12335     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12336   %}
12337   ins_pipe(ialu_reg_reg);
12338 %}
12339 
12340 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12341 %{
12342   predicate(VM_Version::supports_bmi2());
12343   match(Set dst (LShiftL (LoadL src) shift));
12344   ins_cost(175);
12345   format %{ "shlxq   $dst, $src, $shift" %}
12346   ins_encode %{
12347     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12348   %}
12349   ins_pipe(ialu_reg_mem);
12350 %}
12351 
12352 // Arithmetic Shift Right by 8-bit immediate
12353 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12354 %{
12355   predicate(!UseAPX);
12356   match(Set dst (RShiftL dst shift));
12357   effect(KILL cr);
12358 
12359   format %{ "sarq    $dst, $shift" %}
12360   ins_encode %{
12361     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12362   %}
  ins_pipe(ialu_reg);
12364 %}
12365 
12366 // Arithmetic Shift Right by 8-bit immediate
12367 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12368 %{
12369   predicate(UseAPX);
12370   match(Set dst (RShiftL src shift));
12371   effect(KILL cr);
12372   flag(PD::Flag_ndd_demotable_opr1);
12373 
12374   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12375   ins_encode %{
12376     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12377   %}
  ins_pipe(ialu_reg);
12379 %}
12380 
12381 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12382 %{
12383   predicate(UseAPX);
12384   match(Set dst (RShiftL (LoadL src) shift));
12385   effect(KILL cr);
12386 
12387   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12388   ins_encode %{
12389     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12390   %}
  ins_pipe(ialu_reg);
12392 %}
12393 
12394 // Arithmetic Shift Right by 8-bit immediate
12395 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12396 %{
12397   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12398   effect(KILL cr);
12399 
12400   format %{ "sarq    $dst, $shift" %}
12401   ins_encode %{
12402     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12403   %}
12404   ins_pipe(ialu_mem_imm);
12405 %}
12406 
12407 // Arithmetic Shift Right by variable
12408 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12409 %{
12410   predicate(!VM_Version::supports_bmi2());
12411   match(Set dst (RShiftL dst shift));
12412   effect(KILL cr);
12413 
12414   format %{ "sarq    $dst, $shift" %}
12415   ins_encode %{
12416     __ sarq($dst$$Register);
12417   %}
12418   ins_pipe(ialu_reg_reg);
12419 %}
12420 
12421 // Arithmetic Shift Right by variable
12422 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12423 %{
12424   predicate(!VM_Version::supports_bmi2());
12425   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12426   effect(KILL cr);
12427 
12428   format %{ "sarq    $dst, $shift" %}
12429   ins_encode %{
12430     __ sarq($dst$$Address);
12431   %}
12432   ins_pipe(ialu_mem_reg);
12433 %}
12434 
12435 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12436 %{
12437   predicate(VM_Version::supports_bmi2());
12438   match(Set dst (RShiftL src shift));
12439 
12440   format %{ "sarxq   $dst, $src, $shift" %}
12441   ins_encode %{
12442     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12443   %}
12444   ins_pipe(ialu_reg_reg);
12445 %}
12446 
12447 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12448 %{
12449   predicate(VM_Version::supports_bmi2());
12450   match(Set dst (RShiftL (LoadL src) shift));
12451   ins_cost(175);
12452   format %{ "sarxq   $dst, $src, $shift" %}
12453   ins_encode %{
12454     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12455   %}
12456   ins_pipe(ialu_reg_mem);
12457 %}
12458 
12459 // Logical Shift Right by 8-bit immediate
12460 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12461 %{
12462   predicate(!UseAPX);
12463   match(Set dst (URShiftL dst shift));
12464   effect(KILL cr);
12465 
12466   format %{ "shrq    $dst, $shift" %}
12467   ins_encode %{
12468     __ shrq($dst$$Register, $shift$$constant);
12469   %}
12470   ins_pipe(ialu_reg);
12471 %}
12472 
12473 // Logical Shift Right by 8-bit immediate
12474 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12475 %{
12476   predicate(UseAPX);
12477   match(Set dst (URShiftL src shift));
12478   effect(KILL cr);
12479   flag(PD::Flag_ndd_demotable_opr1);
12480 
12481   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12482   ins_encode %{
12483     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12484   %}
12485   ins_pipe(ialu_reg);
12486 %}
12487 
12488 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12489 %{
12490   predicate(UseAPX);
12491   match(Set dst (URShiftL (LoadL src) shift));
12492   effect(KILL cr);
12493 
12494   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12495   ins_encode %{
12496     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12497   %}
12498   ins_pipe(ialu_reg);
12499 %}
12500 
12501 // Logical Shift Right by 8-bit immediate
12502 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12503 %{
12504   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12505   effect(KILL cr);
12506 
12507   format %{ "shrq    $dst, $shift" %}
12508   ins_encode %{
12509     __ shrq($dst$$Address, $shift$$constant);
12510   %}
12511   ins_pipe(ialu_mem_imm);
12512 %}
12513 
12514 // Logical Shift Right by variable
12515 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12516 %{
12517   predicate(!VM_Version::supports_bmi2());
12518   match(Set dst (URShiftL dst shift));
12519   effect(KILL cr);
12520 
12521   format %{ "shrq    $dst, $shift" %}
12522   ins_encode %{
12523     __ shrq($dst$$Register);
12524   %}
12525   ins_pipe(ialu_reg_reg);
12526 %}
12527 
12528 // Logical Shift Right by variable
12529 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12530 %{
12531   predicate(!VM_Version::supports_bmi2());
12532   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12533   effect(KILL cr);
12534 
12535   format %{ "shrq    $dst, $shift" %}
12536   ins_encode %{
12537     __ shrq($dst$$Address);
12538   %}
12539   ins_pipe(ialu_mem_reg);
12540 %}
12541 
12542 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12543 %{
12544   predicate(VM_Version::supports_bmi2());
12545   match(Set dst (URShiftL src shift));
12546 
12547   format %{ "shrxq   $dst, $src, $shift" %}
12548   ins_encode %{
12549     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12550   %}
12551   ins_pipe(ialu_reg_reg);
12552 %}
12553 
12554 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12555 %{
12556   predicate(VM_Version::supports_bmi2());
12557   match(Set dst (URShiftL (LoadL src) shift));
12558   ins_cost(175);
12559   format %{ "shrxq   $dst, $src, $shift" %}
12560   ins_encode %{
12561     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12562   %}
12563   ins_pipe(ialu_reg_mem);
12564 %}
12565 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
12568 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12569 %{
12570   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12571 
12572   format %{ "movsbl  $dst, $src\t# i2b" %}
12573   ins_encode %{
12574     __ movsbl($dst$$Register, $src$$Register);
12575   %}
12576   ins_pipe(ialu_reg_reg);
12577 %}
12578 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12581 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12582 %{
12583   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12584 
12585   format %{ "movswl  $dst, $src\t# i2s" %}
12586   ins_encode %{
12587     __ movswl($dst$$Register, $src$$Register);
12588   %}
12589   ins_pipe(ialu_reg_reg);
12590 %}
12591 
12592 // ROL/ROR instructions
12593 
12594 // Rotate left by constant.
12595 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12596 %{
12597   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12598   match(Set dst (RotateLeft dst shift));
12599   effect(KILL cr);
12600   format %{ "roll    $dst, $shift" %}
12601   ins_encode %{
12602     __ roll($dst$$Register, $shift$$constant);
12603   %}
12604   ins_pipe(ialu_reg);
12605 %}
12606 
12607 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12608 %{
12609   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12610   match(Set dst (RotateLeft src shift));
12611   format %{ "rolxl   $dst, $src, $shift" %}
12612   ins_encode %{
12613     int shift = 32 - ($shift$$constant & 31);
12614     __ rorxl($dst$$Register, $src$$Register, shift);
12615   %}
12616   ins_pipe(ialu_reg_reg);
12617 %}
12618 
12619 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12620 %{
12621   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12622   match(Set dst (RotateLeft (LoadI src) shift));
12623   ins_cost(175);
12624   format %{ "rolxl   $dst, $src, $shift" %}
12625   ins_encode %{
12626     int shift = 32 - ($shift$$constant & 31);
12627     __ rorxl($dst$$Register, $src$$Address, shift);
12628   %}
12629   ins_pipe(ialu_reg_mem);
12630 %}
12631 
12632 // Rotate Left by variable
12633 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12634 %{
12635   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12636   match(Set dst (RotateLeft dst shift));
12637   effect(KILL cr);
12638   format %{ "roll    $dst, $shift" %}
12639   ins_encode %{
12640     __ roll($dst$$Register);
12641   %}
12642   ins_pipe(ialu_reg_reg);
12643 %}
12644 
12645 // Rotate Left by variable
12646 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12647 %{
12648   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12649   match(Set dst (RotateLeft src shift));
12650   effect(KILL cr);
12651   flag(PD::Flag_ndd_demotable_opr1);
12652 
12653   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12654   ins_encode %{
12655     __ eroll($dst$$Register, $src$$Register, false);
12656   %}
12657   ins_pipe(ialu_reg_reg);
12658 %}
12659 
12660 // Rotate Right by constant.
12661 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12662 %{
12663   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12664   match(Set dst (RotateRight dst shift));
12665   effect(KILL cr);
12666   format %{ "rorl    $dst, $shift" %}
12667   ins_encode %{
12668     __ rorl($dst$$Register, $shift$$constant);
12669   %}
12670   ins_pipe(ialu_reg);
12671 %}
12672 
12673 // Rotate Right by constant.
12674 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12675 %{
12676   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12677   match(Set dst (RotateRight src shift));
12678   format %{ "rorxl   $dst, $src, $shift" %}
12679   ins_encode %{
12680     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12681   %}
12682   ins_pipe(ialu_reg_reg);
12683 %}
12684 
12685 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12686 %{
12687   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12688   match(Set dst (RotateRight (LoadI src) shift));
12689   ins_cost(175);
12690   format %{ "rorxl   $dst, $src, $shift" %}
12691   ins_encode %{
12692     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12693   %}
12694   ins_pipe(ialu_reg_mem);
12695 %}
12696 
12697 // Rotate Right by variable
12698 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12699 %{
12700   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12701   match(Set dst (RotateRight dst shift));
12702   effect(KILL cr);
12703   format %{ "rorl    $dst, $shift" %}
12704   ins_encode %{
12705     __ rorl($dst$$Register);
12706   %}
12707   ins_pipe(ialu_reg_reg);
12708 %}
12709 
12710 // Rotate Right by variable
12711 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12712 %{
12713   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12714   match(Set dst (RotateRight src shift));
12715   effect(KILL cr);
12716   flag(PD::Flag_ndd_demotable_opr1);
12717 
12718   format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
12719   ins_encode %{
12720     __ erorl($dst$$Register, $src$$Register, false);
12721   %}
12722   ins_pipe(ialu_reg_reg);
12723 %}
12724 
12725 // Rotate Left by constant.
12726 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12727 %{
12728   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12729   match(Set dst (RotateLeft dst shift));
12730   effect(KILL cr);
12731   format %{ "rolq    $dst, $shift" %}
12732   ins_encode %{
12733     __ rolq($dst$$Register, $shift$$constant);
12734   %}
12735   ins_pipe(ialu_reg);
12736 %}
12737 
12738 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12739 %{
12740   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12741   match(Set dst (RotateLeft src shift));
12742   format %{ "rolxq   $dst, $src, $shift" %}
12743   ins_encode %{
12744     int shift = 64 - ($shift$$constant & 63);
12745     __ rorxq($dst$$Register, $src$$Register, shift);
12746   %}
12747   ins_pipe(ialu_reg_reg);
12748 %}
12749 
12750 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12751 %{
12752   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12753   match(Set dst (RotateLeft (LoadL src) shift));
12754   ins_cost(175);
12755   format %{ "rolxq   $dst, $src, $shift" %}
12756   ins_encode %{
12757     int shift = 64 - ($shift$$constant & 63);
12758     __ rorxq($dst$$Register, $src$$Address, shift);
12759   %}
12760   ins_pipe(ialu_reg_mem);
12761 %}
12762 
12763 // Rotate Left by variable
12764 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12765 %{
12766   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12767   match(Set dst (RotateLeft dst shift));
12768   effect(KILL cr);
12769 
12770   format %{ "rolq    $dst, $shift" %}
12771   ins_encode %{
12772     __ rolq($dst$$Register);
12773   %}
12774   ins_pipe(ialu_reg_reg);
12775 %}
12776 
12777 // Rotate Left by variable
12778 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12779 %{
12780   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12781   match(Set dst (RotateLeft src shift));
12782   effect(KILL cr);
12783   flag(PD::Flag_ndd_demotable_opr1);
12784 
12785   format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
12786   ins_encode %{
12787     __ erolq($dst$$Register, $src$$Register, false);
12788   %}
12789   ins_pipe(ialu_reg_reg);
12790 %}
12791 
12792 // Rotate Right by constant.
12793 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12794 %{
12795   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12796   match(Set dst (RotateRight dst shift));
12797   effect(KILL cr);
12798   format %{ "rorq    $dst, $shift" %}
12799   ins_encode %{
12800     __ rorq($dst$$Register, $shift$$constant);
12801   %}
12802   ins_pipe(ialu_reg);
12803 %}
12804 
12805 // Rotate Right by constant
12806 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12807 %{
12808   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12809   match(Set dst (RotateRight src shift));
12810   format %{ "rorxq   $dst, $src, $shift" %}
12811   ins_encode %{
12812     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12813   %}
12814   ins_pipe(ialu_reg_reg);
12815 %}
12816 
12817 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12818 %{
12819   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12820   match(Set dst (RotateRight (LoadL src) shift));
12821   ins_cost(175);
12822   format %{ "rorxq   $dst, $src, $shift" %}
12823   ins_encode %{
12824     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12825   %}
12826   ins_pipe(ialu_reg_mem);
12827 %}
12828 
12829 // Rotate Right by variable
12830 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12831 %{
12832   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12833   match(Set dst (RotateRight dst shift));
12834   effect(KILL cr);
12835   format %{ "rorq    $dst, $shift" %}
12836   ins_encode %{
12837     __ rorq($dst$$Register);
12838   %}
12839   ins_pipe(ialu_reg_reg);
12840 %}
12841 
12842 // Rotate Right by variable
12843 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12844 %{
12845   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12846   match(Set dst (RotateRight src shift));
12847   effect(KILL cr);
12848   flag(PD::Flag_ndd_demotable_opr1);
12849 
12850   format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
12851   ins_encode %{
12852     __ erorq($dst$$Register, $src$$Register, false);
12853   %}
12854   ins_pipe(ialu_reg_reg);
12855 %}
12856 
12857 //----------------------------- CompressBits/ExpandBits ------------------------
12858 
12859 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12860   predicate(n->bottom_type()->isa_long());
12861   match(Set dst (CompressBits src mask));
12862   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12863   ins_encode %{
12864     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12865   %}
12866   ins_pipe( pipe_slow );
12867 %}
12868 
12869 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12870   predicate(n->bottom_type()->isa_long());
12871   match(Set dst (ExpandBits src mask));
12872   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12873   ins_encode %{
12874     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12875   %}
12876   ins_pipe( pipe_slow );
12877 %}
12878 
12879 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12880   predicate(n->bottom_type()->isa_long());
12881   match(Set dst (CompressBits src (LoadL mask)));
12882   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12883   ins_encode %{
12884     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12885   %}
12886   ins_pipe( pipe_slow );
12887 %}
12888 
12889 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12890   predicate(n->bottom_type()->isa_long());
12891   match(Set dst (ExpandBits src (LoadL mask)));
12892   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12893   ins_encode %{
12894     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12895   %}
12896   ins_pipe( pipe_slow );
12897 %}
12898 
12899 
12900 // Logical Instructions
12901 
12902 // Integer Logical Instructions
12903 
12904 // And Instructions
12905 // And Register with Register
12906 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12907 %{
12908   predicate(!UseAPX);
12909   match(Set dst (AndI dst src));
12910   effect(KILL cr);
12911   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12912 
12913   format %{ "andl    $dst, $src\t# int" %}
12914   ins_encode %{
12915     __ andl($dst$$Register, $src$$Register);
12916   %}
12917   ins_pipe(ialu_reg_reg);
12918 %}
12919 
12920 // And Register with Register using New Data Destination (NDD)
12921 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12922 %{
12923   predicate(UseAPX);
12924   match(Set dst (AndI src1 src2));
12925   effect(KILL cr);
12926   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12927 
12928   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12929   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12933   ins_pipe(ialu_reg_reg);
12934 %}
12935 
12936 // And Register with Immediate 255
12937 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12938 %{
12939   match(Set dst (AndI src mask));
12940 
12941   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12942   ins_encode %{
12943     __ movzbl($dst$$Register, $src$$Register);
12944   %}
12945   ins_pipe(ialu_reg);
12946 %}
12947 
12948 // And Register with Immediate 255 and promote to long
12949 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12950 %{
12951   match(Set dst (ConvI2L (AndI src mask)));
12952 
12953   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
12954   ins_encode %{
12955     __ movzbl($dst$$Register, $src$$Register);
12956   %}
12957   ins_pipe(ialu_reg);
12958 %}
12959 
12960 // And Register with Immediate 65535
12961 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12962 %{
12963   match(Set dst (AndI src mask));
12964 
12965   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
12966   ins_encode %{
12967     __ movzwl($dst$$Register, $src$$Register);
12968   %}
12969   ins_pipe(ialu_reg);
12970 %}
12971 
12972 // And Register with Immediate 65535 and promote to long
12973 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12974 %{
12975   match(Set dst (ConvI2L (AndI src mask)));
12976 
12977   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
12978   ins_encode %{
12979     __ movzwl($dst$$Register, $src$$Register);
12980   %}
12981   ins_pipe(ialu_reg);
12982 %}
12983 
12984 // Can skip int2long conversions after AND with small bitmask
12985 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12986 %{
12987   predicate(VM_Version::supports_bmi2());
12988   ins_cost(125);
12989   effect(TEMP tmp, KILL cr);
12990   match(Set dst (ConvI2L (AndI src mask)));
12991   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
12992   ins_encode %{
12993     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12994     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12995   %}
12996   ins_pipe(ialu_reg_reg);
12997 %}
12998 
12999 // And Register with Immediate
13000 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13001 %{
13002   predicate(!UseAPX);
13003   match(Set dst (AndI dst src));
13004   effect(KILL cr);
13005   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13006 
13007   format %{ "andl    $dst, $src\t# int" %}
13008   ins_encode %{
13009     __ andl($dst$$Register, $src$$constant);
13010   %}
13011   ins_pipe(ialu_reg);
13012 %}
13013 
13014 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13015 %{
13016   predicate(UseAPX);
13017   match(Set dst (AndI src1 src2));
13018   effect(KILL cr);
13019   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13020 
13021   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13022   ins_encode %{
13023     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13024   %}
13025   ins_pipe(ialu_reg);
13026 %}
13027 
13028 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13029 %{
13030   predicate(UseAPX);
13031   match(Set dst (AndI (LoadI src1) src2));
13032   effect(KILL cr);
13033   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13034 
13035   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13036   ins_encode %{
13037     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13038   %}
13039   ins_pipe(ialu_reg);
13040 %}
13041 
13042 // And Register with Memory
13043 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13044 %{
13045   predicate(!UseAPX);
13046   match(Set dst (AndI dst (LoadI src)));
13047   effect(KILL cr);
13048   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13049 
13050   ins_cost(150);
13051   format %{ "andl    $dst, $src\t# int" %}
13052   ins_encode %{
13053     __ andl($dst$$Register, $src$$Address);
13054   %}
13055   ins_pipe(ialu_reg_mem);
13056 %}
13057 
13058 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13059 %{
13060   predicate(UseAPX);
13061   match(Set dst (AndI src1 (LoadI src2)));
13062   effect(KILL cr);
13063   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13064 
13065   ins_cost(150);
13066   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13067   ins_encode %{
13068     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13069   %}
13070   ins_pipe(ialu_reg_mem);
13071 %}
13072 
13073 // And Memory with Register
13074 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13075 %{
13076   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13077   effect(KILL cr);
13078   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13079 
13080   ins_cost(150);
13081   format %{ "andb    $dst, $src\t# byte" %}
13082   ins_encode %{
13083     __ andb($dst$$Address, $src$$Register);
13084   %}
13085   ins_pipe(ialu_mem_reg);
13086 %}
13087 
13088 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13089 %{
13090   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13091   effect(KILL cr);
13092   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13093 
13094   ins_cost(150);
13095   format %{ "andl    $dst, $src\t# int" %}
13096   ins_encode %{
13097     __ andl($dst$$Address, $src$$Register);
13098   %}
13099   ins_pipe(ialu_mem_reg);
13100 %}
13101 
13102 // And Memory with Immediate
13103 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13104 %{
13105   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13106   effect(KILL cr);
13107   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13108 
13109   ins_cost(125);
13110   format %{ "andl    $dst, $src\t# int" %}
13111   ins_encode %{
13112     __ andl($dst$$Address, $src$$constant);
13113   %}
13114   ins_pipe(ialu_mem_imm);
13115 %}
13116 
13117 // BMI1 instructions
13118 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13119   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13120   predicate(UseBMI1Instructions);
13121   effect(KILL cr);
13122   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13123 
13124   ins_cost(125);
13125   format %{ "andnl  $dst, $src1, $src2" %}
13126 
13127   ins_encode %{
13128     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13129   %}
13130   ins_pipe(ialu_reg_mem);
13131 %}
13132 
13133 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13134   match(Set dst (AndI (XorI src1 minus_1) src2));
13135   predicate(UseBMI1Instructions);
13136   effect(KILL cr);
13137   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13138 
13139   format %{ "andnl  $dst, $src1, $src2" %}
13140 
13141   ins_encode %{
13142     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13143   %}
13144   ins_pipe(ialu_reg);
13145 %}
13146 
13147 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13148   match(Set dst (AndI (SubI imm_zero src) src));
13149   predicate(UseBMI1Instructions);
13150   effect(KILL cr);
13151   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13152 
13153   format %{ "blsil  $dst, $src" %}
13154 
13155   ins_encode %{
13156     __ blsil($dst$$Register, $src$$Register);
13157   %}
13158   ins_pipe(ialu_reg);
13159 %}
13160 
13161 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13162   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13163   predicate(UseBMI1Instructions);
13164   effect(KILL cr);
13165   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13166 
13167   ins_cost(125);
13168   format %{ "blsil  $dst, $src" %}
13169 
13170   ins_encode %{
13171     __ blsil($dst$$Register, $src$$Address);
13172   %}
13173   ins_pipe(ialu_reg_mem);
13174 %}
13175 
13176 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13177 %{
13178   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13179   predicate(UseBMI1Instructions);
13180   effect(KILL cr);
13181   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13182 
13183   ins_cost(125);
13184   format %{ "blsmskl $dst, $src" %}
13185 
13186   ins_encode %{
13187     __ blsmskl($dst$$Register, $src$$Address);
13188   %}
13189   ins_pipe(ialu_reg_mem);
13190 %}
13191 
13192 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13193 %{
13194   match(Set dst (XorI (AddI src minus_1) src));
13195   predicate(UseBMI1Instructions);
13196   effect(KILL cr);
13197   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13198 
13199   format %{ "blsmskl $dst, $src" %}
13200 
13201   ins_encode %{
13202     __ blsmskl($dst$$Register, $src$$Register);
13203   %}
13204 
13205   ins_pipe(ialu_reg);
13206 %}
13207 
13208 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13209 %{
13210   match(Set dst (AndI (AddI src minus_1) src) );
13211   predicate(UseBMI1Instructions);
13212   effect(KILL cr);
13213   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13214 
13215   format %{ "blsrl  $dst, $src" %}
13216 
13217   ins_encode %{
13218     __ blsrl($dst$$Register, $src$$Register);
13219   %}
13220 
  ins_pipe(ialu_reg);
13222 %}
13223 
13224 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13225 %{
13226   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13227   predicate(UseBMI1Instructions);
13228   effect(KILL cr);
13229   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13230 
13231   ins_cost(125);
13232   format %{ "blsrl  $dst, $src" %}
13233 
13234   ins_encode %{
13235     __ blsrl($dst$$Register, $src$$Address);
13236   %}
13237 
  ins_pipe(ialu_reg_mem);
13239 %}
13240 
13241 // Or Instructions
13242 // Or Register with Register
13243 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13244 %{
13245   predicate(!UseAPX);
13246   match(Set dst (OrI dst src));
13247   effect(KILL cr);
13248   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13249 
13250   format %{ "orl     $dst, $src\t# int" %}
13251   ins_encode %{
13252     __ orl($dst$$Register, $src$$Register);
13253   %}
13254   ins_pipe(ialu_reg_reg);
13255 %}
13256 
13257 // Or Register with Register using New Data Destination (NDD)
13258 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13259 %{
13260   predicate(UseAPX);
13261   match(Set dst (OrI src1 src2));
13262   effect(KILL cr);
13263   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13264 
13265   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13266   ins_encode %{
13267     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13268   %}
13269   ins_pipe(ialu_reg_reg);
13270 %}
13271 
13272 // Or Register with Immediate
13273 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13274 %{
13275   predicate(!UseAPX);
13276   match(Set dst (OrI dst src));
13277   effect(KILL cr);
13278   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13279 
13280   format %{ "orl     $dst, $src\t# int" %}
13281   ins_encode %{
13282     __ orl($dst$$Register, $src$$constant);
13283   %}
13284   ins_pipe(ialu_reg);
13285 %}
13286 
13287 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13288 %{
13289   predicate(UseAPX);
13290   match(Set dst (OrI src1 src2));
13291   effect(KILL cr);
13292   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13293 
13294   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13295   ins_encode %{
13296     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13297   %}
13298   ins_pipe(ialu_reg);
13299 %}
13300 
13301 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13302 %{
13303   predicate(UseAPX);
13304   match(Set dst (OrI src1 src2));
13305   effect(KILL cr);
13306   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13307 
13308   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13309   ins_encode %{
13310     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13311   %}
13312   ins_pipe(ialu_reg);
13313 %}
13314 
13315 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13316 %{
13317   predicate(UseAPX);
13318   match(Set dst (OrI (LoadI src1) src2));
13319   effect(KILL cr);
13320   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13321 
13322   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13323   ins_encode %{
13324     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13325   %}
13326   ins_pipe(ialu_reg);
13327 %}
13328 
13329 // Or Register with Memory
13330 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13331 %{
13332   predicate(!UseAPX);
13333   match(Set dst (OrI dst (LoadI src)));
13334   effect(KILL cr);
13335   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13336 
13337   ins_cost(150);
13338   format %{ "orl     $dst, $src\t# int" %}
13339   ins_encode %{
13340     __ orl($dst$$Register, $src$$Address);
13341   %}
13342   ins_pipe(ialu_reg_mem);
13343 %}
13344 
13345 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13346 %{
13347   predicate(UseAPX);
13348   match(Set dst (OrI src1 (LoadI src2)));
13349   effect(KILL cr);
13350   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13351 
13352   ins_cost(150);
13353   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13354   ins_encode %{
13355     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13356   %}
13357   ins_pipe(ialu_reg_mem);
13358 %}
13359 
13360 // Or Memory with Register
13361 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13362 %{
13363   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13364   effect(KILL cr);
13365   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13366 
13367   ins_cost(150);
13368   format %{ "orb    $dst, $src\t# byte" %}
13369   ins_encode %{
13370     __ orb($dst$$Address, $src$$Register);
13371   %}
13372   ins_pipe(ialu_mem_reg);
13373 %}
13374 
13375 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13376 %{
13377   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13378   effect(KILL cr);
13379   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13380 
13381   ins_cost(150);
13382   format %{ "orl     $dst, $src\t# int" %}
13383   ins_encode %{
13384     __ orl($dst$$Address, $src$$Register);
13385   %}
13386   ins_pipe(ialu_mem_reg);
13387 %}
13388 
13389 // Or Memory with Immediate
13390 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13391 %{
13392   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13393   effect(KILL cr);
13394   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13395 
13396   ins_cost(125);
13397   format %{ "orl     $dst, $src\t# int" %}
13398   ins_encode %{
13399     __ orl($dst$$Address, $src$$constant);
13400   %}
13401   ins_pipe(ialu_mem_imm);
13402 %}
13403 
13404 // Xor Instructions
13405 // Xor Register with Register
13406 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13407 %{
13408   predicate(!UseAPX);
13409   match(Set dst (XorI dst src));
13410   effect(KILL cr);
13411   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13412 
13413   format %{ "xorl    $dst, $src\t# int" %}
13414   ins_encode %{
13415     __ xorl($dst$$Register, $src$$Register);
13416   %}
13417   ins_pipe(ialu_reg_reg);
13418 %}
13419 
13420 // Xor Register with Register using New Data Destination (NDD)
13421 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13422 %{
13423   predicate(UseAPX);
13424   match(Set dst (XorI src1 src2));
13425   effect(KILL cr);
13426   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13427 
13428   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13429   ins_encode %{
13430     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13431   %}
13432   ins_pipe(ialu_reg_reg);
13433 %}
13434 
13435 // Xor Register with Immediate -1
13436 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13437 %{
13438   predicate(!UseAPX);
13439   match(Set dst (XorI dst imm));
13440 
13441   format %{ "notl    $dst" %}
13442   ins_encode %{
13443      __ notl($dst$$Register);
13444   %}
13445   ins_pipe(ialu_reg);
13446 %}
13447 
13448 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13449 %{
13450   match(Set dst (XorI src imm));
13451   predicate(UseAPX);
13452   flag(PD::Flag_ndd_demotable_opr1);
13453 
13454   format %{ "enotl    $dst, $src" %}
13455   ins_encode %{
13456      __ enotl($dst$$Register, $src$$Register);
13457   %}
13458   ins_pipe(ialu_reg);
13459 %}
13460 
13461 // Xor Register with Immediate
13462 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13463 %{
  // Exclude the -1 case here so that xorI_rReg_im1 is selected for it regardless of cost.
13465   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13466   match(Set dst (XorI dst src));
13467   effect(KILL cr);
13468   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13469 
13470   format %{ "xorl    $dst, $src\t# int" %}
13471   ins_encode %{
13472     __ xorl($dst$$Register, $src$$constant);
13473   %}
13474   ins_pipe(ialu_reg);
13475 %}
13476 
13477 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13478 %{
  // Exclude the -1 case here so that xorI_rReg_im1_ndd is selected for it regardless of cost.
13480   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13481   match(Set dst (XorI src1 src2));
13482   effect(KILL cr);
13483   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13484 
13485   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13486   ins_encode %{
13487     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13488   %}
13489   ins_pipe(ialu_reg);
13490 %}
13491 
13492 // Xor Memory with Immediate
13493 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13494 %{
13495   predicate(UseAPX);
13496   match(Set dst (XorI (LoadI src1) src2));
13497   effect(KILL cr);
13498   ins_cost(150);
13499   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13500 
13501   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13502   ins_encode %{
13503     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13504   %}
13505   ins_pipe(ialu_reg);
13506 %}
13507 
13508 // Xor Register with Memory
13509 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13510 %{
13511   predicate(!UseAPX);
13512   match(Set dst (XorI dst (LoadI src)));
13513   effect(KILL cr);
13514   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13515 
13516   ins_cost(150);
13517   format %{ "xorl    $dst, $src\t# int" %}
13518   ins_encode %{
13519     __ xorl($dst$$Register, $src$$Address);
13520   %}
13521   ins_pipe(ialu_reg_mem);
13522 %}
13523 
13524 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13525 %{
13526   predicate(UseAPX);
13527   match(Set dst (XorI src1 (LoadI src2)));
13528   effect(KILL cr);
13529   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13530 
13531   ins_cost(150);
13532   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13533   ins_encode %{
13534     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13535   %}
13536   ins_pipe(ialu_reg_mem);
13537 %}
13538 
13539 // Xor Memory with Register
13540 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13541 %{
13542   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13543   effect(KILL cr);
13544   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13545 
13546   ins_cost(150);
13547   format %{ "xorb    $dst, $src\t# byte" %}
13548   ins_encode %{
13549     __ xorb($dst$$Address, $src$$Register);
13550   %}
13551   ins_pipe(ialu_mem_reg);
13552 %}
13553 
13554 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13555 %{
13556   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13557   effect(KILL cr);
13558   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13559 
13560   ins_cost(150);
13561   format %{ "xorl    $dst, $src\t# int" %}
13562   ins_encode %{
13563     __ xorl($dst$$Address, $src$$Register);
13564   %}
13565   ins_pipe(ialu_mem_reg);
13566 %}
13567 
13568 // Xor Memory with Immediate
13569 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13570 %{
13571   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13572   effect(KILL cr);
13573   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13574 
13575   ins_cost(125);
13576   format %{ "xorl    $dst, $src\t# int" %}
13577   ins_encode %{
13578     __ xorl($dst$$Address, $src$$constant);
13579   %}
13580   ins_pipe(ialu_mem_imm);
13581 %}
13582 
13583 
13584 // Long Logical Instructions
13585 
13586 // And Instructions
13587 // And Register with Register
13588 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13589 %{
13590   predicate(!UseAPX);
13591   match(Set dst (AndL dst src));
13592   effect(KILL cr);
13593   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13594 
13595   format %{ "andq    $dst, $src\t# long" %}
13596   ins_encode %{
13597     __ andq($dst$$Register, $src$$Register);
13598   %}
13599   ins_pipe(ialu_reg_reg);
13600 %}
13601 
13602 // And Register with Register using New Data Destination (NDD)
13603 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13604 %{
13605   predicate(UseAPX);
13606   match(Set dst (AndL src1 src2));
13607   effect(KILL cr);
13608   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13609 
13610   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13611   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13615   ins_pipe(ialu_reg_reg);
13616 %}
13617 
13618 // And Register with Immediate 255
13619 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13620 %{
13621   match(Set dst (AndL src mask));
13622 
13623   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13624   ins_encode %{
13625     // movzbl zeroes out the upper 32-bit and does not need REX.W
13626     __ movzbl($dst$$Register, $src$$Register);
13627   %}
13628   ins_pipe(ialu_reg);
13629 %}
13630 
13631 // And Register with Immediate 65535
13632 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13633 %{
13634   match(Set dst (AndL src mask));
13635 
13636   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13637   ins_encode %{
13638     // movzwl zeroes out the upper 32-bit and does not need REX.W
13639     __ movzwl($dst$$Register, $src$$Register);
13640   %}
13641   ins_pipe(ialu_reg);
13642 %}
13643 
13644 // And Register with Immediate
13645 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13646 %{
13647   predicate(!UseAPX);
13648   match(Set dst (AndL dst src));
13649   effect(KILL cr);
13650   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13651 
13652   format %{ "andq    $dst, $src\t# long" %}
13653   ins_encode %{
13654     __ andq($dst$$Register, $src$$constant);
13655   %}
13656   ins_pipe(ialu_reg);
13657 %}
13658 
13659 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13660 %{
13661   predicate(UseAPX);
13662   match(Set dst (AndL src1 src2));
13663   effect(KILL cr);
13664   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13665 
13666   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13667   ins_encode %{
13668     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13669   %}
13670   ins_pipe(ialu_reg);
13671 %}
13672 
13673 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13674 %{
13675   predicate(UseAPX);
13676   match(Set dst (AndL (LoadL src1) src2));
13677   effect(KILL cr);
13678   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13679 
13680   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13681   ins_encode %{
13682     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13683   %}
13684   ins_pipe(ialu_reg);
13685 %}
13686 
13687 // And Register with Memory
13688 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13689 %{
13690   predicate(!UseAPX);
13691   match(Set dst (AndL dst (LoadL src)));
13692   effect(KILL cr);
13693   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13694 
13695   ins_cost(150);
13696   format %{ "andq    $dst, $src\t# long" %}
13697   ins_encode %{
13698     __ andq($dst$$Register, $src$$Address);
13699   %}
13700   ins_pipe(ialu_reg_mem);
13701 %}
13702 
13703 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13704 %{
13705   predicate(UseAPX);
13706   match(Set dst (AndL src1 (LoadL src2)));
13707   effect(KILL cr);
13708   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13709 
13710   ins_cost(150);
13711   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13712   ins_encode %{
13713     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13714   %}
13715   ins_pipe(ialu_reg_mem);
13716 %}
13717 
13718 // And Memory with Register
13719 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13720 %{
13721   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13722   effect(KILL cr);
13723   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13724 
13725   ins_cost(150);
13726   format %{ "andq    $dst, $src\t# long" %}
13727   ins_encode %{
13728     __ andq($dst$$Address, $src$$Register);
13729   %}
13730   ins_pipe(ialu_mem_reg);
13731 %}
13732 
13733 // And Memory with Immediate
13734 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13735 %{
13736   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13737   effect(KILL cr);
13738   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13739 
13740   ins_cost(125);
13741   format %{ "andq    $dst, $src\t# long" %}
13742   ins_encode %{
13743     __ andq($dst$$Address, $src$$constant);
13744   %}
13745   ins_pipe(ialu_mem_imm);
13746 %}
13747 
13748 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13749 %{
  // con must be a genuinely 64-bit immediate whose complement is a power of 2,
  // since AND with an 8/32-bit immediate already covers the smaller cases.
13752   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13753 
13754   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13755   effect(KILL cr);
13756 
13757   ins_cost(125);
13758   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13759   ins_encode %{
13760     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13761   %}
13762   ins_pipe(ialu_mem_imm);
13763 %}
13764 
13765 // BMI1 instructions
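// andn computes dst = ~src1 & src2 in a single instruction; C2 expresses that
// as (AndL (XorL src1 -1) src2), which the two rules below match directly.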
13766 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13767   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13768   predicate(UseBMI1Instructions);
13769   effect(KILL cr);
13770   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13771 
13772   ins_cost(125);
13773   format %{ "andnq  $dst, $src1, $src2" %}
13774 
13775   ins_encode %{
13776     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13777   %}
13778   ins_pipe(ialu_reg_mem);
13779 %}
13780 
13781 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13782   match(Set dst (AndL (XorL src1 minus_1) src2));
13783   predicate(UseBMI1Instructions);
13784   effect(KILL cr);
13785   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13786 
13787   format %{ "andnq  $dst, $src1, $src2" %}
13788 
13789   ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13791   %}
  ins_pipe(ialu_reg_reg);
13793 %}
13794 
13795 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13796   match(Set dst (AndL (SubL imm_zero src) src));
13797   predicate(UseBMI1Instructions);
13798   effect(KILL cr);
13799   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13800 
13801   format %{ "blsiq  $dst, $src" %}
13802 
13803   ins_encode %{
13804     __ blsiq($dst$$Register, $src$$Register);
13805   %}
13806   ins_pipe(ialu_reg);
13807 %}
13808 
13809 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13810   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13811   predicate(UseBMI1Instructions);
13812   effect(KILL cr);
13813   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13814 
13815   ins_cost(125);
13816   format %{ "blsiq  $dst, $src" %}
13817 
13818   ins_encode %{
13819     __ blsiq($dst$$Register, $src$$Address);
13820   %}
13821   ins_pipe(ialu_reg_mem);
13822 %}
13823 
13824 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13825 %{
13826   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13827   predicate(UseBMI1Instructions);
13828   effect(KILL cr);
13829   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13830 
13831   ins_cost(125);
13832   format %{ "blsmskq $dst, $src" %}
13833 
13834   ins_encode %{
13835     __ blsmskq($dst$$Register, $src$$Address);
13836   %}
13837   ins_pipe(ialu_reg_mem);
13838 %}
13839 
13840 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13841 %{
13842   match(Set dst (XorL (AddL src minus_1) src));
13843   predicate(UseBMI1Instructions);
13844   effect(KILL cr);
13845   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13846 
13847   format %{ "blsmskq $dst, $src" %}
13848 
13849   ins_encode %{
13850     __ blsmskq($dst$$Register, $src$$Register);
13851   %}
13852 
13853   ins_pipe(ialu_reg);
13854 %}
13855 
13856 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13857 %{
13858   match(Set dst (AndL (AddL src minus_1) src) );
13859   predicate(UseBMI1Instructions);
13860   effect(KILL cr);
13861   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13862 
13863   format %{ "blsrq  $dst, $src" %}
13864 
13865   ins_encode %{
13866     __ blsrq($dst$$Register, $src$$Register);
13867   %}
13868 
13869   ins_pipe(ialu_reg);
13870 %}
13871 
13872 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13873 %{
13874   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13875   predicate(UseBMI1Instructions);
13876   effect(KILL cr);
13877   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13878 
13879   ins_cost(125);
13880   format %{ "blsrq  $dst, $src" %}
13881 
13882   ins_encode %{
13883     __ blsrq($dst$$Register, $src$$Address);
13884   %}
13885 
  ins_pipe(ialu_reg_mem);
13887 %}
13888 
13889 // Or Instructions
13890 // Or Register with Register
13891 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13892 %{
13893   predicate(!UseAPX);
13894   match(Set dst (OrL dst src));
13895   effect(KILL cr);
13896   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13897 
13898   format %{ "orq     $dst, $src\t# long" %}
13899   ins_encode %{
13900     __ orq($dst$$Register, $src$$Register);
13901   %}
13902   ins_pipe(ialu_reg_reg);
13903 %}
13904 
13905 // Or Register with Register using New Data Destination (NDD)
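// NDD forms encode a destination distinct from both sources, so dst need not
// alias an input. The Flag_ndd_demotable_opr* hints mark operands that are
// assumed to allow demotion to the shorter legacy two-operand encoding when
// dst happens to coincide with that operand.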
13906 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13907 %{
13908   predicate(UseAPX);
13909   match(Set dst (OrL src1 src2));
13910   effect(KILL cr);
13911   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13912 
13913   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13914   ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13918   ins_pipe(ialu_reg_reg);
13919 %}
13920 
13921 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
13924   effect(KILL cr);
13925   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13926 
13927   format %{ "orq     $dst, $src\t# long" %}
13928   ins_encode %{
13929     __ orq($dst$$Register, $src$$Register);
13930   %}
13931   ins_pipe(ialu_reg_reg);
13932 %}
13933 
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13936   effect(KILL cr);
13937   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13938 
13939   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13940   ins_encode %{
13941     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13942   %}
13943   ins_pipe(ialu_reg_reg);
13944 %}
13945 
13946 // Or Register with Immediate
13947 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13948 %{
13949   predicate(!UseAPX);
13950   match(Set dst (OrL dst src));
13951   effect(KILL cr);
13952   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13953 
13954   format %{ "orq     $dst, $src\t# long" %}
13955   ins_encode %{
13956     __ orq($dst$$Register, $src$$constant);
13957   %}
13958   ins_pipe(ialu_reg);
13959 %}
13960 
13961 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13962 %{
13963   predicate(UseAPX);
13964   match(Set dst (OrL src1 src2));
13965   effect(KILL cr);
13966   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13967 
13968   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13969   ins_encode %{
13970     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13971   %}
13972   ins_pipe(ialu_reg);
13973 %}
13974 
13975 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13976 %{
13977   predicate(UseAPX);
13978   match(Set dst (OrL src1 src2));
13979   effect(KILL cr);
13980   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13981 
13982   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
13983   ins_encode %{
13984     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13985   %}
13986   ins_pipe(ialu_reg);
13987 %}
13988 
13989 // Or Memory with Immediate
13990 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13991 %{
13992   predicate(UseAPX);
13993   match(Set dst (OrL (LoadL src1) src2));
13994   effect(KILL cr);
13995   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13996 
13997   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13998   ins_encode %{
13999     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14000   %}
14001   ins_pipe(ialu_reg);
14002 %}
14003 
14004 // Or Register with Memory
14005 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14006 %{
14007   predicate(!UseAPX);
14008   match(Set dst (OrL dst (LoadL src)));
14009   effect(KILL cr);
14010   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14011 
14012   ins_cost(150);
14013   format %{ "orq     $dst, $src\t# long" %}
14014   ins_encode %{
14015     __ orq($dst$$Register, $src$$Address);
14016   %}
14017   ins_pipe(ialu_reg_mem);
14018 %}
14019 
14020 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14021 %{
14022   predicate(UseAPX);
14023   match(Set dst (OrL src1 (LoadL src2)));
14024   effect(KILL cr);
14025   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14026 
14027   ins_cost(150);
14028   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14029   ins_encode %{
14030     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14031   %}
14032   ins_pipe(ialu_reg_mem);
14033 %}
14034 
14035 // Or Memory with Register
14036 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14037 %{
14038   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14039   effect(KILL cr);
14040   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14041 
14042   ins_cost(150);
14043   format %{ "orq     $dst, $src\t# long" %}
14044   ins_encode %{
14045     __ orq($dst$$Address, $src$$Register);
14046   %}
14047   ins_pipe(ialu_mem_reg);
14048 %}
14049 
14050 // Or Memory with Immediate
14051 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14052 %{
14053   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14054   effect(KILL cr);
14055   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14056 
14057   ins_cost(125);
14058   format %{ "orq     $dst, $src\t# long" %}
14059   ins_encode %{
14060     __ orq($dst$$Address, $src$$constant);
14061   %}
14062   ins_pipe(ialu_mem_imm);
14063 %}
14064 
14065 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14066 %{
14067   // con should be a pure 64-bit power of 2 immediate
14068   // because AND/OR works well enough for 8/32-bit values.
14069   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14070 
14071   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14072   effect(KILL cr);
14073 
14074   ins_cost(125);
14075   format %{ "btsq    $dst, log2($con)\t# long" %}
14076   ins_encode %{
14077     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14078   %}
14079   ins_pipe(ialu_mem_imm);
14080 %}
14081 
14082 // Xor Instructions
14083 // Xor Register with Register
14084 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14085 %{
14086   predicate(!UseAPX);
14087   match(Set dst (XorL dst src));
14088   effect(KILL cr);
14089   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14090 
14091   format %{ "xorq    $dst, $src\t# long" %}
14092   ins_encode %{
14093     __ xorq($dst$$Register, $src$$Register);
14094   %}
14095   ins_pipe(ialu_reg_reg);
14096 %}
14097 
14098 // Xor Register with Register using New Data Destination (NDD)
14099 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14100 %{
14101   predicate(UseAPX);
14102   match(Set dst (XorL src1 src2));
14103   effect(KILL cr);
14104   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14105 
14106   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14107   ins_encode %{
14108     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14109   %}
14110   ins_pipe(ialu_reg_reg);
14111 %}
14112 
14113 // Xor Register with Immediate -1
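// x ^ -1 == ~x, so this reduces to a single not instruction.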
14114 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14115 %{
14116   predicate(!UseAPX);
14117   match(Set dst (XorL dst imm));
14118 
14119   format %{ "notq   $dst" %}
14120   ins_encode %{
14121      __ notq($dst$$Register);
14122   %}
14123   ins_pipe(ialu_reg);
14124 %}
14125 
14126 instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
14127 %{
14128   predicate(UseAPX);
14129   match(Set dst (XorL src imm));
14130   flag(PD::Flag_ndd_demotable_opr1);
14131 
14132   format %{ "enotq   $dst, $src" %}
14133   ins_encode %{
14134     __ enotq($dst$$Register, $src$$Register);
14135   %}
14136   ins_pipe(ialu_reg);
14137 %}
14138 
14139 // Xor Register with Immediate
14140 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14141 %{
  // Strict predicate check so that xorL_rReg_im1 is selected regardless of cost when immL32 src is -1.
14143   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14144   match(Set dst (XorL dst src));
14145   effect(KILL cr);
14146   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14147 
14148   format %{ "xorq    $dst, $src\t# long" %}
14149   ins_encode %{
14150     __ xorq($dst$$Register, $src$$constant);
14151   %}
14152   ins_pipe(ialu_reg);
14153 %}
14154 
14155 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14156 %{
  // Strict predicate check so that xorL_rReg_im1_ndd is selected regardless of cost when immL32 src2 is -1.
14158   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14159   match(Set dst (XorL src1 src2));
14160   effect(KILL cr);
14161   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14162 
14163   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14164   ins_encode %{
14165     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14166   %}
14167   ins_pipe(ialu_reg);
14168 %}
14169 
14170 // Xor Memory with Immediate
14171 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14172 %{
14173   predicate(UseAPX);
14174   match(Set dst (XorL (LoadL src1) src2));
14175   effect(KILL cr);
14176   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14177   ins_cost(150);
14178 
14179   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14180   ins_encode %{
14181     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14182   %}
14183   ins_pipe(ialu_reg);
14184 %}
14185 
14186 // Xor Register with Memory
14187 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14188 %{
14189   predicate(!UseAPX);
14190   match(Set dst (XorL dst (LoadL src)));
14191   effect(KILL cr);
14192   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14193 
14194   ins_cost(150);
14195   format %{ "xorq    $dst, $src\t# long" %}
14196   ins_encode %{
14197     __ xorq($dst$$Register, $src$$Address);
14198   %}
14199   ins_pipe(ialu_reg_mem);
14200 %}
14201 
14202 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14203 %{
14204   predicate(UseAPX);
14205   match(Set dst (XorL src1 (LoadL src2)));
14206   effect(KILL cr);
14207   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14208 
14209   ins_cost(150);
14210   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14211   ins_encode %{
14212     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14213   %}
14214   ins_pipe(ialu_reg_mem);
14215 %}
14216 
14217 // Xor Memory with Register
14218 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14219 %{
14220   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14221   effect(KILL cr);
14222   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14223 
14224   ins_cost(150);
14225   format %{ "xorq    $dst, $src\t# long" %}
14226   ins_encode %{
14227     __ xorq($dst$$Address, $src$$Register);
14228   %}
14229   ins_pipe(ialu_mem_reg);
14230 %}
14231 
14232 // Xor Memory with Immediate
14233 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14234 %{
14235   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14236   effect(KILL cr);
14237   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14238 
14239   ins_cost(125);
14240   format %{ "xorq    $dst, $src\t# long" %}
14241   ins_encode %{
14242     __ xorq($dst$$Address, $src$$constant);
14243   %}
14244   ins_pipe(ialu_mem_imm);
14245 %}
14246 
14247 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14248 %{
14249   match(Set dst (CmpLTMask p q));
14250   effect(KILL cr);
14251 
14252   ins_cost(400);
14253   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14254             "setcc   $dst \t# emits setlt + movzbl or setzul for APX"
14255             "negl    $dst" %}
14256   ins_encode %{
14257     __ cmpl($p$$Register, $q$$Register);
14258     __ setcc(Assembler::less, $dst$$Register);
14259     __ negl($dst$$Register);
14260   %}
14261   ins_pipe(pipe_slow);
14262 %}
14263 
14264 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14265 %{
14266   match(Set dst (CmpLTMask dst zero));
14267   effect(KILL cr);
14268 
14269   ins_cost(100);
14270   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14271   ins_encode %{
14272     __ sarl($dst$$Register, 31);
14273   %}
14274   ins_pipe(ialu_reg);
14275 %}
14276 
14277 /* Better to save a register than avoid a branch */
14278 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14279 %{
14280   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14281   effect(KILL cr);
14282   ins_cost(300);
14283   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14284             "jge     done\n\t"
14285             "addl    $p,$y\n"
14286             "done:   " %}
14287   ins_encode %{
14288     Register Rp = $p$$Register;
14289     Register Rq = $q$$Register;
14290     Register Ry = $y$$Register;
14291     Label done;
14292     __ subl(Rp, Rq);
14293     __ jccb(Assembler::greaterEqual, done);
14294     __ addl(Rp, Ry);
14295     __ bind(done);
14296   %}
14297   ins_pipe(pipe_cmplt);
14298 %}
14299 
14300 /* Better to save a register than avoid a branch */
14301 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14302 %{
14303   match(Set y (AndI (CmpLTMask p q) y));
14304   effect(KILL cr);
14305 
14306   ins_cost(300);
14307 
14308   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14309             "jlt     done\n\t"
14310             "xorl    $y, $y\n"
14311             "done:   " %}
14312   ins_encode %{
14313     Register Rp = $p$$Register;
14314     Register Rq = $q$$Register;
14315     Register Ry = $y$$Register;
14316     Label done;
14317     __ cmpl(Rp, Rq);
14318     __ jccb(Assembler::less, done);
14319     __ xorl(Ry, Ry);
14320     __ bind(done);
14321   %}
14322   ins_pipe(pipe_cmplt);
14323 %}
14324 
14325 
14326 //---------- FP Instructions------------------------------------------------
14327 
14328 // Really expensive, avoid
14329 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14330 %{
14331   match(Set cr (CmpF src1 src2));
14332 
14333   ins_cost(500);
14334   format %{ "ucomiss $src1, $src2\n\t"
14335             "jnp,s   exit\n\t"
14336             "pushfq\t# saw NaN, set CF\n\t"
14337             "andq    [rsp], #0xffffff2b\n\t"
14338             "popfq\n"
14339     "exit:" %}
14340   ins_encode %{
14341     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14342     emit_cmpfp_fixup(masm);
14343   %}
14344   ins_pipe(pipe_slow);
14345 %}
14346 
14347 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14348   match(Set cr (CmpF src1 src2));
14349 
14350   ins_cost(100);
14351   format %{ "ucomiss $src1, $src2" %}
14352   ins_encode %{
14353     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14354   %}
14355   ins_pipe(pipe_slow);
14356 %}
14357 
14358 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14359   match(Set cr (CmpF src1 (LoadF src2)));
14360 
14361   ins_cost(100);
14362   format %{ "ucomiss $src1, $src2" %}
14363   ins_encode %{
14364     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14365   %}
14366   ins_pipe(pipe_slow);
14367 %}
14368 
14369 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14370   match(Set cr (CmpF src con));
14371   ins_cost(100);
14372   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14373   ins_encode %{
14374     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14375   %}
14376   ins_pipe(pipe_slow);
14377 %}
14378 
14379 // Really expensive, avoid
14380 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14381 %{
14382   match(Set cr (CmpD src1 src2));
14383 
14384   ins_cost(500);
14385   format %{ "ucomisd $src1, $src2\n\t"
14386             "jnp,s   exit\n\t"
14387             "pushfq\t# saw NaN, set CF\n\t"
14388             "andq    [rsp], #0xffffff2b\n\t"
14389             "popfq\n"
14390     "exit:" %}
14391   ins_encode %{
14392     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14393     emit_cmpfp_fixup(masm);
14394   %}
14395   ins_pipe(pipe_slow);
14396 %}
14397 
14398 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14399   match(Set cr (CmpD src1 src2));
14400 
14401   ins_cost(100);
14402   format %{ "ucomisd $src1, $src2 test" %}
14403   ins_encode %{
14404     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14405   %}
14406   ins_pipe(pipe_slow);
14407 %}
14408 
14409 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14410   match(Set cr (CmpD src1 (LoadD src2)));
14411 
14412   ins_cost(100);
14413   format %{ "ucomisd $src1, $src2" %}
14414   ins_encode %{
14415     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14416   %}
14417   ins_pipe(pipe_slow);
14418 %}
14419 
14420 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14421   match(Set cr (CmpD src con));
14422   ins_cost(100);
14423   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14424   ins_encode %{
14425     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14426   %}
14427   ins_pipe(pipe_slow);
14428 %}
14429 
14430 // Compare into -1,0,1
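// emit_cmpfp3 maps the unordered-compare flags to an int in $dst: -1 when
// less or unordered (NaN, via the jp/jb early exits), 0 when equal, and 1
// when greater.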
14431 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14432 %{
14433   match(Set dst (CmpF3 src1 src2));
14434   effect(KILL cr);
14435 
14436   ins_cost(275);
14437   format %{ "ucomiss $src1, $src2\n\t"
14438             "movl    $dst, #-1\n\t"
14439             "jp,s    done\n\t"
14440             "jb,s    done\n\t"
14441             "setne   $dst\n\t"
14442             "movzbl  $dst, $dst\n"
14443     "done:" %}
14444   ins_encode %{
14445     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14446     emit_cmpfp3(masm, $dst$$Register);
14447   %}
14448   ins_pipe(pipe_slow);
14449 %}
14450 
14451 // Compare into -1,0,1
14452 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14453 %{
14454   match(Set dst (CmpF3 src1 (LoadF src2)));
14455   effect(KILL cr);
14456 
14457   ins_cost(275);
14458   format %{ "ucomiss $src1, $src2\n\t"
14459             "movl    $dst, #-1\n\t"
14460             "jp,s    done\n\t"
14461             "jb,s    done\n\t"
14462             "setne   $dst\n\t"
14463             "movzbl  $dst, $dst\n"
14464     "done:" %}
14465   ins_encode %{
14466     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14467     emit_cmpfp3(masm, $dst$$Register);
14468   %}
14469   ins_pipe(pipe_slow);
14470 %}
14471 
14472 // Compare into -1,0,1
14473 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14474   match(Set dst (CmpF3 src con));
14475   effect(KILL cr);
14476 
14477   ins_cost(275);
14478   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14479             "movl    $dst, #-1\n\t"
14480             "jp,s    done\n\t"
14481             "jb,s    done\n\t"
14482             "setne   $dst\n\t"
14483             "movzbl  $dst, $dst\n"
14484     "done:" %}
14485   ins_encode %{
14486     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14487     emit_cmpfp3(masm, $dst$$Register);
14488   %}
14489   ins_pipe(pipe_slow);
14490 %}
14491 
14492 // Compare into -1,0,1
14493 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14494 %{
14495   match(Set dst (CmpD3 src1 src2));
14496   effect(KILL cr);
14497 
14498   ins_cost(275);
14499   format %{ "ucomisd $src1, $src2\n\t"
14500             "movl    $dst, #-1\n\t"
14501             "jp,s    done\n\t"
14502             "jb,s    done\n\t"
14503             "setne   $dst\n\t"
14504             "movzbl  $dst, $dst\n"
14505     "done:" %}
14506   ins_encode %{
14507     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14508     emit_cmpfp3(masm, $dst$$Register);
14509   %}
14510   ins_pipe(pipe_slow);
14511 %}
14512 
14513 // Compare into -1,0,1
14514 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14515 %{
14516   match(Set dst (CmpD3 src1 (LoadD src2)));
14517   effect(KILL cr);
14518 
14519   ins_cost(275);
14520   format %{ "ucomisd $src1, $src2\n\t"
14521             "movl    $dst, #-1\n\t"
14522             "jp,s    done\n\t"
14523             "jb,s    done\n\t"
14524             "setne   $dst\n\t"
14525             "movzbl  $dst, $dst\n"
14526     "done:" %}
14527   ins_encode %{
14528     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14529     emit_cmpfp3(masm, $dst$$Register);
14530   %}
14531   ins_pipe(pipe_slow);
14532 %}
14533 
14534 // Compare into -1,0,1
14535 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14536   match(Set dst (CmpD3 src con));
14537   effect(KILL cr);
14538 
14539   ins_cost(275);
14540   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14541             "movl    $dst, #-1\n\t"
14542             "jp,s    done\n\t"
14543             "jb,s    done\n\t"
14544             "setne   $dst\n\t"
14545             "movzbl  $dst, $dst\n"
14546     "done:" %}
14547   ins_encode %{
14548     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14549     emit_cmpfp3(masm, $dst$$Register);
14550   %}
14551   ins_pipe(pipe_slow);
14552 %}
14553 
14554 //----------Arithmetic Conversion Instructions---------------------------------
14555 
14556 instruct convF2D_reg_reg(regD dst, regF src)
14557 %{
14558   match(Set dst (ConvF2D src));
14559 
14560   format %{ "cvtss2sd $dst, $src" %}
14561   ins_encode %{
14562     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14563   %}
14564   ins_pipe(pipe_slow); // XXX
14565 %}
14566 
14567 instruct convF2D_reg_mem(regD dst, memory src)
14568 %{
14569   predicate(UseAVX == 0);
14570   match(Set dst (ConvF2D (LoadF src)));
14571 
14572   format %{ "cvtss2sd $dst, $src" %}
14573   ins_encode %{
14574     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14575   %}
14576   ins_pipe(pipe_slow); // XXX
14577 %}
14578 
14579 instruct convD2F_reg_reg(regF dst, regD src)
14580 %{
14581   match(Set dst (ConvD2F src));
14582 
14583   format %{ "cvtsd2ss $dst, $src" %}
14584   ins_encode %{
14585     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14586   %}
14587   ins_pipe(pipe_slow); // XXX
14588 %}
14589 
14590 instruct convD2F_reg_mem(regF dst, memory src)
14591 %{
14592   predicate(UseAVX == 0);
14593   match(Set dst (ConvD2F (LoadD src)));
14594 
14595   format %{ "cvtsd2ss $dst, $src" %}
14596   ins_encode %{
14597     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14598   %}
14599   ins_pipe(pipe_slow); // XXX
14600 %}
14601 
14602 // XXX do mem variants
14603 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14604 %{
14605   predicate(!VM_Version::supports_avx10_2());
14606   match(Set dst (ConvF2I src));
14607   effect(KILL cr);
14608   format %{ "convert_f2i $dst, $src" %}
14609   ins_encode %{
14610     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14611   %}
14612   ins_pipe(pipe_slow);
14613 %}
14614 
14615 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14616 %{
14617   predicate(VM_Version::supports_avx10_2());
14618   match(Set dst (ConvF2I src));
14619   format %{ "evcvttss2sisl $dst, $src" %}
14620   ins_encode %{
14621     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14622   %}
14623   ins_pipe(pipe_slow);
14624 %}
14625 
14626 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14627 %{
14628   predicate(VM_Version::supports_avx10_2());
14629   match(Set dst (ConvF2I (LoadF src)));
14630   format %{ "evcvttss2sisl $dst, $src" %}
14631   ins_encode %{
14632     __ evcvttss2sisl($dst$$Register, $src$$Address);
14633   %}
14634   ins_pipe(pipe_slow);
14635 %}
14636 
14637 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14638 %{
14639   predicate(!VM_Version::supports_avx10_2());
14640   match(Set dst (ConvF2L src));
14641   effect(KILL cr);
14642   format %{ "convert_f2l $dst, $src"%}
14643   ins_encode %{
14644     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14645   %}
14646   ins_pipe(pipe_slow);
14647 %}
14648 
14649 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14650 %{
14651   predicate(VM_Version::supports_avx10_2());
14652   match(Set dst (ConvF2L src));
14653   format %{ "evcvttss2sisq $dst, $src" %}
14654   ins_encode %{
14655     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14656   %}
14657   ins_pipe(pipe_slow);
14658 %}
14659 
14660 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14661 %{
14662   predicate(VM_Version::supports_avx10_2());
14663   match(Set dst (ConvF2L (LoadF src)));
14664   format %{ "evcvttss2sisq $dst, $src" %}
14665   ins_encode %{
14666     __ evcvttss2sisq($dst$$Register, $src$$Address);
14667   %}
14668   ins_pipe(pipe_slow);
14669 %}
14670 
14671 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14672 %{
14673   predicate(!VM_Version::supports_avx10_2());
14674   match(Set dst (ConvD2I src));
14675   effect(KILL cr);
14676   format %{ "convert_d2i $dst, $src"%}
14677   ins_encode %{
14678     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14679   %}
14680   ins_pipe(pipe_slow);
14681 %}
14682 
14683 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14684 %{
14685   predicate(VM_Version::supports_avx10_2());
14686   match(Set dst (ConvD2I src));
14687   format %{ "evcvttsd2sisl $dst, $src" %}
14688   ins_encode %{
14689     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14690   %}
14691   ins_pipe(pipe_slow);
14692 %}
14693 
14694 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14695 %{
14696   predicate(VM_Version::supports_avx10_2());
14697   match(Set dst (ConvD2I (LoadD src)));
14698   format %{ "evcvttsd2sisl $dst, $src" %}
14699   ins_encode %{
14700     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14701   %}
14702   ins_pipe(pipe_slow);
14703 %}
14704 
14705 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14706 %{
14707   predicate(!VM_Version::supports_avx10_2());
14708   match(Set dst (ConvD2L src));
14709   effect(KILL cr);
14710   format %{ "convert_d2l $dst, $src"%}
14711   ins_encode %{
14712     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14713   %}
14714   ins_pipe(pipe_slow);
14715 %}
14716 
14717 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14718 %{
14719   predicate(VM_Version::supports_avx10_2());
14720   match(Set dst (ConvD2L src));
14721   format %{ "evcvttsd2sisq $dst, $src" %}
14722   ins_encode %{
14723     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14724   %}
14725   ins_pipe(pipe_slow);
14726 %}
14727 
14728 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14729 %{
14730   predicate(VM_Version::supports_avx10_2());
14731   match(Set dst (ConvD2L (LoadD src)));
14732   format %{ "evcvttsd2sisq $dst, $src" %}
14733   ins_encode %{
14734     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14735   %}
14736   ins_pipe(pipe_slow);
14737 %}
14738 
14739 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14740 %{
14741   match(Set dst (RoundD src));
14742   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14743   format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14744   ins_encode %{
14745     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14746   %}
14747   ins_pipe(pipe_slow);
14748 %}
14749 
14750 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14751 %{
14752   match(Set dst (RoundF src));
14753   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14754   format %{ "round_float $dst,$src" %}
14755   ins_encode %{
14756     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14757   %}
14758   ins_pipe(pipe_slow);
14759 %}
14760 
14761 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14762 %{
14763   predicate(!UseXmmI2F);
14764   match(Set dst (ConvI2F src));
14765 
14766   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14767   ins_encode %{
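    // With AVX, cvtsi2ss merges into dst and writes only the low 32 bits;
    // zeroing dst first breaks the false dependency on its stale upper bits.
    // The same pattern is used by the other cvtsi2* rules below.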
14768     if (UseAVX > 0) {
14769       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14770     }
14771     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14772   %}
14773   ins_pipe(pipe_slow); // XXX
14774 %}
14775 
14776 instruct convI2F_reg_mem(regF dst, memory src)
14777 %{
14778   predicate(UseAVX == 0);
14779   match(Set dst (ConvI2F (LoadI src)));
14780 
14781   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14782   ins_encode %{
14783     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14784   %}
14785   ins_pipe(pipe_slow); // XXX
14786 %}
14787 
14788 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14789 %{
14790   predicate(!UseXmmI2D);
14791   match(Set dst (ConvI2D src));
14792 
14793   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14794   ins_encode %{
14795     if (UseAVX > 0) {
14796       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14797     }
14798     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14799   %}
14800   ins_pipe(pipe_slow); // XXX
14801 %}
14802 
14803 instruct convI2D_reg_mem(regD dst, memory src)
14804 %{
14805   predicate(UseAVX == 0);
14806   match(Set dst (ConvI2D (LoadI src)));
14807 
14808   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14809   ins_encode %{
14810     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14811   %}
14812   ins_pipe(pipe_slow); // XXX
14813 %}
14814 
14815 instruct convXI2F_reg(regF dst, rRegI src)
14816 %{
14817   predicate(UseXmmI2F);
14818   match(Set dst (ConvI2F src));
14819 
14820   format %{ "movdl $dst, $src\n\t"
14821             "cvtdq2psl $dst, $dst\t# i2f" %}
14822   ins_encode %{
14823     __ movdl($dst$$XMMRegister, $src$$Register);
14824     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14825   %}
14826   ins_pipe(pipe_slow); // XXX
14827 %}
14828 
14829 instruct convXI2D_reg(regD dst, rRegI src)
14830 %{
14831   predicate(UseXmmI2D);
14832   match(Set dst (ConvI2D src));
14833 
14834   format %{ "movdl $dst, $src\n\t"
14835             "cvtdq2pdl $dst, $dst\t# i2d" %}
14836   ins_encode %{
14837     __ movdl($dst$$XMMRegister, $src$$Register);
14838     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14839   %}
14840   ins_pipe(pipe_slow); // XXX
14841 %}
14842 
14843 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14844 %{
14845   match(Set dst (ConvL2F src));
14846 
14847   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14848   ins_encode %{
14849     if (UseAVX > 0) {
14850       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14851     }
14852     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14853   %}
14854   ins_pipe(pipe_slow); // XXX
14855 %}
14856 
14857 instruct convL2F_reg_mem(regF dst, memory src)
14858 %{
14859   predicate(UseAVX == 0);
14860   match(Set dst (ConvL2F (LoadL src)));
14861 
14862   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14863   ins_encode %{
14864     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14865   %}
14866   ins_pipe(pipe_slow); // XXX
14867 %}
14868 
14869 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14870 %{
14871   match(Set dst (ConvL2D src));
14872 
14873   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14874   ins_encode %{
14875     if (UseAVX > 0) {
14876       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14877     }
14878     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14879   %}
14880   ins_pipe(pipe_slow); // XXX
14881 %}
14882 
14883 instruct convL2D_reg_mem(regD dst, memory src)
14884 %{
14885   predicate(UseAVX == 0);
14886   match(Set dst (ConvL2D (LoadL src)));
14887 
14888   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14889   ins_encode %{
14890     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14891   %}
14892   ins_pipe(pipe_slow); // XXX
14893 %}
14894 
14895 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14896 %{
14897   match(Set dst (ConvI2L src));
14898 
14899   ins_cost(125);
14900   format %{ "movslq  $dst, $src\t# i2l" %}
14901   ins_encode %{
14902     __ movslq($dst$$Register, $src$$Register);
14903   %}
14904   ins_pipe(ialu_reg_reg);
14905 %}
14906 
14907 // Zero-extend convert int to long
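// On x86-64 a 32-bit register write implicitly zeroes bits 63:32, so a plain
// movl implements the zero-extension; when dst == src it can be elided.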
14908 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14909 %{
14910   match(Set dst (AndL (ConvI2L src) mask));
14911 
14912   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14913   ins_encode %{
14914     if ($dst$$reg != $src$$reg) {
14915       __ movl($dst$$Register, $src$$Register);
14916     }
14917   %}
14918   ins_pipe(ialu_reg_reg);
14919 %}
14920 
14921 // Zero-extend convert int to long
14922 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14923 %{
14924   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14925 
14926   format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
14927   ins_encode %{
14928     __ movl($dst$$Register, $src$$Address);
14929   %}
14930   ins_pipe(ialu_reg_mem);
14931 %}
14932 
14933 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14934 %{
14935   match(Set dst (AndL src mask));
14936 
14937   format %{ "movl    $dst, $src\t# zero-extend long" %}
14938   ins_encode %{
14939     __ movl($dst$$Register, $src$$Register);
14940   %}
14941   ins_pipe(ialu_reg_reg);
14942 %}
14943 
14944 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14945 %{
14946   match(Set dst (ConvL2I src));
14947 
14948   format %{ "movl    $dst, $src\t# l2i" %}
14949   ins_encode %{
14950     __ movl($dst$$Register, $src$$Register);
14951   %}
14952   ins_pipe(ialu_reg_reg);
14953 %}
14954 
14955 
14956 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14957   match(Set dst (MoveF2I src));
14958   effect(DEF dst, USE src);
14959 
14960   ins_cost(125);
14961   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
14962   ins_encode %{
14963     __ movl($dst$$Register, Address(rsp, $src$$disp));
14964   %}
14965   ins_pipe(ialu_reg_mem);
14966 %}
14967 
14968 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14969   match(Set dst (MoveI2F src));
14970   effect(DEF dst, USE src);
14971 
14972   ins_cost(125);
14973   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
14974   ins_encode %{
14975     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14976   %}
14977   ins_pipe(pipe_slow);
14978 %}
14979 
14980 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14981   match(Set dst (MoveD2L src));
14982   effect(DEF dst, USE src);
14983 
14984   ins_cost(125);
14985   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
14986   ins_encode %{
14987     __ movq($dst$$Register, Address(rsp, $src$$disp));
14988   %}
14989   ins_pipe(ialu_reg_mem);
14990 %}
14991 
14992 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14993   predicate(!UseXmmLoadAndClearUpper);
14994   match(Set dst (MoveL2D src));
14995   effect(DEF dst, USE src);
14996 
14997   ins_cost(125);
14998   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
14999   ins_encode %{
15000     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15001   %}
15002   ins_pipe(pipe_slow);
15003 %}
15004 
15005 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15006   predicate(UseXmmLoadAndClearUpper);
15007   match(Set dst (MoveL2D src));
15008   effect(DEF dst, USE src);
15009 
15010   ins_cost(125);
15011   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15012   ins_encode %{
15013     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15014   %}
15015   ins_pipe(pipe_slow);
15016 %}
15017 
15018 
15019 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15020   match(Set dst (MoveF2I src));
15021   effect(DEF dst, USE src);
15022 
15023   ins_cost(95); // XXX
15024   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15025   ins_encode %{
15026     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15027   %}
15028   ins_pipe(pipe_slow);
15029 %}
15030 
15031 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15032   match(Set dst (MoveI2F src));
15033   effect(DEF dst, USE src);
15034 
15035   ins_cost(100);
15036   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15037   ins_encode %{
15038     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15039   %}
15040   ins_pipe( ialu_mem_reg );
15041 %}
15042 
15043 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15044   match(Set dst (MoveD2L src));
15045   effect(DEF dst, USE src);
15046 
15047   ins_cost(95); // XXX
15048   format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
15049   ins_encode %{
15050     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15051   %}
15052   ins_pipe(pipe_slow);
15053 %}
15054 
15055 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15056   match(Set dst (MoveL2D src));
15057   effect(DEF dst, USE src);
15058 
15059   ins_cost(100);
15060   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15061   ins_encode %{
15062     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15063   %}
15064   ins_pipe(ialu_mem_reg);
15065 %}
15066 
15067 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15068   match(Set dst (MoveF2I src));
15069   effect(DEF dst, USE src);
15070   ins_cost(85);
15071   format %{ "movd    $dst,$src\t# MoveF2I" %}
15072   ins_encode %{
15073     __ movdl($dst$$Register, $src$$XMMRegister);
15074   %}
15075   ins_pipe( pipe_slow );
15076 %}
15077 
15078 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15079   match(Set dst (MoveD2L src));
15080   effect(DEF dst, USE src);
15081   ins_cost(85);
15082   format %{ "movd    $dst,$src\t# MoveD2L" %}
15083   ins_encode %{
15084     __ movdq($dst$$Register, $src$$XMMRegister);
15085   %}
15086   ins_pipe( pipe_slow );
15087 %}
15088 
15089 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15090   match(Set dst (MoveI2F src));
15091   effect(DEF dst, USE src);
15092   ins_cost(100);
15093   format %{ "movd    $dst,$src\t# MoveI2F" %}
15094   ins_encode %{
15095     __ movdl($dst$$XMMRegister, $src$$Register);
15096   %}
15097   ins_pipe( pipe_slow );
15098 %}
15099 
15100 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15101   match(Set dst (MoveL2D src));
15102   effect(DEF dst, USE src);
15103   ins_cost(100);
15104   format %{ "movd    $dst,$src\t# MoveL2D" %}
15105   ins_encode %{
15106      __ movdq($dst$$XMMRegister, $src$$Register);
15107   %}
15108   ins_pipe( pipe_slow );
15109 %}
15110 
15111 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
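// cnt is the word count in 8-byte units (the large path converts it to bytes
// with "shlq rcx,3").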
15113 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15114                   Universe dummy, rFlagsReg cr)
15115 %{
15116   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15117   match(Set dummy (ClearArray cnt base));
15118   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15119 
15120   format %{ $$template
15121     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15122     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15123     $$emit$$"jg      LARGE\n\t"
15124     $$emit$$"dec     rcx\n\t"
15125     $$emit$$"js      DONE\t# Zero length\n\t"
15126     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15127     $$emit$$"dec     rcx\n\t"
15128     $$emit$$"jge     LOOP\n\t"
15129     $$emit$$"jmp     DONE\n\t"
15130     $$emit$$"# LARGE:\n\t"
15131     if (UseFastStosb) {
15132        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15133        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15134     } else if (UseXMMForObjInit) {
15135        $$emit$$"mov     rdi,rax\n\t"
15136        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15137        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15138        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15139        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15140        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15141        $$emit$$"add     0x40,rax\n\t"
15142        $$emit$$"# L_zero_64_bytes:\n\t"
15143        $$emit$$"sub     0x8,rcx\n\t"
15144        $$emit$$"jge     L_loop\n\t"
15145        $$emit$$"add     0x4,rcx\n\t"
15146        $$emit$$"jl      L_tail\n\t"
15147        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15148        $$emit$$"add     0x20,rax\n\t"
15149        $$emit$$"sub     0x4,rcx\n\t"
15150        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15151        $$emit$$"add     0x4,rcx\n\t"
15152        $$emit$$"jle     L_end\n\t"
15153        $$emit$$"dec     rcx\n\t"
15154        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15155        $$emit$$"vmovq   xmm0,(rax)\n\t"
15156        $$emit$$"add     0x8,rax\n\t"
15157        $$emit$$"dec     rcx\n\t"
15158        $$emit$$"jge     L_sloop\n\t"
15159        $$emit$$"# L_end:\n\t"
15160     } else {
15161        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15162     }
15163     $$emit$$"# DONE"
15164   %}
15165   ins_encode %{
15166     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15167                  $tmp$$XMMRegister, false, knoreg);
15168   %}
15169   ins_pipe(pipe_slow);
15170 %}
15171 
15172 // Small non-constant length ClearArray for AVX512 targets.
15173 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15174                        Universe dummy, rFlagsReg cr)
15175 %{
15176   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15177   match(Set dummy (ClearArray cnt base));
15178   ins_cost(125);
15179   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15180 
15181   format %{ $$template
15182     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15183     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15184     $$emit$$"jg      LARGE\n\t"
15185     $$emit$$"dec     rcx\n\t"
15186     $$emit$$"js      DONE\t# Zero length\n\t"
15187     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15188     $$emit$$"dec     rcx\n\t"
15189     $$emit$$"jge     LOOP\n\t"
15190     $$emit$$"jmp     DONE\n\t"
15191     $$emit$$"# LARGE:\n\t"
15192     if (UseFastStosb) {
15193        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15194        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15195     } else if (UseXMMForObjInit) {
15196        $$emit$$"mov     rdi,rax\n\t"
15197        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15198        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15199        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15200        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15201        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15202        $$emit$$"add     0x40,rax\n\t"
15203        $$emit$$"# L_zero_64_bytes:\n\t"
15204        $$emit$$"sub     0x8,rcx\n\t"
15205        $$emit$$"jge     L_loop\n\t"
15206        $$emit$$"add     0x4,rcx\n\t"
15207        $$emit$$"jl      L_tail\n\t"
15208        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15209        $$emit$$"add     0x20,rax\n\t"
15210        $$emit$$"sub     0x4,rcx\n\t"
15211        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15212        $$emit$$"add     0x4,rcx\n\t"
15213        $$emit$$"jle     L_end\n\t"
15214        $$emit$$"dec     rcx\n\t"
15215        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15216        $$emit$$"vmovq   xmm0,(rax)\n\t"
15217        $$emit$$"add     0x8,rax\n\t"
15218        $$emit$$"dec     rcx\n\t"
15219        $$emit$$"jge     L_sloop\n\t"
15220        $$emit$$"# L_end:\n\t"
15221     } else {
15222        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15223     }
15224     $$emit$$"# DONE"
15225   %}
15226   ins_encode %{
15227     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15228                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15229   %}
15230   ins_pipe(pipe_slow);
15231 %}
15232 
15233 // Large non-constant length ClearArray for non-AVX512 targets.
15234 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15235                         Universe dummy, rFlagsReg cr)
15236 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15238   match(Set dummy (ClearArray cnt base));
15239   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15240 
15241   format %{ $$template
15242     if (UseFastStosb) {
15243        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15244        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15245        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15246     } else if (UseXMMForObjInit) {
15247        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15248        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15249        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15250        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15251        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15252        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15253        $$emit$$"add     0x40,rax\n\t"
15254        $$emit$$"# L_zero_64_bytes:\n\t"
15255        $$emit$$"sub     0x8,rcx\n\t"
15256        $$emit$$"jge     L_loop\n\t"
15257        $$emit$$"add     0x4,rcx\n\t"
15258        $$emit$$"jl      L_tail\n\t"
15259        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15260        $$emit$$"add     0x20,rax\n\t"
15261        $$emit$$"sub     0x4,rcx\n\t"
15262        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15263        $$emit$$"add     0x4,rcx\n\t"
15264        $$emit$$"jle     L_end\n\t"
15265        $$emit$$"dec     rcx\n\t"
15266        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15267        $$emit$$"vmovq   xmm0,(rax)\n\t"
15268        $$emit$$"add     0x8,rax\n\t"
15269        $$emit$$"dec     rcx\n\t"
15270        $$emit$$"jge     L_sloop\n\t"
15271        $$emit$$"# L_end:\n\t"
15272     } else {
15273        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15274        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15275     }
15276   %}
15277   ins_encode %{
15278     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15279                  $tmp$$XMMRegister, true, knoreg);
15280   %}
15281   ins_pipe(pipe_slow);
15282 %}
15283 
15284 // Large non-constant length ClearArray for AVX512 targets.
15285 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15286                              Universe dummy, rFlagsReg cr)
15287 %{
15288   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15289   match(Set dummy (ClearArray cnt base));
15290   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15291 
15292   format %{ $$template
15293     if (UseFastStosb) {
15294        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15295        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15296        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15297     } else if (UseXMMForObjInit) {
15298        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15299        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15300        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15301        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15302        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15303        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15304        $$emit$$"add     0x40,rax\n\t"
15305        $$emit$$"# L_zero_64_bytes:\n\t"
15306        $$emit$$"sub     0x8,rcx\n\t"
15307        $$emit$$"jge     L_loop\n\t"
15308        $$emit$$"add     0x4,rcx\n\t"
15309        $$emit$$"jl      L_tail\n\t"
15310        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15311        $$emit$$"add     0x20,rax\n\t"
15312        $$emit$$"sub     0x4,rcx\n\t"
15313        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15314        $$emit$$"add     0x4,rcx\n\t"
15315        $$emit$$"jle     L_end\n\t"
15316        $$emit$$"dec     rcx\n\t"
15317        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15318        $$emit$$"vmovq   xmm0,(rax)\n\t"
15319        $$emit$$"add     0x8,rax\n\t"
15320        $$emit$$"dec     rcx\n\t"
15321        $$emit$$"jge     L_sloop\n\t"
15322        $$emit$$"# L_end:\n\t"
15323     } else {
15324        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15325        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15326     }
15327   %}
15328   ins_encode %{
15329     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15330                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15331   %}
15332   ins_pipe(pipe_slow);
15333 %}
15334 
15335 // Small constant length ClearArray for AVX512 targets.
15336 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15337 %{
15338   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15339   match(Set dummy (ClearArray cnt base));
15340   ins_cost(100);
15341   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15342   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
15343   ins_encode %{
15344    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15345   %}
15346   ins_pipe(pipe_slow);
15347 %}
15348 
15349 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15350                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15351 %{
15352   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15353   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15354   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15355 
15356   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15357   ins_encode %{
15358     __ string_compare($str1$$Register, $str2$$Register,
15359                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15360                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15361   %}
15362   ins_pipe( pipe_slow );
15363 %}
15364 
15365 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15366                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15367 %{
15368   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15369   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15370   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15371 
15372   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15373   ins_encode %{
15374     __ string_compare($str1$$Register, $str2$$Register,
15375                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15376                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15377   %}
15378   ins_pipe( pipe_slow );
15379 %}
15380 
15381 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15382                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15383 %{
15384   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15385   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15386   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15387 
15388   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15389   ins_encode %{
15390     __ string_compare($str1$$Register, $str2$$Register,
15391                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15392                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15393   %}
15394   ins_pipe( pipe_slow );
15395 %}
15396 
15397 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15398                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15399 %{
15400   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15401   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15402   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15403 
15404   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15405   ins_encode %{
15406     __ string_compare($str1$$Register, $str2$$Register,
15407                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15408                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15409   %}
15410   ins_pipe( pipe_slow );
15411 %}
15412 
15413 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15414                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15415 %{
15416   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15417   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15418   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15419 
15420   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15421   ins_encode %{
15422     __ string_compare($str1$$Register, $str2$$Register,
15423                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15424                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15425   %}
15426   ins_pipe( pipe_slow );
15427 %}
15428 
15429 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15430                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15431 %{
15432   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15433   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15434   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15435 
15436   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15437   ins_encode %{
15438     __ string_compare($str1$$Register, $str2$$Register,
15439                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15440                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15441   %}
15442   ins_pipe( pipe_slow );
15443 %}
15444 
15445 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15446                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15447 %{
15448   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15449   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15450   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15451 
15452   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15453   ins_encode %{
15454     __ string_compare($str2$$Register, $str1$$Register,
15455                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15456                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15457   %}
15458   ins_pipe( pipe_slow );
15459 %}
15460 
15461 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15462                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15463 %{
15464   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15465   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15466   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15467 
15468   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15469   ins_encode %{
15470     __ string_compare($str2$$Register, $str1$$Register,
15471                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15472                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15473   %}
15474   ins_pipe( pipe_slow );
15475 %}
15476 
15477 // fast search of substring with known size.
15478 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15479                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15480 %{
15481   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15482   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15483   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15484 
15485   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15486   ins_encode %{
15487     int icnt2 = (int)$int_cnt2$$constant;
15488     if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through the stack.
15491       __ string_indexofC8($str1$$Register, $str2$$Register,
15492                           $cnt1$$Register, $cnt2$$Register,
15493                           icnt2, $result$$Register,
15494                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15495     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15497       __ string_indexof($str1$$Register, $str2$$Register,
15498                         $cnt1$$Register, $cnt2$$Register,
15499                         icnt2, $result$$Register,
15500                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15501     }
15502   %}
15503   ins_pipe( pipe_slow );
15504 %}
15505 
15506 // fast search of substring with known size.
15507 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15508                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15509 %{
15510   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15511   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15512   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15513 
15514   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15515   ins_encode %{
15516     int icnt2 = (int)$int_cnt2$$constant;
15517     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15520       __ string_indexofC8($str1$$Register, $str2$$Register,
15521                           $cnt1$$Register, $cnt2$$Register,
15522                           icnt2, $result$$Register,
15523                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15524     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15526       __ string_indexof($str1$$Register, $str2$$Register,
15527                         $cnt1$$Register, $cnt2$$Register,
15528                         icnt2, $result$$Register,
15529                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15530     }
15531   %}
15532   ins_pipe( pipe_slow );
15533 %}
15534 
15535 // fast search of substring with known size.
15536 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15537                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15538 %{
15539   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15540   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15541   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15542 
15543   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15544   ins_encode %{
15545     int icnt2 = (int)$int_cnt2$$constant;
15546     if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
15549       __ string_indexofC8($str1$$Register, $str2$$Register,
15550                           $cnt1$$Register, $cnt2$$Register,
15551                           icnt2, $result$$Register,
15552                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15553     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15555       __ string_indexof($str1$$Register, $str2$$Register,
15556                         $cnt1$$Register, $cnt2$$Register,
15557                         icnt2, $result$$Register,
15558                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15559     }
15560   %}
15561   ins_pipe( pipe_slow );
15562 %}
15563 
15564 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15565                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15566 %{
15567   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15568   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15569   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15570 
15571   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15572   ins_encode %{
15573     __ string_indexof($str1$$Register, $str2$$Register,
15574                       $cnt1$$Register, $cnt2$$Register,
15575                       (-1), $result$$Register,
15576                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15577   %}
15578   ins_pipe( pipe_slow );
15579 %}
15580 
15581 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15582                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15583 %{
15584   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15585   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15586   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15587 
15588   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15589   ins_encode %{
15590     __ string_indexof($str1$$Register, $str2$$Register,
15591                       $cnt1$$Register, $cnt2$$Register,
15592                       (-1), $result$$Register,
15593                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15594   %}
15595   ins_pipe( pipe_slow );
15596 %}
15597 
15598 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15599                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15600 %{
15601   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15602   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15603   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15604 
15605   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15606   ins_encode %{
15607     __ string_indexof($str1$$Register, $str2$$Register,
15608                       $cnt1$$Register, $cnt2$$Register,
15609                       (-1), $result$$Register,
15610                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15611   %}
15612   ins_pipe( pipe_slow );
15613 %}
15614 
15615 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15616                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15617 %{
15618   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15619   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15620   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15621   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15622   ins_encode %{
15623     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15624                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15625   %}
15626   ins_pipe( pipe_slow );
15627 %}
15628 
15629 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15630                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15631 %{
15632   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15633   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15634   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15635   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15636   ins_encode %{
15637     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15638                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15639   %}
15640   ins_pipe( pipe_slow );
15641 %}
15642 
15643 // fast string equals
15644 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15645                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15646 %{
15647   predicate(!VM_Version::supports_avx512vlbw());
15648   match(Set result (StrEquals (Binary str1 str2) cnt));
15649   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15650 
15651   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15652   ins_encode %{
15653     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15654                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15655                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15656   %}
15657   ins_pipe( pipe_slow );
15658 %}
15659 
15660 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15661                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15662 %{
15663   predicate(VM_Version::supports_avx512vlbw());
15664   match(Set result (StrEquals (Binary str1 str2) cnt));
15665   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15666 
15667   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15668   ins_encode %{
15669     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15670                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15671                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15672   %}
15673   ins_pipe( pipe_slow );
15674 %}
15675 
15676 // fast array equals
15677 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15678                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15679 %{
15680   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15681   match(Set result (AryEq ary1 ary2));
15682   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15683 
15684   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15685   ins_encode %{
15686     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15687                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15688                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15689   %}
15690   ins_pipe( pipe_slow );
15691 %}
15692 
15693 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15694                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15695 %{
15696   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15697   match(Set result (AryEq ary1 ary2));
15698   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15699 
15700   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15701   ins_encode %{
15702     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15703                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15704                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15705   %}
15706   ins_pipe( pipe_slow );
15707 %}
15708 
15709 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15710                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15711 %{
15712   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15713   match(Set result (AryEq ary1 ary2));
15714   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15715 
15716   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15717   ins_encode %{
15718     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15719                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15720                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15721   %}
15722   ins_pipe( pipe_slow );
15723 %}
15724 
15725 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15726                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15727 %{
15728   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15729   match(Set result (AryEq ary1 ary2));
15730   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15731 
15732   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15733   ins_encode %{
15734     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15735                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15736                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15737   %}
15738   ins_pipe( pipe_slow );
15739 %}
15740 
15741 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15742                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15743                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15744                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15745                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15746 %{
15747   predicate(UseAVX >= 2);
15748   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15749   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15750          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15751          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15752          USE basic_type, KILL cr);
15753 
15754   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15755   ins_encode %{
15756     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15757                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15758                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15759                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15760                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15761                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15762                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15763   %}
15764   ins_pipe( pipe_slow );
15765 %}
15766 
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15769 %{
15770   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15771   match(Set result (CountPositives ary1 len));
15772   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15773 
15774   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15775   ins_encode %{
15776     __ count_positives($ary1$$Register, $len$$Register,
15777                        $result$$Register, $tmp3$$Register,
15778                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15779   %}
15780   ins_pipe( pipe_slow );
15781 %}
15782 
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15785 %{
15786   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15787   match(Set result (CountPositives ary1 len));
15788   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15789 
15790   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15791   ins_encode %{
15792     __ count_positives($ary1$$Register, $len$$Register,
15793                        $result$$Register, $tmp3$$Register,
15794                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15795   %}
15796   ins_pipe( pipe_slow );
15797 %}
15798 
15799 // fast char[] to byte[] compression
15800 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15801                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15802   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15803   match(Set result (StrCompressedCopy src (Binary dst len)));
15804   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15805          USE_KILL len, KILL tmp5, KILL cr);
15806 
15807   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15808   ins_encode %{
15809     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15810                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15811                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15812                            knoreg, knoreg);
15813   %}
15814   ins_pipe( pipe_slow );
15815 %}
15816 
15817 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15818                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15819   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15820   match(Set result (StrCompressedCopy src (Binary dst len)));
15821   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15822          USE_KILL len, KILL tmp5, KILL cr);
15823 
15824   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15825   ins_encode %{
15826     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15827                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15828                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15829                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15830   %}
15831   ins_pipe( pipe_slow );
15832 %}

// fast byte[] to char[] inflation
15834 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15835                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15836   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15837   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15838   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15839 
15840   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15841   ins_encode %{
15842     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15843                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15844   %}
15845   ins_pipe( pipe_slow );
15846 %}
15847 
15848 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15849                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15850   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15851   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15852   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15853 
15854   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15855   ins_encode %{
15856     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15857                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15858   %}
15859   ins_pipe( pipe_slow );
15860 %}
15861 
15862 // encode char[] to byte[] in ISO_8859_1
15863 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15864                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15865                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15866   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15867   match(Set result (EncodeISOArray src (Binary dst len)));
15868   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15869 
15870   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15871   ins_encode %{
15872     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15873                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15874                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15875   %}
15876   ins_pipe( pipe_slow );
15877 %}
15878 
15879 // encode char[] to byte[] in ASCII
15880 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15881                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15882                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15883   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15884   match(Set result (EncodeISOArray src (Binary dst len)));
15885   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15886 
15887   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15888   ins_encode %{
15889     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15890                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15891                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15892   %}
15893   ins_pipe( pipe_slow );
15894 %}
15895 
15896 //----------Overflow Math Instructions-----------------------------------------
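
// Overflow nodes are matched to the arithmetic instruction itself:
// add and imul set OF on signed overflow, and the consuming branch
// tests that flag directly.  Subtraction only needs the flags, so a
// plain cmp stands in for the sub; negation is checked with neg,
// which sets OF exactly for the most negative value.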
15897 
15898 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15899 %{
15900   match(Set cr (OverflowAddI op1 op2));
15901   effect(DEF cr, USE_KILL op1, USE op2);
15902 
15903   format %{ "addl    $op1, $op2\t# overflow check int" %}
15904 
15905   ins_encode %{
15906     __ addl($op1$$Register, $op2$$Register);
15907   %}
15908   ins_pipe(ialu_reg_reg);
15909 %}
15910 
15911 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15912 %{
15913   match(Set cr (OverflowAddI op1 op2));
15914   effect(DEF cr, USE_KILL op1, USE op2);
15915 
15916   format %{ "addl    $op1, $op2\t# overflow check int" %}
15917 
15918   ins_encode %{
15919     __ addl($op1$$Register, $op2$$constant);
15920   %}
15921   ins_pipe(ialu_reg_reg);
15922 %}
15923 
15924 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15925 %{
15926   match(Set cr (OverflowAddL op1 op2));
15927   effect(DEF cr, USE_KILL op1, USE op2);
15928 
15929   format %{ "addq    $op1, $op2\t# overflow check long" %}
15930   ins_encode %{
15931     __ addq($op1$$Register, $op2$$Register);
15932   %}
15933   ins_pipe(ialu_reg_reg);
15934 %}
15935 
15936 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15937 %{
15938   match(Set cr (OverflowAddL op1 op2));
15939   effect(DEF cr, USE_KILL op1, USE op2);
15940 
15941   format %{ "addq    $op1, $op2\t# overflow check long" %}
15942   ins_encode %{
15943     __ addq($op1$$Register, $op2$$constant);
15944   %}
15945   ins_pipe(ialu_reg_reg);
15946 %}
15947 
15948 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15949 %{
15950   match(Set cr (OverflowSubI op1 op2));
15951 
15952   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15953   ins_encode %{
15954     __ cmpl($op1$$Register, $op2$$Register);
15955   %}
15956   ins_pipe(ialu_reg_reg);
15957 %}
15958 
15959 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15960 %{
15961   match(Set cr (OverflowSubI op1 op2));
15962 
15963   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15964   ins_encode %{
15965     __ cmpl($op1$$Register, $op2$$constant);
15966   %}
15967   ins_pipe(ialu_reg_reg);
15968 %}
15969 
15970 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15971 %{
15972   match(Set cr (OverflowSubL op1 op2));
15973 
15974   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15975   ins_encode %{
15976     __ cmpq($op1$$Register, $op2$$Register);
15977   %}
15978   ins_pipe(ialu_reg_reg);
15979 %}
15980 
15981 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15982 %{
15983   match(Set cr (OverflowSubL op1 op2));
15984 
15985   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15986   ins_encode %{
15987     __ cmpq($op1$$Register, $op2$$constant);
15988   %}
15989   ins_pipe(ialu_reg_reg);
15990 %}
15991 
15992 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15993 %{
15994   match(Set cr (OverflowSubI zero op2));
15995   effect(DEF cr, USE_KILL op2);
15996 
15997   format %{ "negl    $op2\t# overflow check int" %}
15998   ins_encode %{
15999     __ negl($op2$$Register);
16000   %}
16001   ins_pipe(ialu_reg_reg);
16002 %}
16003 
16004 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16005 %{
16006   match(Set cr (OverflowSubL zero op2));
16007   effect(DEF cr, USE_KILL op2);
16008 
16009   format %{ "negq    $op2\t# overflow check long" %}
16010   ins_encode %{
16011     __ negq($op2$$Register);
16012   %}
16013   ins_pipe(ialu_reg_reg);
16014 %}
16015 
16016 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16017 %{
16018   match(Set cr (OverflowMulI op1 op2));
16019   effect(DEF cr, USE_KILL op1, USE op2);
16020 
16021   format %{ "imull    $op1, $op2\t# overflow check int" %}
16022   ins_encode %{
16023     __ imull($op1$$Register, $op2$$Register);
16024   %}
16025   ins_pipe(ialu_reg_reg_alu0);
16026 %}
16027 
16028 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16029 %{
16030   match(Set cr (OverflowMulI op1 op2));
16031   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16032 
16033   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16034   ins_encode %{
16035     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16036   %}
16037   ins_pipe(ialu_reg_reg_alu0);
16038 %}
16039 
16040 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16041 %{
16042   match(Set cr (OverflowMulL op1 op2));
16043   effect(DEF cr, USE_KILL op1, USE op2);
16044 
16045   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16046   ins_encode %{
16047     __ imulq($op1$$Register, $op2$$Register);
16048   %}
16049   ins_pipe(ialu_reg_reg_alu0);
16050 %}
16051 
16052 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16053 %{
16054   match(Set cr (OverflowMulL op1 op2));
16055   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16056 
16057   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16058   ins_encode %{
16059     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16060   %}
16061   ins_pipe(ialu_reg_reg_alu0);
16062 %}
16063 
16064 
16065 //----------Control Flow Instructions------------------------------------------
16066 // Signed compare Instructions
16067 
16068 // XXX more variants!!
16069 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16070 %{
16071   match(Set cr (CmpI op1 op2));
16072   effect(DEF cr, USE op1, USE op2);
16073 
16074   format %{ "cmpl    $op1, $op2" %}
16075   ins_encode %{
16076     __ cmpl($op1$$Register, $op2$$Register);
16077   %}
16078   ins_pipe(ialu_cr_reg_reg);
16079 %}
16080 
16081 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16082 %{
16083   match(Set cr (CmpI op1 op2));
16084 
16085   format %{ "cmpl    $op1, $op2" %}
16086   ins_encode %{
16087     __ cmpl($op1$$Register, $op2$$constant);
16088   %}
16089   ins_pipe(ialu_cr_reg_imm);
16090 %}
16091 
16092 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16093 %{
16094   match(Set cr (CmpI op1 (LoadI op2)));
16095 
16096   ins_cost(500); // XXX
16097   format %{ "cmpl    $op1, $op2" %}
16098   ins_encode %{
16099     __ cmpl($op1$$Register, $op2$$Address);
16100   %}
16101   ins_pipe(ialu_cr_reg_mem);
16102 %}
16103 
16104 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16105 %{
16106   match(Set cr (CmpI src zero));
16107 
16108   format %{ "testl   $src, $src" %}
16109   ins_encode %{
16110     __ testl($src$$Register, $src$$Register);
16111   %}
16112   ins_pipe(ialu_cr_reg_imm);
16113 %}
16114 
16115 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16116 %{
16117   match(Set cr (CmpI (AndI src con) zero));
16118 
16119   format %{ "testl   $src, $con" %}
16120   ins_encode %{
16121     __ testl($src$$Register, $con$$constant);
16122   %}
16123   ins_pipe(ialu_cr_reg_imm);
16124 %}
16125 
16126 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16127 %{
16128   match(Set cr (CmpI (AndI src1 src2) zero));
16129 
16130   format %{ "testl   $src1, $src2" %}
16131   ins_encode %{
16132     __ testl($src1$$Register, $src2$$Register);
16133   %}
16134   ins_pipe(ialu_cr_reg_imm);
16135 %}
16136 
16137 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16138 %{
16139   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16140 
16141   format %{ "testl   $src, $mem" %}
16142   ins_encode %{
16143     __ testl($src$$Register, $mem$$Address);
16144   %}
16145   ins_pipe(ialu_cr_reg_mem);
16146 %}
16147 
16148 // Unsigned compare Instructions; really, same as signed except they
16149 // produce an rFlagsRegU instead of rFlagsReg.
16150 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16151 %{
16152   match(Set cr (CmpU op1 op2));
16153 
16154   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16155   ins_encode %{
16156     __ cmpl($op1$$Register, $op2$$Register);
16157   %}
16158   ins_pipe(ialu_cr_reg_reg);
16159 %}
16160 
16161 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16162 %{
16163   match(Set cr (CmpU op1 op2));
16164 
16165   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16166   ins_encode %{
16167     __ cmpl($op1$$Register, $op2$$constant);
16168   %}
16169   ins_pipe(ialu_cr_reg_imm);
16170 %}
16171 
16172 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16173 %{
16174   match(Set cr (CmpU op1 (LoadI op2)));
16175 
16176   ins_cost(500); // XXX
16177   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16178   ins_encode %{
16179     __ cmpl($op1$$Register, $op2$$Address);
16180   %}
16181   ins_pipe(ialu_cr_reg_mem);
16182 %}
16183 
16184 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16185 %{
16186   match(Set cr (CmpU src zero));
16187 
16188   format %{ "testl   $src, $src\t# unsigned" %}
16189   ins_encode %{
16190     __ testl($src$$Register, $src$$Register);
16191   %}
16192   ins_pipe(ialu_cr_reg_imm);
16193 %}
16194 
16195 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16196 %{
16197   match(Set cr (CmpP op1 op2));
16198 
16199   format %{ "cmpq    $op1, $op2\t# ptr" %}
16200   ins_encode %{
16201     __ cmpq($op1$$Register, $op2$$Register);
16202   %}
16203   ins_pipe(ialu_cr_reg_reg);
16204 %}
16205 
16206 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16207 %{
16208   match(Set cr (CmpP op1 (LoadP op2)));
16209   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16210 
16211   ins_cost(500); // XXX
16212   format %{ "cmpq    $op1, $op2\t# ptr" %}
16213   ins_encode %{
16214     __ cmpq($op1$$Register, $op2$$Address);
16215   %}
16216   ins_pipe(ialu_cr_reg_mem);
16217 %}
16218 
16219 // XXX this is generalized by compP_rReg_mem???
16220 // Compare raw pointer (used in out-of-heap check).
16221 // Only works because non-oop pointers must be raw pointers
16222 // and raw pointers have no anti-dependencies.
16223 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16224 %{
16225   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16226             n->in(2)->as_Load()->barrier_data() == 0);
16227   match(Set cr (CmpP op1 (LoadP op2)));
16228 
16229   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16230   ins_encode %{
16231     __ cmpq($op1$$Register, $op2$$Address);
16232   %}
16233   ins_pipe(ialu_cr_reg_mem);
16234 %}
16235 
// This will generate a signed flags result. This should be OK since
// any compare to zero should be eq/neq.
16238 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16239 %{
16240   match(Set cr (CmpP src zero));
16241 
16242   format %{ "testq   $src, $src\t# ptr" %}
16243   ins_encode %{
16244     __ testq($src$$Register, $src$$Register);
16245   %}
16246   ins_pipe(ialu_cr_reg_imm);
16247 %}
16248 
// This will generate a signed flags result. This should be OK since
// any compare to zero should be eq/neq.
16251 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16252 %{
16253   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16254             n->in(1)->as_Load()->barrier_data() == 0);
16255   match(Set cr (CmpP (LoadP op) zero));
16256 
16257   ins_cost(500); // XXX
16258   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16259   ins_encode %{
16260     __ testq($op$$Address, 0xFFFFFFFF);
16261   %}
16262   ins_pipe(ialu_cr_reg_imm);
16263 %}
16264 
16265 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16266 %{
16267   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16268             n->in(1)->as_Load()->barrier_data() == 0);
16269   match(Set cr (CmpP (LoadP mem) zero));
16270 
16271   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16272   ins_encode %{
16273     __ cmpq(r12, $mem$$Address);
16274   %}
16275   ins_pipe(ialu_cr_reg_mem);
16276 %}
16277 
16278 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16279 %{
16280   match(Set cr (CmpN op1 op2));
16281 
16282   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16283   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16284   ins_pipe(ialu_cr_reg_reg);
16285 %}
16286 
16287 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16288 %{
16289   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16290   match(Set cr (CmpN src (LoadN mem)));
16291 
16292   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16293   ins_encode %{
16294     __ cmpl($src$$Register, $mem$$Address);
16295   %}
16296   ins_pipe(ialu_cr_reg_mem);
16297 %}
16298 
16299 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16300   match(Set cr (CmpN op1 op2));
16301 
16302   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16303   ins_encode %{
16304     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16305   %}
16306   ins_pipe(ialu_cr_reg_imm);
16307 %}
16308 
16309 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16310 %{
16311   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16312   match(Set cr (CmpN src (LoadN mem)));
16313 
16314   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16315   ins_encode %{
16316     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16317   %}
16318   ins_pipe(ialu_cr_reg_mem);
16319 %}
16320 
16321 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16322   match(Set cr (CmpN op1 op2));
16323 
16324   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16325   ins_encode %{
16326     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16327   %}
16328   ins_pipe(ialu_cr_reg_imm);
16329 %}
16330 
16331 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16332 %{
16333   predicate(!UseCompactObjectHeaders);
16334   match(Set cr (CmpN src (LoadNKlass mem)));
16335 
16336   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16337   ins_encode %{
16338     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16339   %}
16340   ins_pipe(ialu_cr_reg_mem);
16341 %}
16342 
16343 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16344   match(Set cr (CmpN src zero));
16345 
16346   format %{ "testl   $src, $src\t# compressed ptr" %}
16347   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16348   ins_pipe(ialu_cr_reg_imm);
16349 %}
16350 
16351 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16352 %{
16353   predicate(CompressedOops::base() != nullptr &&
16354             n->in(1)->as_Load()->barrier_data() == 0);
16355   match(Set cr (CmpN (LoadN mem) zero));
16356 
16357   ins_cost(500); // XXX
16358   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16359   ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF);
16361   %}
16362   ins_pipe(ialu_cr_reg_mem);
16363 %}
16364 
16365 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16366 %{
16367   predicate(CompressedOops::base() == nullptr &&
16368             n->in(1)->as_Load()->barrier_data() == 0);
16369   match(Set cr (CmpN (LoadN mem) zero));
16370 
16371   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16372   ins_encode %{
16373     __ cmpl(r12, $mem$$Address);
16374   %}
16375   ins_pipe(ialu_cr_reg_mem);
16376 %}
16377 
16378 // Yanked all unsigned pointer compare operations.
16379 // Pointer compares are done with CmpP which is already unsigned.
16380 
16381 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16382 %{
16383   match(Set cr (CmpL op1 op2));
16384 
16385   format %{ "cmpq    $op1, $op2" %}
16386   ins_encode %{
16387     __ cmpq($op1$$Register, $op2$$Register);
16388   %}
16389   ins_pipe(ialu_cr_reg_reg);
16390 %}
16391 
16392 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16393 %{
16394   match(Set cr (CmpL op1 op2));
16395 
16396   format %{ "cmpq    $op1, $op2" %}
16397   ins_encode %{
16398     __ cmpq($op1$$Register, $op2$$constant);
16399   %}
16400   ins_pipe(ialu_cr_reg_imm);
16401 %}
16402 
16403 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16404 %{
16405   match(Set cr (CmpL op1 (LoadL op2)));
16406 
16407   format %{ "cmpq    $op1, $op2" %}
16408   ins_encode %{
16409     __ cmpq($op1$$Register, $op2$$Address);
16410   %}
16411   ins_pipe(ialu_cr_reg_mem);
16412 %}
16413 
16414 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16415 %{
16416   match(Set cr (CmpL src zero));
16417 
16418   format %{ "testq   $src, $src" %}
16419   ins_encode %{
16420     __ testq($src$$Register, $src$$Register);
16421   %}
16422   ins_pipe(ialu_cr_reg_imm);
16423 %}
16424 
16425 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16426 %{
16427   match(Set cr (CmpL (AndL src con) zero));
16428 
16429   format %{ "testq   $src, $con\t# long" %}
16430   ins_encode %{
16431     __ testq($src$$Register, $con$$constant);
16432   %}
16433   ins_pipe(ialu_cr_reg_imm);
16434 %}
16435 
16436 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16437 %{
16438   match(Set cr (CmpL (AndL src1 src2) zero));
16439 
16440   format %{ "testq   $src1, $src2\t# long" %}
16441   ins_encode %{
16442     __ testq($src1$$Register, $src2$$Register);
16443   %}
16444   ins_pipe(ialu_cr_reg_imm);
16445 %}
16446 
16447 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16448 %{
16449   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16450 
16451   format %{ "testq   $src, $mem" %}
16452   ins_encode %{
16453     __ testq($src$$Register, $mem$$Address);
16454   %}
16455   ins_pipe(ialu_cr_reg_mem);
16456 %}
16457 
16458 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16459 %{
16460   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16461 
16462   format %{ "testq   $src, $mem" %}
16463   ins_encode %{
16464     __ testq($src$$Register, $mem$$Address);
16465   %}
16466   ins_pipe(ialu_cr_reg_mem);
16467 %}
16468 
16469 // Manifest a CmpU result in an integer register.  Very painful.
16470 // This is the test to avoid.
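// The three-way value is built branchily: start with -1, bail out if
// below (or less, for the signed variant), otherwise setcc(ne) leaves
// 0 on equality and 1 otherwise.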
16471 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16472 %{
16473   match(Set dst (CmpU3 src1 src2));
16474   effect(KILL flags);
16475 
16476   ins_cost(275); // XXX
16477   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16478             "movl    $dst, -1\n\t"
16479             "jb,u    done\n\t"
16480             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16481     "done:" %}
16482   ins_encode %{
16483     Label done;
16484     __ cmpl($src1$$Register, $src2$$Register);
16485     __ movl($dst$$Register, -1);
16486     __ jccb(Assembler::below, done);
16487     __ setcc(Assembler::notZero, $dst$$Register);
16488     __ bind(done);
16489   %}
16490   ins_pipe(pipe_slow);
16491 %}
16492 
16493 // Manifest a CmpL result in an integer register.  Very painful.
16494 // This is the test to avoid.
16495 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16496 %{
16497   match(Set dst (CmpL3 src1 src2));
16498   effect(KILL flags);
16499 
16500   ins_cost(275); // XXX
16501   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16502             "movl    $dst, -1\n\t"
16503             "jl,s    done\n\t"
16504             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16505     "done:" %}
16506   ins_encode %{
16507     Label done;
16508     __ cmpq($src1$$Register, $src2$$Register);
16509     __ movl($dst$$Register, -1);
16510     __ jccb(Assembler::less, done);
16511     __ setcc(Assembler::notZero, $dst$$Register);
16512     __ bind(done);
16513   %}
16514   ins_pipe(pipe_slow);
16515 %}
16516 
16517 // Manifest a CmpUL result in an integer register.  Very painful.
16518 // This is the test to avoid.
16519 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16520 %{
16521   match(Set dst (CmpUL3 src1 src2));
16522   effect(KILL flags);
16523 
16524   ins_cost(275); // XXX
16525   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16526             "movl    $dst, -1\n\t"
16527             "jb,u    done\n\t"
16528             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16529     "done:" %}
16530   ins_encode %{
16531     Label done;
16532     __ cmpq($src1$$Register, $src2$$Register);
16533     __ movl($dst$$Register, -1);
16534     __ jccb(Assembler::below, done);
16535     __ setcc(Assembler::notZero, $dst$$Register);
16536     __ bind(done);
16537   %}
16538   ins_pipe(pipe_slow);
16539 %}
16540 
16541 // Unsigned long compare Instructions; really, same as signed long except they
16542 // produce an rFlagsRegU instead of rFlagsReg.
16543 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16544 %{
16545   match(Set cr (CmpUL op1 op2));
16546 
16547   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16548   ins_encode %{
16549     __ cmpq($op1$$Register, $op2$$Register);
16550   %}
16551   ins_pipe(ialu_cr_reg_reg);
16552 %}
16553 
16554 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16555 %{
16556   match(Set cr (CmpUL op1 op2));
16557 
16558   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16559   ins_encode %{
16560     __ cmpq($op1$$Register, $op2$$constant);
16561   %}
16562   ins_pipe(ialu_cr_reg_imm);
16563 %}
16564 
16565 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16566 %{
16567   match(Set cr (CmpUL op1 (LoadL op2)));
16568 
16569   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16570   ins_encode %{
16571     __ cmpq($op1$$Register, $op2$$Address);
16572   %}
16573   ins_pipe(ialu_cr_reg_mem);
16574 %}
16575 
16576 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16577 %{
16578   match(Set cr (CmpUL src zero));
16579 
16580   format %{ "testq   $src, $src\t# unsigned" %}
16581   ins_encode %{
16582     __ testq($src$$Register, $src$$Register);
16583   %}
16584   ins_pipe(ialu_cr_reg_imm);
16585 %}
16586 
16587 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16588 %{
16589   match(Set cr (CmpI (LoadB mem) imm));
16590 
16591   ins_cost(125);
16592   format %{ "cmpb    $mem, $imm" %}
16593   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16594   ins_pipe(ialu_cr_reg_mem);
16595 %}
16596 
16597 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16598 %{
16599   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16600 
16601   ins_cost(125);
16602   format %{ "testb   $mem, $imm\t# ubyte" %}
16603   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16604   ins_pipe(ialu_cr_reg_mem);
16605 %}
16606 
16607 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16608 %{
16609   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16610 
16611   ins_cost(125);
16612   format %{ "testb   $mem, $imm\t# byte" %}
16613   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16614   ins_pipe(ialu_cr_reg_mem);
16615 %}
16616 
16617 //----------Max and Min--------------------------------------------------------
16618 // Min Instructions
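
// MinI/MaxI have no direct x86 instruction; each expands into a
// compare followed by a conditional move.  The cmov helpers here
// carry no match rule of their own -- they exist purely as expansion
// pieces.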
16619 
16620 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16621 %{
16622   predicate(!UseAPX);
16623   effect(USE_DEF dst, USE src, USE cr);
16624 
16625   format %{ "cmovlgt $dst, $src\t# min" %}
16626   ins_encode %{
16627     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16628   %}
16629   ins_pipe(pipe_cmov_reg);
16630 %}
16631 
16632 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16633 %{
16634   predicate(UseAPX);
16635   effect(DEF dst, USE src1, USE src2, USE cr);
16636 
16637   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16638   ins_encode %{
16639     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16640   %}
16641   ins_pipe(pipe_cmov_reg);
16642 %}
16643 
16644 instruct minI_rReg(rRegI dst, rRegI src)
16645 %{
16646   predicate(!UseAPX);
16647   match(Set dst (MinI dst src));
16648 
16649   ins_cost(200);
16650   expand %{
16651     rFlagsReg cr;
16652     compI_rReg(cr, dst, src);
16653     cmovI_reg_g(dst, src, cr);
16654   %}
16655 %}
16656 
16657 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16658 %{
16659   predicate(UseAPX);
16660   match(Set dst (MinI src1 src2));
16661   effect(DEF dst, USE src1, USE src2);
16662   flag(PD::Flag_ndd_demotable_opr1);
16663 
16664   ins_cost(200);
16665   expand %{
16666     rFlagsReg cr;
16667     compI_rReg(cr, src1, src2);
16668     cmovI_reg_g_ndd(dst, src1, src2, cr);
16669   %}
16670 %}
16671 
16672 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16673 %{
16674   predicate(!UseAPX);
16675   effect(USE_DEF dst, USE src, USE cr);
16676 
16677   format %{ "cmovllt $dst, $src\t# max" %}
16678   ins_encode %{
16679     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16680   %}
16681   ins_pipe(pipe_cmov_reg);
16682 %}
16683 
16684 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16685 %{
16686   predicate(UseAPX);
16687   effect(DEF dst, USE src1, USE src2, USE cr);
16688 
16689   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16690   ins_encode %{
16691     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16692   %}
16693   ins_pipe(pipe_cmov_reg);
16694 %}
16695 
16696 instruct maxI_rReg(rRegI dst, rRegI src)
16697 %{
16698   predicate(!UseAPX);
16699   match(Set dst (MaxI dst src));
16700 
16701   ins_cost(200);
16702   expand %{
16703     rFlagsReg cr;
16704     compI_rReg(cr, dst, src);
16705     cmovI_reg_l(dst, src, cr);
16706   %}
16707 %}
16708 
16709 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16710 %{
16711   predicate(UseAPX);
16712   match(Set dst (MaxI src1 src2));
16713   effect(DEF dst, USE src1, USE src2);
16714   flag(PD::Flag_ndd_demotable_opr1);
16715 
16716   ins_cost(200);
16717   expand %{
16718     rFlagsReg cr;
16719     compI_rReg(cr, src1, src2);
16720     cmovI_reg_l_ndd(dst, src1, src2, cr);
16721   %}
16722 %}
16723 
16724 // ============================================================================
16725 // Branch Instructions
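
// All branches here are emitted in their long (rel32) forms -- note
// the 'false' passed to jmp/jcc -- so the fixed size() declarations
// stay exact and forward labels never need resizing.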
16726 
16727 // Jump Direct - Label defines a relative address from JMP+1
16728 instruct jmpDir(label labl)
16729 %{
16730   match(Goto);
16731   effect(USE labl);
16732 
16733   ins_cost(300);
16734   format %{ "jmp     $labl" %}
16735   size(5);
16736   ins_encode %{
16737     Label* L = $labl$$label;
16738     __ jmp(*L, false); // Always long jump
16739   %}
16740   ins_pipe(pipe_jmp);
16741 %}
16742 
16743 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16744 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16745 %{
16746   match(If cop cr);
16747   effect(USE labl);
16748 
16749   ins_cost(300);
16750   format %{ "j$cop     $labl" %}
16751   size(6);
16752   ins_encode %{
16753     Label* L = $labl$$label;
16754     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16755   %}
16756   ins_pipe(pipe_jcc);
16757 %}
16758 
16759 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16760 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16761 %{
16762   match(CountedLoopEnd cop cr);
16763   effect(USE labl);
16764 
16765   ins_cost(300);
16766   format %{ "j$cop     $labl\t# loop end" %}
16767   size(6);
16768   ins_encode %{
16769     Label* L = $labl$$label;
16770     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16771   %}
16772   ins_pipe(pipe_jcc);
16773 %}
16774 
16775 // Jump Direct Conditional - using unsigned comparison
16776 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16777   match(If cop cmp);
16778   effect(USE labl);
16779 
16780   ins_cost(300);
16781   format %{ "j$cop,u   $labl" %}
16782   size(6);
16783   ins_encode %{
16784     Label* L = $labl$$label;
16785     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16786   %}
16787   ins_pipe(pipe_jcc);
16788 %}
16789 
16790 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16791   match(If cop cmp);
16792   effect(USE labl);
16793 
16794   ins_cost(200);
16795   format %{ "j$cop,u   $labl" %}
16796   size(6);
16797   ins_encode %{
16798     Label* L = $labl$$label;
16799     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16800   %}
16801   ins_pipe(pipe_jcc);
16802 %}
16803 
16804 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16805   match(If cop cmp);
16806   effect(USE labl);
16807 
16808   ins_cost(200);
16809   format %{ $$template
16810     if ($cop$$cmpcode == Assembler::notEqual) {
16811       $$emit$$"jp,u    $labl\n\t"
16812       $$emit$$"j$cop,u   $labl"
16813     } else {
16814       $$emit$$"jp,u    done\n\t"
16815       $$emit$$"j$cop,u   $labl\n\t"
16816       $$emit$$"done:"
16817     }
16818   %}
16819   ins_encode %{
16820     Label* l = $labl$$label;
16821     if ($cop$$cmpcode == Assembler::notEqual) {
16822       __ jcc(Assembler::parity, *l, false);
16823       __ jcc(Assembler::notEqual, *l, false);
16824     } else if ($cop$$cmpcode == Assembler::equal) {
16825       Label done;
16826       __ jccb(Assembler::parity, done);
16827       __ jcc(Assembler::equal, *l, false);
16828       __ bind(done);
16829     } else {
16830        ShouldNotReachHere();
16831     }
16832   %}
16833   ins_pipe(pipe_jcc);
16834 %}
16835 
16836 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superclass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
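//
// Roughly, the emitted scan computes the following (an illustrative C++
// sketch, assuming the obvious Array<Klass*> accessors; it is not part of
// the build):
//
//   Klass** data = sub->secondary_supers()->data();
//   int     len  = sub->secondary_supers()->length();
//   for (int i = 0; i < len; i++) {
//     if (data[i] == super) {
//       sub->set_secondary_super_cache(super);   // hit: cache the answer
//       return 0;                                // result zero, flags Z
//     }
//   }
//   return 1;                                    // miss: nonzero, flags NZ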
16842 
16843 instruct partialSubtypeCheck(rdi_RegP result,
16844                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16845                              rFlagsReg cr)
16846 %{
16847   match(Set result (PartialSubtypeCheck sub super));
16848   predicate(!UseSecondarySupersTable);
16849   effect(KILL rcx, KILL cr);
16850 
16851   ins_cost(1100);  // slightly larger than the next version
16852   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16853             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16854             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16855             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16856             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16857             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16858             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
16859     "miss:\t" %}
16860 
16861   ins_encode %{
16862     Label miss;
16863     // NB: Callers may assume that, when $result is a valid register,
16864     // check_klass_subtype_slow_path_linear sets it to a nonzero
16865     // value.
16866     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16867                                             $rcx$$Register, $result$$Register,
16868                                             nullptr, &miss,
16869                                             /*set_cond_codes:*/ true);
16870     __ xorptr($result$$Register, $result$$Register);
16871     __ bind(miss);
16872   %}
16873 
16874   ins_pipe(pipe_slow);
16875 %}
16876 
16877 // ============================================================================
16878 // Two versions of hashtable-based partialSubtypeCheck, both used when
16879 // we need to search for a super class in the secondary supers array.
16880 // The first is used when we don't know _a priori_ the class being
16881 // searched for. The second, far more common, is used when we do know:
16882 // this is used for instanceof, checkcast, and any case where C2 can
16883 // determine it by constant propagation.
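//
// Conceptually (an illustrative sketch, not how the matcher is literally
// written):
//
//   if (super is an immP constant) {
//     u1 slot = ((Klass*)super_con)->hash_slot();   // known at JIT time
//     // => partialSubtypeCheckConstSuper: inline probe or per-slot stub
//   } else {
//     // => partialSubtypeCheckVarSuper: slot derived at run time
//   }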
16884 
16885 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr)
16888 %{
16889   match(Set result (PartialSubtypeCheck sub super));
16890   predicate(UseSecondarySupersTable);
16891   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16892 
16893   ins_cost(1000);
16894   format %{ "partialSubtypeCheck $result, $sub, $super" %}
16895 
16896   ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
16899   %}
16900 
16901   ins_pipe(pipe_slow);
16902 %}
16903 
16904 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16905                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16906                                        rFlagsReg cr)
16907 %{
16908   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16909   predicate(UseSecondarySupersTable);
16910   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16911 
  ins_cost(700);  // smaller than the versions above
16913   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16914 
16915   ins_encode %{
16916     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16917     if (InlineSecondarySupersTest) {
16918       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
16919                                        $temp3$$Register, $temp4$$Register, $result$$Register,
16920                                        super_klass_slot);
16921     } else {
16922       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16923     }
16924   %}
16925 
16926   ins_pipe(pipe_slow);
16927 %}
16928 
16929 // ============================================================================
16930 // Branch Instructions -- short offset versions
16931 //
16932 // These instructions are used to replace jumps of a long offset (the default
16933 // match) with jumps of a shorter offset.  These instructions are all tagged
16934 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16935 // match rules in general matching.  Instead, the ADLC generates a conversion
16936 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short variant can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of this file.
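//
// For reference, the size() attributes below track the x86 encodings (an
// informative summary, rel32 long forms vs. rel8 short forms):
//
//   jmp rel32 : E9 cd    (5 bytes)   becomes   jmp rel8 : EB cb  (2 bytes)
//   jcc rel32 : 0F 8x cd (6 bytes)   becomes   jcc rel8 : 7x cb  (2 bytes)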
16940 
16941 // Jump Direct - Label defines a relative address from JMP+1
16942 instruct jmpDir_short(label labl) %{
16943   match(Goto);
16944   effect(USE labl);
16945 
16946   ins_cost(300);
16947   format %{ "jmp,s   $labl" %}
16948   size(2);
16949   ins_encode %{
16950     Label* L = $labl$$label;
16951     __ jmpb(*L);
16952   %}
16953   ins_pipe(pipe_jmp);
16954   ins_short_branch(1);
16955 %}
16956 
16957 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16958 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16959   match(If cop cr);
16960   effect(USE labl);
16961 
16962   ins_cost(300);
16963   format %{ "j$cop,s   $labl" %}
16964   size(2);
16965   ins_encode %{
16966     Label* L = $labl$$label;
16967     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16968   %}
16969   ins_pipe(pipe_jcc);
16970   ins_short_branch(1);
16971 %}
16972 
16973 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16974 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16975   match(CountedLoopEnd cop cr);
16976   effect(USE labl);
16977 
16978   ins_cost(300);
16979   format %{ "j$cop,s   $labl\t# loop end" %}
16980   size(2);
16981   ins_encode %{
16982     Label* L = $labl$$label;
16983     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16984   %}
16985   ins_pipe(pipe_jcc);
16986   ins_short_branch(1);
16987 %}
16988 
16989 // Jump Direct Conditional - using unsigned comparison
16990 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16991   match(If cop cmp);
16992   effect(USE labl);
16993 
16994   ins_cost(300);
16995   format %{ "j$cop,us  $labl" %}
16996   size(2);
16997   ins_encode %{
16998     Label* L = $labl$$label;
16999     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17000   %}
17001   ins_pipe(pipe_jcc);
17002   ins_short_branch(1);
17003 %}
17004 
17005 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17006   match(If cop cmp);
17007   effect(USE labl);
17008 
17009   ins_cost(300);
17010   format %{ "j$cop,us  $labl" %}
17011   size(2);
17012   ins_encode %{
17013     Label* L = $labl$$label;
17014     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17015   %}
17016   ins_pipe(pipe_jcc);
17017   ins_short_branch(1);
17018 %}
17019 
17020 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17021   match(If cop cmp);
17022   effect(USE labl);
17023 
17024   ins_cost(300);
17025   format %{ $$template
17026     if ($cop$$cmpcode == Assembler::notEqual) {
17027       $$emit$$"jp,u,s  $labl\n\t"
17028       $$emit$$"j$cop,u,s  $labl"
17029     } else {
17030       $$emit$$"jp,u,s  done\n\t"
17031       $$emit$$"j$cop,u,s  $labl\n\t"
17032       $$emit$$"done:"
17033     }
17034   %}
17035   size(4);
17036   ins_encode %{
17037     Label* l = $labl$$label;
17038     if ($cop$$cmpcode == Assembler::notEqual) {
17039       __ jccb(Assembler::parity, *l);
17040       __ jccb(Assembler::notEqual, *l);
17041     } else if ($cop$$cmpcode == Assembler::equal) {
17042       Label done;
17043       __ jccb(Assembler::parity, done);
17044       __ jccb(Assembler::equal, *l);
17045       __ bind(done);
17046     } else {
17047        ShouldNotReachHere();
17048     }
17049   %}
17050   ins_pipe(pipe_jcc);
17051   ins_short_branch(1);
17052 %}
17053 
17054 // ============================================================================
17055 // inlined locking and unlocking
17056 
17057 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17058   match(Set cr (FastLock object box));
17059   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17060   ins_cost(300);
17061   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17062   ins_encode %{
17063     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17064   %}
17065   ins_pipe(pipe_slow);
17066 %}
17067 
17068 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17069   match(Set cr (FastUnlock object rax_reg));
17070   effect(TEMP tmp, USE_KILL rax_reg);
17071   ins_cost(300);
17072   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17073   ins_encode %{
17074     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17075   %}
17076   ins_pipe(pipe_slow);
17077 %}
17078 
17079 
17080 // ============================================================================
17081 // Safepoint Instructions
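// How the poll works, roughly: $poll holds the thread-local polling page
// address.  To stop this thread, the VM switches that address to a page
// with no read permission, so the testl below faults and the signal
// handler enters the safepoint/handshake machinery.  Illustrative sketch:
//
//   // address page = <thread's polling page>;  // unreadable <=> armed
//   // testl(rax, Address(page, 0));            // the read faults if armed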
17082 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17083 %{
17084   match(SafePoint poll);
17085   effect(KILL cr, USE poll);
17086 
17087   format %{ "testl   rax, [$poll]\t"
17088             "# Safepoint: poll for GC" %}
17089   ins_cost(125);
17090   ins_encode %{
17091     __ relocate(relocInfo::poll_type);
17092     address pre_pc = __ pc();
17093     __ testl(rax, Address($poll$$Register, 0));
17094     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17095   %}
17096   ins_pipe(ialu_reg_mem);
17097 %}
17098 
17099 instruct mask_all_evexL(kReg dst, rRegL src) %{
17100   match(Set dst (MaskAll src));
17101   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17102   ins_encode %{
17103     int mask_len = Matcher::vector_length(this);
17104     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17105   %}
17106   ins_pipe( pipe_slow );
17107 %}
17108 
17109 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17110   predicate(Matcher::vector_length(n) > 32);
17111   match(Set dst (MaskAll src));
17112   effect(TEMP tmp);
17113   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17114   ins_encode %{
17115     int mask_len = Matcher::vector_length(this);
17116     __ movslq($tmp$$Register, $src$$Register);
17117     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17118   %}
17119   ins_pipe( pipe_slow );
17120 %}
17121 
17122 // ============================================================================
17123 // Procedure Call/Return Instructions
17124 // Call Java Static Instruction
17125 // Note: If this code changes, the corresponding ret_addr_offset() and
17126 //       compute_padding() functions will have to be adjusted.
17127 instruct CallStaticJavaDirect(method meth) %{
17128   match(CallStaticJava);
17129   effect(USE meth);
17130 
17131   ins_cost(300);
17132   format %{ "call,static " %}
17133   opcode(0xE8); /* E8 cd */
17134   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17135   ins_pipe(pipe_slow);
17136   ins_alignment(4);
17137 %}
17138 
17139 // Call Java Dynamic Instruction
17140 // Note: If this code changes, the corresponding ret_addr_offset() and
17141 //       compute_padding() functions will have to be adjusted.
17142 instruct CallDynamicJavaDirect(method meth)
17143 %{
17144   match(CallDynamicJava);
17145   effect(USE meth);
17146 
17147   ins_cost(300);
17148   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17149             "call,dynamic " %}
17150   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17151   ins_pipe(pipe_slow);
17152   ins_alignment(4);
17153 %}
17154 
17155 // Call Runtime Instruction
17156 instruct CallRuntimeDirect(method meth)
17157 %{
17158   match(CallRuntime);
17159   effect(USE meth);
17160 
17161   ins_cost(300);
17162   format %{ "call,runtime " %}
17163   ins_encode(clear_avx, Java_To_Runtime(meth));
17164   ins_pipe(pipe_slow);
17165 %}
17166 
17167 // Call runtime without safepoint
17168 instruct CallLeafDirect(method meth)
17169 %{
17170   match(CallLeaf);
17171   effect(USE meth);
17172 
17173   ins_cost(300);
17174   format %{ "call_leaf,runtime " %}
17175   ins_encode(clear_avx, Java_To_Runtime(meth));
17176   ins_pipe(pipe_slow);
17177 %}
17178 
17179 // Call runtime without safepoint and with vector arguments
17180 instruct CallLeafDirectVector(method meth)
17181 %{
17182   match(CallLeafVector);
17183   effect(USE meth);
17184 
17185   ins_cost(300);
17186   format %{ "call_leaf,vector " %}
17187   ins_encode(Java_To_Runtime(meth));
17188   ins_pipe(pipe_slow);
17189 %}
17190 
17191 // Call runtime without safepoint
17192 instruct CallLeafNoFPDirect(method meth)
17193 %{
17194   match(CallLeafNoFP);
17195   effect(USE meth);
17196 
17197   ins_cost(300);
17198   format %{ "call_leaf_nofp,runtime " %}
17199   ins_encode(clear_avx, Java_To_Runtime(meth));
17200   ins_pipe(pipe_slow);
17201 %}
17202 
17203 // Return Instruction
17204 // Remove the return address & jump to it.
// Note: We always emit a nop after a ret to make sure there is room
// for safepoint patching.
17207 instruct Ret()
17208 %{
17209   match(Return);
17210 
17211   format %{ "ret" %}
17212   ins_encode %{
17213     __ ret(0);
17214   %}
17215   ins_pipe(pipe_jmp);
17216 %}
17217 
17218 // Tail Call; Jump from runtime stub to Java code.
17219 // Also known as an 'interprocedural jump'.
17220 // Target of jump will eventually return to caller.
17221 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, resetting rbp to the caller's state.
17224 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17225 %{
17226   match(TailCall jump_target method_ptr);
17227 
17228   ins_cost(300);
17229   format %{ "jmp     $jump_target\t# rbx holds method" %}
17230   ins_encode %{
17231     __ jmp($jump_target$$Register);
17232   %}
17233   ins_pipe(pipe_jmp);
17234 %}
17235 
17236 // Tail Jump; remove the return address; jump to target.
17237 // TailCall above leaves the return address around.
17238 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17239 %{
17240   match(TailJump jump_target ex_oop);
17241 
17242   ins_cost(300);
17243   format %{ "popq    rdx\t# pop return address\n\t"
17244             "jmp     $jump_target" %}
17245   ins_encode %{
17246     __ popq(as_Register(RDX_enc));
17247     __ jmp($jump_target$$Register);
17248   %}
17249   ins_pipe(pipe_jmp);
17250 %}
17251 
17252 // Forward exception.
17253 instruct ForwardExceptionjmp()
17254 %{
17255   match(ForwardException);
17256 
17257   format %{ "jmp     forward_exception_stub" %}
17258   ins_encode %{
17259     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17260   %}
17261   ins_pipe(pipe_jmp);
17262 %}
17263 
17264 // Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler and is set up
// just prior to jumping to this handler.  No code emitted.
17267 instruct CreateException(rax_RegP ex_oop)
17268 %{
17269   match(Set ex_oop (CreateEx));
17270 
17271   size(0);
17272   // use the following format syntax
17273   format %{ "# exception oop is in rax; no code emitted" %}
17274   ins_encode();
17275   ins_pipe(empty);
17276 %}
17277 
17278 // Rethrow exception:
17279 // The exception oop will come in the first argument position.
17280 // Then JUMP (not call) to the rethrow stub code.
17281 instruct RethrowException()
17282 %{
17283   match(Rethrow);
17284 
17285   // use the following format syntax
17286   format %{ "jmp     rethrow_stub" %}
17287   ins_encode %{
17288     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17289   %}
17290   ins_pipe(pipe_jmp);
17291 %}
17292 
17293 // ============================================================================
17294 // This name is KNOWN by the ADLC and cannot be changed.
17295 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
17297 instruct tlsLoadP(r15_RegP dst) %{
17298   match(Set dst (ThreadLocal));
17299   effect(DEF dst);
17300 
17301   size(0);
17302   format %{ "# TLS is in R15" %}
17303   ins_encode( /*empty encoding*/ );
17304   ins_pipe(ialu_reg_reg);
17305 %}
17306 
17307 instruct addF_reg(regF dst, regF src) %{
17308   predicate(UseAVX == 0);
17309   match(Set dst (AddF dst src));
17310 
17311   format %{ "addss   $dst, $src" %}
17312   ins_cost(150);
17313   ins_encode %{
17314     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17315   %}
17316   ins_pipe(pipe_slow);
17317 %}
17318 
17319 instruct addF_mem(regF dst, memory src) %{
17320   predicate(UseAVX == 0);
17321   match(Set dst (AddF dst (LoadF src)));
17322 
17323   format %{ "addss   $dst, $src" %}
17324   ins_cost(150);
17325   ins_encode %{
17326     __ addss($dst$$XMMRegister, $src$$Address);
17327   %}
17328   ins_pipe(pipe_slow);
17329 %}
17330 
17331 instruct addF_imm(regF dst, immF con) %{
17332   predicate(UseAVX == 0);
17333   match(Set dst (AddF dst con));
17334   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17335   ins_cost(150);
17336   ins_encode %{
17337     __ addss($dst$$XMMRegister, $constantaddress($con));
17338   %}
17339   ins_pipe(pipe_slow);
17340 %}
17341 
17342 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17343   predicate(UseAVX > 0);
17344   match(Set dst (AddF src1 src2));
17345 
17346   format %{ "vaddss  $dst, $src1, $src2" %}
17347   ins_cost(150);
17348   ins_encode %{
17349     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17350   %}
17351   ins_pipe(pipe_slow);
17352 %}
17353 
17354 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17355   predicate(UseAVX > 0);
17356   match(Set dst (AddF src1 (LoadF src2)));
17357 
17358   format %{ "vaddss  $dst, $src1, $src2" %}
17359   ins_cost(150);
17360   ins_encode %{
17361     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17362   %}
17363   ins_pipe(pipe_slow);
17364 %}
17365 
17366 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17367   predicate(UseAVX > 0);
17368   match(Set dst (AddF src con));
17369 
17370   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17371   ins_cost(150);
17372   ins_encode %{
17373     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17374   %}
17375   ins_pipe(pipe_slow);
17376 %}
17377 
17378 instruct addD_reg(regD dst, regD src) %{
17379   predicate(UseAVX == 0);
17380   match(Set dst (AddD dst src));
17381 
17382   format %{ "addsd   $dst, $src" %}
17383   ins_cost(150);
17384   ins_encode %{
17385     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17386   %}
17387   ins_pipe(pipe_slow);
17388 %}
17389 
17390 instruct addD_mem(regD dst, memory src) %{
17391   predicate(UseAVX == 0);
17392   match(Set dst (AddD dst (LoadD src)));
17393 
17394   format %{ "addsd   $dst, $src" %}
17395   ins_cost(150);
17396   ins_encode %{
17397     __ addsd($dst$$XMMRegister, $src$$Address);
17398   %}
17399   ins_pipe(pipe_slow);
17400 %}
17401 
17402 instruct addD_imm(regD dst, immD con) %{
17403   predicate(UseAVX == 0);
17404   match(Set dst (AddD dst con));
17405   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17406   ins_cost(150);
17407   ins_encode %{
17408     __ addsd($dst$$XMMRegister, $constantaddress($con));
17409   %}
17410   ins_pipe(pipe_slow);
17411 %}
17412 
17413 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17414   predicate(UseAVX > 0);
17415   match(Set dst (AddD src1 src2));
17416 
17417   format %{ "vaddsd  $dst, $src1, $src2" %}
17418   ins_cost(150);
17419   ins_encode %{
17420     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17421   %}
17422   ins_pipe(pipe_slow);
17423 %}
17424 
17425 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17426   predicate(UseAVX > 0);
17427   match(Set dst (AddD src1 (LoadD src2)));
17428 
17429   format %{ "vaddsd  $dst, $src1, $src2" %}
17430   ins_cost(150);
17431   ins_encode %{
17432     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17433   %}
17434   ins_pipe(pipe_slow);
17435 %}
17436 
17437 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17438   predicate(UseAVX > 0);
17439   match(Set dst (AddD src con));
17440 
17441   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17442   ins_cost(150);
17443   ins_encode %{
17444     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17445   %}
17446   ins_pipe(pipe_slow);
17447 %}
17448 
17449 instruct subF_reg(regF dst, regF src) %{
17450   predicate(UseAVX == 0);
17451   match(Set dst (SubF dst src));
17452 
17453   format %{ "subss   $dst, $src" %}
17454   ins_cost(150);
17455   ins_encode %{
17456     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17457   %}
17458   ins_pipe(pipe_slow);
17459 %}
17460 
17461 instruct subF_mem(regF dst, memory src) %{
17462   predicate(UseAVX == 0);
17463   match(Set dst (SubF dst (LoadF src)));
17464 
17465   format %{ "subss   $dst, $src" %}
17466   ins_cost(150);
17467   ins_encode %{
17468     __ subss($dst$$XMMRegister, $src$$Address);
17469   %}
17470   ins_pipe(pipe_slow);
17471 %}
17472 
17473 instruct subF_imm(regF dst, immF con) %{
17474   predicate(UseAVX == 0);
17475   match(Set dst (SubF dst con));
17476   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17477   ins_cost(150);
17478   ins_encode %{
17479     __ subss($dst$$XMMRegister, $constantaddress($con));
17480   %}
17481   ins_pipe(pipe_slow);
17482 %}
17483 
17484 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17485   predicate(UseAVX > 0);
17486   match(Set dst (SubF src1 src2));
17487 
17488   format %{ "vsubss  $dst, $src1, $src2" %}
17489   ins_cost(150);
17490   ins_encode %{
17491     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17492   %}
17493   ins_pipe(pipe_slow);
17494 %}
17495 
17496 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17497   predicate(UseAVX > 0);
17498   match(Set dst (SubF src1 (LoadF src2)));
17499 
17500   format %{ "vsubss  $dst, $src1, $src2" %}
17501   ins_cost(150);
17502   ins_encode %{
17503     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17504   %}
17505   ins_pipe(pipe_slow);
17506 %}
17507 
17508 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17509   predicate(UseAVX > 0);
17510   match(Set dst (SubF src con));
17511 
17512   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17513   ins_cost(150);
17514   ins_encode %{
17515     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17516   %}
17517   ins_pipe(pipe_slow);
17518 %}
17519 
17520 instruct subD_reg(regD dst, regD src) %{
17521   predicate(UseAVX == 0);
17522   match(Set dst (SubD dst src));
17523 
17524   format %{ "subsd   $dst, $src" %}
17525   ins_cost(150);
17526   ins_encode %{
17527     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17528   %}
17529   ins_pipe(pipe_slow);
17530 %}
17531 
17532 instruct subD_mem(regD dst, memory src) %{
17533   predicate(UseAVX == 0);
17534   match(Set dst (SubD dst (LoadD src)));
17535 
17536   format %{ "subsd   $dst, $src" %}
17537   ins_cost(150);
17538   ins_encode %{
17539     __ subsd($dst$$XMMRegister, $src$$Address);
17540   %}
17541   ins_pipe(pipe_slow);
17542 %}
17543 
17544 instruct subD_imm(regD dst, immD con) %{
17545   predicate(UseAVX == 0);
17546   match(Set dst (SubD dst con));
17547   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17548   ins_cost(150);
17549   ins_encode %{
17550     __ subsd($dst$$XMMRegister, $constantaddress($con));
17551   %}
17552   ins_pipe(pipe_slow);
17553 %}
17554 
17555 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17556   predicate(UseAVX > 0);
17557   match(Set dst (SubD src1 src2));
17558 
17559   format %{ "vsubsd  $dst, $src1, $src2" %}
17560   ins_cost(150);
17561   ins_encode %{
17562     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17563   %}
17564   ins_pipe(pipe_slow);
17565 %}
17566 
17567 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17568   predicate(UseAVX > 0);
17569   match(Set dst (SubD src1 (LoadD src2)));
17570 
17571   format %{ "vsubsd  $dst, $src1, $src2" %}
17572   ins_cost(150);
17573   ins_encode %{
17574     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17575   %}
17576   ins_pipe(pipe_slow);
17577 %}
17578 
17579 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17580   predicate(UseAVX > 0);
17581   match(Set dst (SubD src con));
17582 
17583   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17584   ins_cost(150);
17585   ins_encode %{
17586     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17587   %}
17588   ins_pipe(pipe_slow);
17589 %}
17590 
17591 instruct mulF_reg(regF dst, regF src) %{
17592   predicate(UseAVX == 0);
17593   match(Set dst (MulF dst src));
17594 
17595   format %{ "mulss   $dst, $src" %}
17596   ins_cost(150);
17597   ins_encode %{
17598     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17599   %}
17600   ins_pipe(pipe_slow);
17601 %}
17602 
17603 instruct mulF_mem(regF dst, memory src) %{
17604   predicate(UseAVX == 0);
17605   match(Set dst (MulF dst (LoadF src)));
17606 
17607   format %{ "mulss   $dst, $src" %}
17608   ins_cost(150);
17609   ins_encode %{
17610     __ mulss($dst$$XMMRegister, $src$$Address);
17611   %}
17612   ins_pipe(pipe_slow);
17613 %}
17614 
17615 instruct mulF_imm(regF dst, immF con) %{
17616   predicate(UseAVX == 0);
17617   match(Set dst (MulF dst con));
17618   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17619   ins_cost(150);
17620   ins_encode %{
17621     __ mulss($dst$$XMMRegister, $constantaddress($con));
17622   %}
17623   ins_pipe(pipe_slow);
17624 %}
17625 
17626 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17627   predicate(UseAVX > 0);
17628   match(Set dst (MulF src1 src2));
17629 
17630   format %{ "vmulss  $dst, $src1, $src2" %}
17631   ins_cost(150);
17632   ins_encode %{
17633     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17634   %}
17635   ins_pipe(pipe_slow);
17636 %}
17637 
17638 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17639   predicate(UseAVX > 0);
17640   match(Set dst (MulF src1 (LoadF src2)));
17641 
17642   format %{ "vmulss  $dst, $src1, $src2" %}
17643   ins_cost(150);
17644   ins_encode %{
17645     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17646   %}
17647   ins_pipe(pipe_slow);
17648 %}
17649 
17650 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17651   predicate(UseAVX > 0);
17652   match(Set dst (MulF src con));
17653 
17654   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17655   ins_cost(150);
17656   ins_encode %{
17657     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17658   %}
17659   ins_pipe(pipe_slow);
17660 %}
17661 
17662 instruct mulD_reg(regD dst, regD src) %{
17663   predicate(UseAVX == 0);
17664   match(Set dst (MulD dst src));
17665 
17666   format %{ "mulsd   $dst, $src" %}
17667   ins_cost(150);
17668   ins_encode %{
17669     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17670   %}
17671   ins_pipe(pipe_slow);
17672 %}
17673 
17674 instruct mulD_mem(regD dst, memory src) %{
17675   predicate(UseAVX == 0);
17676   match(Set dst (MulD dst (LoadD src)));
17677 
17678   format %{ "mulsd   $dst, $src" %}
17679   ins_cost(150);
17680   ins_encode %{
17681     __ mulsd($dst$$XMMRegister, $src$$Address);
17682   %}
17683   ins_pipe(pipe_slow);
17684 %}
17685 
17686 instruct mulD_imm(regD dst, immD con) %{
17687   predicate(UseAVX == 0);
17688   match(Set dst (MulD dst con));
17689   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17690   ins_cost(150);
17691   ins_encode %{
17692     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17693   %}
17694   ins_pipe(pipe_slow);
17695 %}
17696 
17697 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17698   predicate(UseAVX > 0);
17699   match(Set dst (MulD src1 src2));
17700 
17701   format %{ "vmulsd  $dst, $src1, $src2" %}
17702   ins_cost(150);
17703   ins_encode %{
17704     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17705   %}
17706   ins_pipe(pipe_slow);
17707 %}
17708 
17709 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17710   predicate(UseAVX > 0);
17711   match(Set dst (MulD src1 (LoadD src2)));
17712 
17713   format %{ "vmulsd  $dst, $src1, $src2" %}
17714   ins_cost(150);
17715   ins_encode %{
17716     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17717   %}
17718   ins_pipe(pipe_slow);
17719 %}
17720 
17721 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17722   predicate(UseAVX > 0);
17723   match(Set dst (MulD src con));
17724 
17725   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17726   ins_cost(150);
17727   ins_encode %{
17728     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17729   %}
17730   ins_pipe(pipe_slow);
17731 %}
17732 
17733 instruct divF_reg(regF dst, regF src) %{
17734   predicate(UseAVX == 0);
17735   match(Set dst (DivF dst src));
17736 
17737   format %{ "divss   $dst, $src" %}
17738   ins_cost(150);
17739   ins_encode %{
17740     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17741   %}
17742   ins_pipe(pipe_slow);
17743 %}
17744 
17745 instruct divF_mem(regF dst, memory src) %{
17746   predicate(UseAVX == 0);
17747   match(Set dst (DivF dst (LoadF src)));
17748 
17749   format %{ "divss   $dst, $src" %}
17750   ins_cost(150);
17751   ins_encode %{
17752     __ divss($dst$$XMMRegister, $src$$Address);
17753   %}
17754   ins_pipe(pipe_slow);
17755 %}
17756 
17757 instruct divF_imm(regF dst, immF con) %{
17758   predicate(UseAVX == 0);
17759   match(Set dst (DivF dst con));
17760   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17761   ins_cost(150);
17762   ins_encode %{
17763     __ divss($dst$$XMMRegister, $constantaddress($con));
17764   %}
17765   ins_pipe(pipe_slow);
17766 %}
17767 
17768 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17769   predicate(UseAVX > 0);
17770   match(Set dst (DivF src1 src2));
17771 
17772   format %{ "vdivss  $dst, $src1, $src2" %}
17773   ins_cost(150);
17774   ins_encode %{
17775     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17776   %}
17777   ins_pipe(pipe_slow);
17778 %}
17779 
17780 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17781   predicate(UseAVX > 0);
17782   match(Set dst (DivF src1 (LoadF src2)));
17783 
17784   format %{ "vdivss  $dst, $src1, $src2" %}
17785   ins_cost(150);
17786   ins_encode %{
17787     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17788   %}
17789   ins_pipe(pipe_slow);
17790 %}
17791 
17792 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17793   predicate(UseAVX > 0);
17794   match(Set dst (DivF src con));
17795 
17796   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17797   ins_cost(150);
17798   ins_encode %{
17799     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17800   %}
17801   ins_pipe(pipe_slow);
17802 %}
17803 
17804 instruct divD_reg(regD dst, regD src) %{
17805   predicate(UseAVX == 0);
17806   match(Set dst (DivD dst src));
17807 
17808   format %{ "divsd   $dst, $src" %}
17809   ins_cost(150);
17810   ins_encode %{
17811     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17812   %}
17813   ins_pipe(pipe_slow);
17814 %}
17815 
17816 instruct divD_mem(regD dst, memory src) %{
17817   predicate(UseAVX == 0);
17818   match(Set dst (DivD dst (LoadD src)));
17819 
17820   format %{ "divsd   $dst, $src" %}
17821   ins_cost(150);
17822   ins_encode %{
17823     __ divsd($dst$$XMMRegister, $src$$Address);
17824   %}
17825   ins_pipe(pipe_slow);
17826 %}
17827 
17828 instruct divD_imm(regD dst, immD con) %{
17829   predicate(UseAVX == 0);
17830   match(Set dst (DivD dst con));
17831   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17832   ins_cost(150);
17833   ins_encode %{
17834     __ divsd($dst$$XMMRegister, $constantaddress($con));
17835   %}
17836   ins_pipe(pipe_slow);
17837 %}
17838 
17839 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17840   predicate(UseAVX > 0);
17841   match(Set dst (DivD src1 src2));
17842 
17843   format %{ "vdivsd  $dst, $src1, $src2" %}
17844   ins_cost(150);
17845   ins_encode %{
17846     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17847   %}
17848   ins_pipe(pipe_slow);
17849 %}
17850 
17851 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17852   predicate(UseAVX > 0);
17853   match(Set dst (DivD src1 (LoadD src2)));
17854 
17855   format %{ "vdivsd  $dst, $src1, $src2" %}
17856   ins_cost(150);
17857   ins_encode %{
17858     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17859   %}
17860   ins_pipe(pipe_slow);
17861 %}
17862 
17863 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17864   predicate(UseAVX > 0);
17865   match(Set dst (DivD src con));
17866 
17867   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17868   ins_cost(150);
17869   ins_encode %{
17870     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17871   %}
17872   ins_pipe(pipe_slow);
17873 %}
17874 
17875 instruct absF_reg(regF dst) %{
17876   predicate(UseAVX == 0);
17877   match(Set dst (AbsF dst));
17878   ins_cost(150);
17879   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
17880   ins_encode %{
17881     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17882   %}
17883   ins_pipe(pipe_slow);
17884 %}
17885 
17886 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17887   predicate(UseAVX > 0);
17888   match(Set dst (AbsF src));
17889   ins_cost(150);
17890   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17891   ins_encode %{
17892     int vlen_enc = Assembler::AVX_128bit;
17893     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17894               ExternalAddress(float_signmask()), vlen_enc);
17895   %}
17896   ins_pipe(pipe_slow);
17897 %}
17898 
17899 instruct absD_reg(regD dst) %{
17900   predicate(UseAVX == 0);
17901   match(Set dst (AbsD dst));
17902   ins_cost(150);
17903   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
17904             "# abs double by sign masking" %}
17905   ins_encode %{
17906     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17907   %}
17908   ins_pipe(pipe_slow);
17909 %}
17910 
17911 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17912   predicate(UseAVX > 0);
17913   match(Set dst (AbsD src));
17914   ins_cost(150);
17915   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
17916             "# abs double by sign masking" %}
17917   ins_encode %{
17918     int vlen_enc = Assembler::AVX_128bit;
17919     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17920               ExternalAddress(double_signmask()), vlen_enc);
17921   %}
17922   ins_pipe(pipe_slow);
17923 %}
17924 
17925 instruct negF_reg(regF dst) %{
17926   predicate(UseAVX == 0);
17927   match(Set dst (NegF dst));
17928   ins_cost(150);
17929   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
17930   ins_encode %{
17931     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17932   %}
17933   ins_pipe(pipe_slow);
17934 %}
17935 
17936 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17937   predicate(UseAVX > 0);
17938   match(Set dst (NegF src));
17939   ins_cost(150);
17940   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17941   ins_encode %{
17942     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17943                  ExternalAddress(float_signflip()));
17944   %}
17945   ins_pipe(pipe_slow);
17946 %}
17947 
17948 instruct negD_reg(regD dst) %{
17949   predicate(UseAVX == 0);
17950   match(Set dst (NegD dst));
17951   ins_cost(150);
17952   format %{ "xorpd   $dst, [0x8000000000000000]\t"
17953             "# neg double by sign flipping" %}
17954   ins_encode %{
17955     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17956   %}
17957   ins_pipe(pipe_slow);
17958 %}
17959 
17960 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17961   predicate(UseAVX > 0);
17962   match(Set dst (NegD src));
17963   ins_cost(150);
17964   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
17965             "# neg double by sign flipping" %}
17966   ins_encode %{
17967     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17968                  ExternalAddress(double_signflip()));
17969   %}
17970   ins_pipe(pipe_slow);
17971 %}
17972 
// The sqrtss instruction needs its destination register pre-initialized for
// best performance; therefore only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
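// ("Pre-initialized" because sqrtss writes only the low 32 bits of dst and
// so carries a false dependency on dst's previous contents.)  Illustrative
// sketch of the stall being avoided, not part of the build:
//
//   // __ sqrtss(xmm0, xmm1);  // dst != src: stalls on xmm0's last writer
//   //                         // even though xmm0's old value is unused
//   // __ sqrtss(xmm0, xmm0);  // dst == src: no false dependency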
17975 instruct sqrtF_reg(regF dst) %{
17976   match(Set dst (SqrtF dst));
17977   format %{ "sqrtss  $dst, $dst" %}
17978   ins_encode %{
17979     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17980   %}
17981   ins_pipe(pipe_slow);
17982 %}
17983 
// The sqrtsd instruction needs its destination register pre-initialized for
// best performance; therefore only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
17986 instruct sqrtD_reg(regD dst) %{
17987   match(Set dst (SqrtD dst));
17988   format %{ "sqrtsd  $dst, $dst" %}
17989   ins_encode %{
17990     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17991   %}
17992   ins_pipe(pipe_slow);
17993 %}
17994 
17995 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17996   effect(TEMP tmp);
17997   match(Set dst (ConvF2HF src));
17998   ins_cost(125);
17999   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18000   ins_encode %{
18001     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18002   %}
18003   ins_pipe( pipe_slow );
18004 %}
18005 
18006 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18007   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18008   effect(TEMP ktmp, TEMP rtmp);
18009   match(Set mem (StoreC mem (ConvF2HF src)));
18010   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18011   ins_encode %{
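    // Store only lane 0 of the converted result: build a k-mask of 0b1 and
    // use it as the write mask for the masked evcvtps2ph store below.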
18012     __ movl($rtmp$$Register, 0x1);
18013     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18014     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18015   %}
18016   ins_pipe( pipe_slow );
18017 %}
18018 
18019 instruct vconvF2HF(vec dst, vec src) %{
18020   match(Set dst (VectorCastF2HF src));
18021   format %{ "vector_conv_F2HF $dst $src" %}
18022   ins_encode %{
18023     int vlen_enc = vector_length_encoding(this, $src);
18024     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18025   %}
18026   ins_pipe( pipe_slow );
18027 %}
18028 
18029 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18030   predicate(n->as_StoreVector()->memory_size() >= 16);
18031   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18032   format %{ "vcvtps2ph $mem,$src" %}
18033   ins_encode %{
18034     int vlen_enc = vector_length_encoding(this, $src);
18035     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18036   %}
18037   ins_pipe( pipe_slow );
18038 %}
18039 
18040 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18041   match(Set dst (ConvHF2F src));
18042   format %{ "vcvtph2ps $dst,$src" %}
18043   ins_encode %{
18044     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18045   %}
18046   ins_pipe( pipe_slow );
18047 %}
18048 
18049 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18050   match(Set dst (VectorCastHF2F (LoadVector mem)));
18051   format %{ "vcvtph2ps $dst,$mem" %}
18052   ins_encode %{
18053     int vlen_enc = vector_length_encoding(this);
18054     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18055   %}
18056   ins_pipe( pipe_slow );
18057 %}
18058 
18059 instruct vconvHF2F(vec dst, vec src) %{
18060   match(Set dst (VectorCastHF2F src));
18061   ins_cost(125);
18062   format %{ "vector_conv_HF2F $dst,$src" %}
18063   ins_encode %{
18064     int vlen_enc = vector_length_encoding(this);
18065     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18066   %}
18067   ins_pipe( pipe_slow );
18068 %}
18069 
18070 // ---------------------------------------- VectorReinterpret ------------------------------------
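// A reinterpret is a bit-cast: when source and destination have the same
// size in bytes it emits no code; when expanding, the rules below mask off
// the undefined upper bytes; when shrinking, they emit a truncating move.
// Worked size check used by the mask rules (illustrative):
//
//   // mask over 32 x T_SHORT: src_sz = 32 * 2 = 64 bytes
//   // mask over 64 x T_BYTE : dst_sz = 64 * 1 = 64 bytes  => sizes match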
18071 instruct reinterpret_mask(kReg dst) %{
18072   predicate(n->bottom_type()->isa_vectmask() &&
18073             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18074   match(Set dst (VectorReinterpret dst));
18075   ins_cost(125);
18076   format %{ "vector_reinterpret $dst\t!" %}
18077   ins_encode %{
18078     // empty
18079   %}
18080   ins_pipe( pipe_slow );
18081 %}
18082 
18083 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18084   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18085             n->bottom_type()->isa_vectmask() &&
18086             n->in(1)->bottom_type()->isa_vectmask() &&
18087             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
18089   match(Set dst (VectorReinterpret src));
18090   effect(TEMP xtmp);
18091   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18092   ins_encode %{
18093      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18094      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
18096      int vlen_enc = vector_length_encoding(src_sz);
18097      __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18098      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18099   %}
18100   ins_pipe( pipe_slow );
18101 %}
18102 
18103 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18104   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18105             n->bottom_type()->isa_vectmask() &&
18106             n->in(1)->bottom_type()->isa_vectmask() &&
18107             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18108              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
18110   match(Set dst (VectorReinterpret src));
18111   effect(TEMP xtmp);
18112   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18113   ins_encode %{
18114      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18115      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
18117      int vlen_enc = vector_length_encoding(src_sz);
18118      __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18119      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18120   %}
18121   ins_pipe( pipe_slow );
18122 %}
18123 
18124 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18125   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18126             n->bottom_type()->isa_vectmask() &&
18127             n->in(1)->bottom_type()->isa_vectmask() &&
18128             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18129              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst size == src size
18131   match(Set dst (VectorReinterpret src));
18132   effect(TEMP xtmp);
18133   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18134   ins_encode %{
18135      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18136      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
18138      int vlen_enc = vector_length_encoding(src_sz);
18139      __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18140      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18141   %}
18142   ins_pipe( pipe_slow );
18143 %}
18144 
18145 instruct reinterpret(vec dst) %{
18146   predicate(!n->bottom_type()->isa_vectmask() &&
18147             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18148   match(Set dst (VectorReinterpret dst));
18149   ins_cost(125);
18150   format %{ "vector_reinterpret $dst\t!" %}
18151   ins_encode %{
18152     // empty
18153   %}
18154   ins_pipe( pipe_slow );
18155 %}
18156 
18157 instruct reinterpret_expand(vec dst, vec src) %{
18158   predicate(UseAVX == 0 &&
18159             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18160   match(Set dst (VectorReinterpret src));
18161   ins_cost(125);
18162   effect(TEMP dst);
18163   format %{ "vector_reinterpret_expand $dst,$src" %}
18164   ins_encode %{
18165     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18166     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18167 
18168     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18169     if (src_vlen_in_bytes == 4) {
18170       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18171     } else {
18172       assert(src_vlen_in_bytes == 8, "");
18173       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18174     }
18175     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18176   %}
18177   ins_pipe( pipe_slow );
18178 %}
18179 
18180 instruct vreinterpret_expand4(legVec dst, vec src) %{
18181   predicate(UseAVX > 0 &&
18182             !n->bottom_type()->isa_vectmask() &&
18183             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18184             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18185   match(Set dst (VectorReinterpret src));
18186   ins_cost(125);
18187   format %{ "vector_reinterpret_expand $dst,$src" %}
18188   ins_encode %{
18189     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18190   %}
18191   ins_pipe( pipe_slow );
18192 %}
18193 
18194 
18195 instruct vreinterpret_expand(legVec dst, vec src) %{
18196   predicate(UseAVX > 0 &&
18197             !n->bottom_type()->isa_vectmask() &&
18198             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18199             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18200   match(Set dst (VectorReinterpret src));
18201   ins_cost(125);
18202   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18203   ins_encode %{
18204     switch (Matcher::vector_length_in_bytes(this, $src)) {
18205       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18206       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18207       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18208       default: ShouldNotReachHere();
18209     }
18210   %}
18211   ins_pipe( pipe_slow );
18212 %}
18213 
18214 instruct reinterpret_shrink(vec dst, legVec src) %{
18215   predicate(!n->bottom_type()->isa_vectmask() &&
18216             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18217   match(Set dst (VectorReinterpret src));
18218   ins_cost(125);
18219   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18220   ins_encode %{
18221     switch (Matcher::vector_length_in_bytes(this)) {
18222       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18223       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18224       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18225       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18226       default: ShouldNotReachHere();
18227     }
18228   %}
18229   ins_pipe( pipe_slow );
18230 %}
18231 
18232 // ----------------------------------------------------------------------------------------------------
18233 
18234 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18235   match(Set dst (RoundDoubleMode src rmode));
18236   format %{ "roundsd $dst,$src" %}
18237   ins_cost(150);
18238   ins_encode %{
18239     assert(UseSSE >= 4, "required");
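    // roundsd writes only the low 64 bits of dst: when dst != src, clear dst
    // first to avoid a false dependency on its previous contents.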
18240     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18241       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18242     }
18243     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18244   %}
18245   ins_pipe(pipe_slow);
18246 %}
18247 
18248 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18249   match(Set dst (RoundDoubleMode con rmode));
18250   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18251   ins_cost(150);
18252   ins_encode %{
18253     assert(UseSSE >= 4, "required");
18254     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18255   %}
18256   ins_pipe(pipe_slow);
18257 %}
18258 
18259 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18260   predicate(Matcher::vector_length(n) < 8);
18261   match(Set dst (RoundDoubleModeV src rmode));
18262   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18263   ins_encode %{
18264     assert(UseAVX > 0, "required");
18265     int vlen_enc = vector_length_encoding(this);
18266     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18267   %}
18268   ins_pipe( pipe_slow );
18269 %}
18270 
18271 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18272   predicate(Matcher::vector_length(n) == 8);
18273   match(Set dst (RoundDoubleModeV src rmode));
18274   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18275   ins_encode %{
18276     assert(UseAVX > 2, "required");
18277     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18278   %}
18279   ins_pipe( pipe_slow );
18280 %}
18281 
18282 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18283   predicate(Matcher::vector_length(n) < 8);
18284   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18285   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18286   ins_encode %{
18287     assert(UseAVX > 0, "required");
18288     int vlen_enc = vector_length_encoding(this);
18289     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18290   %}
18291   ins_pipe( pipe_slow );
18292 %}
18293 
18294 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18295   predicate(Matcher::vector_length(n) == 8);
18296   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18297   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18298   ins_encode %{
18299     assert(UseAVX > 2, "required");
18300     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18301   %}
18302   ins_pipe( pipe_slow );
18303 %}
18304 
18305 instruct onspinwait() %{
18306   match(OnSpinWait);
18307   ins_cost(200);
18308 
18309   format %{
18310     $$template
18311     $$emit$$"pause\t! membar_onspinwait"
18312   %}
18313   ins_encode %{
18314     __ pause();
18315   %}
18316   ins_pipe(pipe_slow);
18317 %}
18318 
18319 // a * b + c
18320 instruct fmaD_reg(regD a, regD b, regD c) %{
18321   match(Set c (FmaD  c (Binary a b)));
18322   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18323   ins_cost(150);
18324   ins_encode %{
18325     assert(UseFMA, "Needs FMA instructions support.");
18326     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18327   %}
18328   ins_pipe( pipe_slow );
18329 %}
18330 
18331 // a * b + c
18332 instruct fmaF_reg(regF a, regF b, regF c) %{
18333   match(Set c (FmaF  c (Binary a b)));
18334   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18335   ins_cost(150);
18336   ins_encode %{
18337     assert(UseFMA, "Needs FMA instructions support.");
18338     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18339   %}
18340   ins_pipe( pipe_slow );
18341 %}
18342 
18343 // ====================VECTOR INSTRUCTIONS=====================================
18344 
18345 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18346 instruct MoveVec2Leg(legVec dst, vec src) %{
18347   match(Set dst src);
18348   format %{ "" %}
18349   ins_encode %{
18350     ShouldNotReachHere();
18351   %}
18352   ins_pipe( fpu_reg_reg );
18353 %}
18354 
18355 instruct MoveLeg2Vec(vec dst, legVec src) %{
18356   match(Set dst src);
18357   format %{ "" %}
18358   ins_encode %{
18359     ShouldNotReachHere();
18360   %}
18361   ins_pipe( fpu_reg_reg );
18362 %}
18363 
18364 // ============================================================================
18365 
// Load vector: generic operand pattern
18367 instruct loadV(vec dst, memory mem) %{
18368   match(Set dst (LoadVector mem));
18369   ins_cost(125);
18370   format %{ "load_vector $dst,$mem" %}
18371   ins_encode %{
18372     BasicType bt = Matcher::vector_element_basic_type(this);
18373     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18374   %}
18375   ins_pipe( pipe_slow );
18376 %}
18377 
// Store vector: generic operand pattern.
18379 instruct storeV(memory mem, vec src) %{
18380   match(Set mem (StoreVector mem src));
18381   ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18383   ins_encode %{
18384     switch (Matcher::vector_length_in_bytes(this, $src)) {
18385       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18386       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18387       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18388       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18389       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18390       default: ShouldNotReachHere();
18391     }
18392   %}
18393   ins_pipe( pipe_slow );
18394 %}
18395 
18396 // ---------------------------------------- Gather ------------------------------------
18397 
18398 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
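//
// Conceptually a gather loads each lane through an index vector, roughly:
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = base[idx[i]];
//   }
// The rules below differ mainly in element size (subword vs. int/long sized)
// and in whether AVX512 opmask predication is available.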
18399 
18400 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18401   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18402             Matcher::vector_length_in_bytes(n) <= 32);
18403   match(Set dst (LoadVectorGather mem idx));
18404   effect(TEMP dst, TEMP tmp, TEMP mask);
18405   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18406   ins_encode %{
18407     int vlen_enc = vector_length_encoding(this);
18408     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18409     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18410     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18411     __ lea($tmp$$Register, $mem$$Address);
18412     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18413   %}
18414   ins_pipe( pipe_slow );
18415 %}
18416 
18418 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18419   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18420             !is_subword_type(Matcher::vector_element_basic_type(n)));
18421   match(Set dst (LoadVectorGather mem idx));
18422   effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18424   ins_encode %{
18425     int vlen_enc = vector_length_encoding(this);
18426     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18427     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18428     __ lea($tmp$$Register, $mem$$Address);
18429     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18430   %}
18431   ins_pipe( pipe_slow );
18432 %}
18433 
18434 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18435   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18436             !is_subword_type(Matcher::vector_element_basic_type(n)));
18437   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18438   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18440   ins_encode %{
18441     assert(UseAVX > 2, "sanity");
18442     int vlen_enc = vector_length_encoding(this);
18443     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18444     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is copied to a temporary first.
18447     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18448     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18449     __ lea($tmp$$Register, $mem$$Address);
18450     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18451   %}
18452   ins_pipe( pipe_slow );
18453 %}
18454 
18455 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18456   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18457   match(Set dst (LoadVectorGather mem idx_base));
18458   effect(TEMP tmp, TEMP rtmp);
18459   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18460   ins_encode %{
18461     int vlen_enc = vector_length_encoding(this);
18462     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18463     __ lea($tmp$$Register, $mem$$Address);
18464     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18465   %}
18466   ins_pipe( pipe_slow );
18467 %}
18468 
18469 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18470                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18471   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18472   match(Set dst (LoadVectorGather mem idx_base));
18473   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18474   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18475   ins_encode %{
18476     int vlen_enc = vector_length_encoding(this);
18477     int vector_len = Matcher::vector_length(this);
18478     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18479     __ lea($tmp$$Register, $mem$$Address);
18480     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18481     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18482                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18483   %}
18484   ins_pipe( pipe_slow );
18485 %}
18486 
18487 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18488   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18489   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18490   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18491   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18492   ins_encode %{
18493     int vlen_enc = vector_length_encoding(this);
18494     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18495     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18496     __ lea($tmp$$Register, $mem$$Address);
18497     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18498     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18499   %}
18500   ins_pipe( pipe_slow );
18501 %}
18502 
18503 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18504                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18505   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18506   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18507   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18508   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18509   ins_encode %{
18510     int vlen_enc = vector_length_encoding(this);
18511     int vector_len = Matcher::vector_length(this);
18512     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18513     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18514     __ lea($tmp$$Register, $mem$$Address);
18515     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18516     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18517     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18518                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18519   %}
18520   ins_pipe( pipe_slow );
18521 %}
18522 
18523 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18524   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18525   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18526   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18527   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18528   ins_encode %{
18529     int vlen_enc = vector_length_encoding(this);
18530     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18531     __ lea($tmp$$Register, $mem$$Address);
18532     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
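    // vpmovmskb produces one mask bit per byte; short lanes are two bytes
    // wide, so pext with 0x55555555 keeps every even bit, leaving one bit
    // per short lane.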
18533     if (elem_bt == T_SHORT) {
18534       __ movl($mask_idx$$Register, 0x55555555);
18535       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18536     }
18537     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18538     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18539   %}
18540   ins_pipe( pipe_slow );
18541 %}
18542 
18543 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18544                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18545   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18546   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18547   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18548   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18549   ins_encode %{
18550     int vlen_enc = vector_length_encoding(this);
18551     int vector_len = Matcher::vector_length(this);
18552     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18553     __ lea($tmp$$Register, $mem$$Address);
18554     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18555     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
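    // As above: compress vpmovmskb's per-byte mask bits down to one bit per
    // short lane via pext with 0x55555555.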
18556     if (elem_bt == T_SHORT) {
18557       __ movl($mask_idx$$Register, 0x55555555);
18558       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18559     }
18560     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18561     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18562                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18563   %}
18564   ins_pipe( pipe_slow );
18565 %}
18566 
18567 // ====================Scatter=======================================
18568 
18569 // Scatter INT, LONG, FLOAT, DOUBLE
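//
// Conceptually a scatter stores each lane through an index vector, roughly:
//   for (int i = 0; i < vlen; i++) {
//     base[idx[i]] = src[i];
//   }
// Both rules rely on AVX512 evscatter, with an all-ones opmask for the
// unmasked variant and the user-supplied mask for the masked one.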
18570 
18571 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18572   predicate(UseAVX > 2);
18573   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18574   effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18576   ins_encode %{
18577     int vlen_enc = vector_length_encoding(this, $src);
18578     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18579 
18580     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18581     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18582 
18583     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18584     __ lea($tmp$$Register, $mem$$Address);
18585     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18586   %}
18587   ins_pipe( pipe_slow );
18588 %}
18589 
18590 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18591   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18592   effect(TEMP tmp, TEMP ktmp);
18593   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18594   ins_encode %{
18595     int vlen_enc = vector_length_encoding(this, $src);
18596     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18597     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18598     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is copied to a temporary first.
18601     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18602     __ lea($tmp$$Register, $mem$$Address);
18603     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18604   %}
18605   ins_pipe( pipe_slow );
18606 %}
18607 
18608 // ====================REPLICATE=======================================
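//
// Replicate broadcasts a single scalar (register, memory, or immediate
// operand) into every lane of the destination vector, roughly:
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = src;
//   }
// The per-type rules below pick the cheapest broadcast form available at the
// current UseSSE/UseAVX level.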
18609 
// Replicate a byte scalar into all lanes of a vector
18611 instruct vReplB_reg(vec dst, rRegI src) %{
18612   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18613   match(Set dst (Replicate src));
18614   format %{ "replicateB $dst,$src" %}
18615   ins_encode %{
18616     uint vlen = Matcher::vector_length(this);
18617     if (UseAVX >= 2) {
18618       int vlen_enc = vector_length_encoding(this);
18619       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18620         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18621         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18622       } else {
18623         __ movdl($dst$$XMMRegister, $src$$Register);
18624         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18625       }
18626     } else {
      assert(UseAVX < 2, "");
18628       __ movdl($dst$$XMMRegister, $src$$Register);
18629       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18630       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18631       if (vlen >= 16) {
18632         assert(vlen == 16, "");
18633         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18634       }
18635     }
18636   %}
18637   ins_pipe( pipe_slow );
18638 %}
18639 
18640 instruct ReplB_mem(vec dst, memory mem) %{
18641   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18642   match(Set dst (Replicate (LoadB mem)));
18643   format %{ "replicateB $dst,$mem" %}
18644   ins_encode %{
18645     int vlen_enc = vector_length_encoding(this);
18646     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18647   %}
18648   ins_pipe( pipe_slow );
18649 %}
18650 
18651 // ====================ReplicateS=======================================
18652 
18653 instruct vReplS_reg(vec dst, rRegI src) %{
18654   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18655   match(Set dst (Replicate src));
18656   format %{ "replicateS $dst,$src" %}
18657   ins_encode %{
18658     uint vlen = Matcher::vector_length(this);
18659     int vlen_enc = vector_length_encoding(this);
18660     if (UseAVX >= 2) {
18661       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18662         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18663         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18664       } else {
18665         __ movdl($dst$$XMMRegister, $src$$Register);
18666         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18667       }
18668     } else {
18669       assert(UseAVX < 2, "");
18670       __ movdl($dst$$XMMRegister, $src$$Register);
18671       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18672       if (vlen >= 8) {
18673         assert(vlen == 8, "");
18674         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18675       }
18676     }
18677   %}
18678   ins_pipe( pipe_slow );
18679 %}
18680 
18681 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18682   match(Set dst (Replicate con));
18683   effect(TEMP rtmp);
18684   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18685   ins_encode %{
18686     int vlen_enc = vector_length_encoding(this);
18687     BasicType bt = Matcher::vector_element_basic_type(this);
18688     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18689     __ movl($rtmp$$Register, $con$$constant);
18690     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18691   %}
18692   ins_pipe( pipe_slow );
18693 %}
18694 
18695 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18696   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18697   match(Set dst (Replicate src));
18698   effect(TEMP rtmp);
18699   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18700   ins_encode %{
18701     int vlen_enc = vector_length_encoding(this);
18702     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18703     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18704   %}
18705   ins_pipe( pipe_slow );
18706 %}
18707 
18708 instruct ReplS_mem(vec dst, memory mem) %{
18709   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18710   match(Set dst (Replicate (LoadS mem)));
18711   format %{ "replicateS $dst,$mem" %}
18712   ins_encode %{
18713     int vlen_enc = vector_length_encoding(this);
18714     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18715   %}
18716   ins_pipe( pipe_slow );
18717 %}
18718 
18719 // ====================ReplicateI=======================================
18720 
18721 instruct ReplI_reg(vec dst, rRegI src) %{
18722   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18723   match(Set dst (Replicate src));
18724   format %{ "replicateI $dst,$src" %}
18725   ins_encode %{
18726     uint vlen = Matcher::vector_length(this);
18727     int vlen_enc = vector_length_encoding(this);
18728     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18729       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18730     } else if (VM_Version::supports_avx2()) {
18731       __ movdl($dst$$XMMRegister, $src$$Register);
18732       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18733     } else {
18734       __ movdl($dst$$XMMRegister, $src$$Register);
18735       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18736     }
18737   %}
18738   ins_pipe( pipe_slow );
18739 %}
18740 
18741 instruct ReplI_mem(vec dst, memory mem) %{
18742   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18743   match(Set dst (Replicate (LoadI mem)));
18744   format %{ "replicateI $dst,$mem" %}
18745   ins_encode %{
18746     int vlen_enc = vector_length_encoding(this);
18747     if (VM_Version::supports_avx2()) {
18748       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18749     } else if (VM_Version::supports_avx()) {
18750       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18751     } else {
18752       __ movdl($dst$$XMMRegister, $mem$$Address);
18753       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18754     }
18755   %}
18756   ins_pipe( pipe_slow );
18757 %}
18758 
18759 instruct ReplI_imm(vec dst, immI con) %{
18760   predicate(Matcher::is_non_long_integral_vector(n));
18761   match(Set dst (Replicate con));
18762   format %{ "replicateI $dst,$con" %}
18763   ins_encode %{
18764     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18765                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18766                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18767     BasicType bt = Matcher::vector_element_basic_type(this);
18768     int vlen = Matcher::vector_length_in_bytes(this);
18769     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18770   %}
18771   ins_pipe( pipe_slow );
18772 %}
18773 
// Replicate scalar zero into all lanes of a vector
18775 instruct ReplI_zero(vec dst, immI_0 zero) %{
18776   predicate(Matcher::is_non_long_integral_vector(n));
18777   match(Set dst (Replicate zero));
18778   format %{ "replicateI $dst,$zero" %}
18779   ins_encode %{
18780     int vlen_enc = vector_length_encoding(this);
18781     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18782       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18783     } else {
18784       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18785     }
18786   %}
18787   ins_pipe( fpu_reg_reg );
18788 %}
18789 
18790 instruct ReplI_M1(vec dst, immI_M1 con) %{
18791   predicate(Matcher::is_non_long_integral_vector(n));
18792   match(Set dst (Replicate con));
18793   format %{ "vallones $dst" %}
18794   ins_encode %{
18795     int vector_len = vector_length_encoding(this);
18796     __ vallones($dst$$XMMRegister, vector_len);
18797   %}
18798   ins_pipe( pipe_slow );
18799 %}
18800 
18801 // ====================ReplicateL=======================================
18802 
// Replicate a long (8-byte) scalar into all lanes of a vector
18804 instruct ReplL_reg(vec dst, rRegL src) %{
18805   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18806   match(Set dst (Replicate src));
18807   format %{ "replicateL $dst,$src" %}
18808   ins_encode %{
18809     int vlen = Matcher::vector_length(this);
18810     int vlen_enc = vector_length_encoding(this);
18811     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18812       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18813     } else if (VM_Version::supports_avx2()) {
18814       __ movdq($dst$$XMMRegister, $src$$Register);
18815       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18816     } else {
18817       __ movdq($dst$$XMMRegister, $src$$Register);
18818       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18819     }
18820   %}
18821   ins_pipe( pipe_slow );
18822 %}
18823 
18824 instruct ReplL_mem(vec dst, memory mem) %{
18825   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18826   match(Set dst (Replicate (LoadL mem)));
18827   format %{ "replicateL $dst,$mem" %}
18828   ins_encode %{
18829     int vlen_enc = vector_length_encoding(this);
18830     if (VM_Version::supports_avx2()) {
18831       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18832     } else if (VM_Version::supports_sse3()) {
18833       __ movddup($dst$$XMMRegister, $mem$$Address);
18834     } else {
18835       __ movq($dst$$XMMRegister, $mem$$Address);
18836       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18837     }
18838   %}
18839   ins_pipe( pipe_slow );
18840 %}
18841 
// Replicate a long (8-byte) scalar immediate into a vector by loading it from the constant table.
18843 instruct ReplL_imm(vec dst, immL con) %{
18844   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18845   match(Set dst (Replicate con));
18846   format %{ "replicateL $dst,$con" %}
18847   ins_encode %{
18848     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18849     int vlen = Matcher::vector_length_in_bytes(this);
18850     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18851   %}
18852   ins_pipe( pipe_slow );
18853 %}
18854 
18855 instruct ReplL_zero(vec dst, immL0 zero) %{
18856   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18857   match(Set dst (Replicate zero));
18858   format %{ "replicateL $dst,$zero" %}
18859   ins_encode %{
18860     int vlen_enc = vector_length_encoding(this);
18861     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18862       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18863     } else {
18864       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18865     }
18866   %}
18867   ins_pipe( fpu_reg_reg );
18868 %}
18869 
18870 instruct ReplL_M1(vec dst, immL_M1 con) %{
18871   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18872   match(Set dst (Replicate con));
18873   format %{ "vallones $dst" %}
18874   ins_encode %{
18875     int vector_len = vector_length_encoding(this);
18876     __ vallones($dst$$XMMRegister, vector_len);
18877   %}
18878   ins_pipe( pipe_slow );
18879 %}
18880 
18881 // ====================ReplicateF=======================================
18882 
18883 instruct vReplF_reg(vec dst, vlRegF src) %{
18884   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18885   match(Set dst (Replicate src));
18886   format %{ "replicateF $dst,$src" %}
18887   ins_encode %{
18888     uint vlen = Matcher::vector_length(this);
18889     int vlen_enc = vector_length_encoding(this);
18890     if (vlen <= 4) {
18891       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18892     } else if (VM_Version::supports_avx2()) {
18893       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18894     } else {
18895       assert(vlen == 8, "sanity");
18896       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18897       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18898     }
18899   %}
18900   ins_pipe( pipe_slow );
18901 %}
18902 
18903 instruct ReplF_reg(vec dst, vlRegF src) %{
18904   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18905   match(Set dst (Replicate src));
18906   format %{ "replicateF $dst,$src" %}
18907   ins_encode %{
18908     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18909   %}
18910   ins_pipe( pipe_slow );
18911 %}
18912 
18913 instruct ReplF_mem(vec dst, memory mem) %{
18914   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18915   match(Set dst (Replicate (LoadF mem)));
18916   format %{ "replicateF $dst,$mem" %}
18917   ins_encode %{
18918     int vlen_enc = vector_length_encoding(this);
18919     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18920   %}
18921   ins_pipe( pipe_slow );
18922 %}
18923 
// Replicate a float scalar immediate into a vector by loading it from the constant table.
18925 instruct ReplF_imm(vec dst, immF con) %{
18926   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18927   match(Set dst (Replicate con));
18928   format %{ "replicateF $dst,$con" %}
18929   ins_encode %{
18930     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18931                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18932     int vlen = Matcher::vector_length_in_bytes(this);
18933     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18934   %}
18935   ins_pipe( pipe_slow );
18936 %}
18937 
18938 instruct ReplF_zero(vec dst, immF0 zero) %{
18939   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18940   match(Set dst (Replicate zero));
18941   format %{ "replicateF $dst,$zero" %}
18942   ins_encode %{
18943     int vlen_enc = vector_length_encoding(this);
18944     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18945       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18946     } else {
18947       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18948     }
18949   %}
18950   ins_pipe( fpu_reg_reg );
18951 %}
18952 
18953 // ====================ReplicateD=======================================
18954 
// Replicate a double (8-byte) scalar into all lanes of a vector
18956 instruct vReplD_reg(vec dst, vlRegD src) %{
18957   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18958   match(Set dst (Replicate src));
18959   format %{ "replicateD $dst,$src" %}
18960   ins_encode %{
18961     uint vlen = Matcher::vector_length(this);
18962     int vlen_enc = vector_length_encoding(this);
18963     if (vlen <= 2) {
18964       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18965     } else if (VM_Version::supports_avx2()) {
18966       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18967     } else {
18968       assert(vlen == 4, "sanity");
18969       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18970       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18971     }
18972   %}
18973   ins_pipe( pipe_slow );
18974 %}
18975 
18976 instruct ReplD_reg(vec dst, vlRegD src) %{
18977   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18978   match(Set dst (Replicate src));
18979   format %{ "replicateD $dst,$src" %}
18980   ins_encode %{
18981     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18982   %}
18983   ins_pipe( pipe_slow );
18984 %}
18985 
18986 instruct ReplD_mem(vec dst, memory mem) %{
18987   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18988   match(Set dst (Replicate (LoadD mem)));
18989   format %{ "replicateD $dst,$mem" %}
18990   ins_encode %{
18991     if (Matcher::vector_length(this) >= 4) {
18992       int vlen_enc = vector_length_encoding(this);
18993       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18994     } else {
18995       __ movddup($dst$$XMMRegister, $mem$$Address);
18996     }
18997   %}
18998   ins_pipe( pipe_slow );
18999 %}
19000 
// Replicate a double (8-byte) scalar immediate into a vector by loading it from the constant table.
19002 instruct ReplD_imm(vec dst, immD con) %{
19003   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19004   match(Set dst (Replicate con));
19005   format %{ "replicateD $dst,$con" %}
19006   ins_encode %{
19007     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19008     int vlen = Matcher::vector_length_in_bytes(this);
19009     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19010   %}
19011   ins_pipe( pipe_slow );
19012 %}
19013 
19014 instruct ReplD_zero(vec dst, immD0 zero) %{
19015   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19016   match(Set dst (Replicate zero));
19017   format %{ "replicateD $dst,$zero" %}
19018   ins_encode %{
19019     int vlen_enc = vector_length_encoding(this);
19020     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19021       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19022     } else {
19023       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19024     }
19025   %}
19026   ins_pipe( fpu_reg_reg );
19027 %}
19028 
19029 // ====================VECTOR INSERT=======================================
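//
// VectorInsert yields a copy of the source vector with a single lane
// replaced, roughly:
//   dst = src; dst[idx] = val;
// For vectors wider than 128 bits, the rules below extract the 128-bit lane
// holding idx, insert the scalar into it, then write the lane back.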
19030 
19031 instruct insert(vec dst, rRegI val, immU8 idx) %{
19032   predicate(Matcher::vector_length_in_bytes(n) < 32);
19033   match(Set dst (VectorInsert (Binary dst val) idx));
19034   format %{ "vector_insert $dst,$val,$idx" %}
19035   ins_encode %{
19036     assert(UseSSE >= 4, "required");
19037     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19038 
19039     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19040 
19041     assert(is_integral_type(elem_bt), "");
19042     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19043 
19044     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19045   %}
19046   ins_pipe( pipe_slow );
19047 %}
19048 
19049 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19050   predicate(Matcher::vector_length_in_bytes(n) == 32);
19051   match(Set dst (VectorInsert (Binary src val) idx));
19052   effect(TEMP vtmp);
19053   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19054   ins_encode %{
19055     int vlen_enc = Assembler::AVX_256bit;
19056     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19057     int elem_per_lane = 16/type2aelembytes(elem_bt);
19058     int log2epr = log2(elem_per_lane);
19059 
19060     assert(is_integral_type(elem_bt), "sanity");
19061     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19062 
19063     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19064     uint y_idx = ($idx$$constant >> log2epr) & 1;
19065     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19066     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19067     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19068   %}
19069   ins_pipe( pipe_slow );
19070 %}
19071 
19072 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19073   predicate(Matcher::vector_length_in_bytes(n) == 64);
19074   match(Set dst (VectorInsert (Binary src val) idx));
19075   effect(TEMP vtmp);
19076   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19077   ins_encode %{
19078     assert(UseAVX > 2, "sanity");
19079 
19080     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19081     int elem_per_lane = 16/type2aelembytes(elem_bt);
19082     int log2epr = log2(elem_per_lane);
19083 
19084     assert(is_integral_type(elem_bt), "");
19085     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19086 
19087     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19088     uint y_idx = ($idx$$constant >> log2epr) & 3;
19089     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19090     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19091     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19092   %}
19093   ins_pipe( pipe_slow );
19094 %}
19095 
19096 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19097   predicate(Matcher::vector_length(n) == 2);
19098   match(Set dst (VectorInsert (Binary dst val) idx));
19099   format %{ "vector_insert $dst,$val,$idx" %}
19100   ins_encode %{
19101     assert(UseSSE >= 4, "required");
19102     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19103     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19104 
19105     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19106   %}
19107   ins_pipe( pipe_slow );
19108 %}
19109 
19110 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19111   predicate(Matcher::vector_length(n) == 4);
19112   match(Set dst (VectorInsert (Binary src val) idx));
19113   effect(TEMP vtmp);
19114   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19115   ins_encode %{
19116     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19117     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19118 
19119     uint x_idx = $idx$$constant & right_n_bits(1);
19120     uint y_idx = ($idx$$constant >> 1) & 1;
19121     int vlen_enc = Assembler::AVX_256bit;
19122     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19123     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19124     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19125   %}
19126   ins_pipe( pipe_slow );
19127 %}
19128 
19129 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19130   predicate(Matcher::vector_length(n) == 8);
19131   match(Set dst (VectorInsert (Binary src val) idx));
19132   effect(TEMP vtmp);
19133   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19134   ins_encode %{
19135     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19136     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19137 
19138     uint x_idx = $idx$$constant & right_n_bits(1);
19139     uint y_idx = ($idx$$constant >> 1) & 3;
19140     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19141     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19142     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19143   %}
19144   ins_pipe( pipe_slow );
19145 %}
19146 
19147 instruct insertF(vec dst, regF val, immU8 idx) %{
19148   predicate(Matcher::vector_length(n) < 8);
19149   match(Set dst (VectorInsert (Binary dst val) idx));
19150   format %{ "vector_insert $dst,$val,$idx" %}
19151   ins_encode %{
19152     assert(UseSSE >= 4, "sanity");
19153 
19154     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19155     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19156 
19157     uint x_idx = $idx$$constant & right_n_bits(2);
19158     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19159   %}
19160   ins_pipe( pipe_slow );
19161 %}
19162 
19163 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19164   predicate(Matcher::vector_length(n) >= 8);
19165   match(Set dst (VectorInsert (Binary src val) idx));
19166   effect(TEMP vtmp);
19167   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19168   ins_encode %{
19169     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19170     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19171 
19172     int vlen = Matcher::vector_length(this);
19173     uint x_idx = $idx$$constant & right_n_bits(2);
19174     if (vlen == 8) {
19175       uint y_idx = ($idx$$constant >> 2) & 1;
19176       int vlen_enc = Assembler::AVX_256bit;
19177       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19178       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19179       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19180     } else {
19181       assert(vlen == 16, "sanity");
19182       uint y_idx = ($idx$$constant >> 2) & 3;
19183       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19184       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19185       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19186     }
19187   %}
19188   ins_pipe( pipe_slow );
19189 %}
19190 
19191 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19192   predicate(Matcher::vector_length(n) == 2);
19193   match(Set dst (VectorInsert (Binary dst val) idx));
19194   effect(TEMP tmp);
19195   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19196   ins_encode %{
19197     assert(UseSSE >= 4, "sanity");
19198     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19199     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19200 
19201     __ movq($tmp$$Register, $val$$XMMRegister);
19202     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19203   %}
19204   ins_pipe( pipe_slow );
19205 %}
19206 
19207 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19208   predicate(Matcher::vector_length(n) == 4);
19209   match(Set dst (VectorInsert (Binary src val) idx));
19210   effect(TEMP vtmp, TEMP tmp);
19211   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19212   ins_encode %{
19213     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19214     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19215 
19216     uint x_idx = $idx$$constant & right_n_bits(1);
19217     uint y_idx = ($idx$$constant >> 1) & 1;
19218     int vlen_enc = Assembler::AVX_256bit;
19219     __ movq($tmp$$Register, $val$$XMMRegister);
19220     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19221     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19222     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19223   %}
19224   ins_pipe( pipe_slow );
19225 %}
19226 
19227 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19228   predicate(Matcher::vector_length(n) == 8);
19229   match(Set dst (VectorInsert (Binary src val) idx));
19230   effect(TEMP tmp, TEMP vtmp);
19231   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19232   ins_encode %{
19233     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19234     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19235 
19236     uint x_idx = $idx$$constant & right_n_bits(1);
19237     uint y_idx = ($idx$$constant >> 1) & 3;
19238     __ movq($tmp$$Register, $val$$XMMRegister);
19239     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19240     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19241     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19242   %}
19243   ins_pipe( pipe_slow );
19244 %}
19245 
19246 // ====================REDUCTION ARITHMETIC=======================================
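//
// A reduction folds all lanes of the vector input into a scalar and combines
// it with the scalar input, roughly:
//   dst = src1 OP (src2[0] OP src2[1] OP ... OP src2[vlen-1]);
// For the strictly ordered float/double rules, dst doubles as the incoming
// scalar and the lanes are combined strictly left to right.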
19247 
19248 // =======================Int Reduction==========================================
19249 
19250 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19251   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19252   match(Set dst (AddReductionVI src1 src2));
19253   match(Set dst (MulReductionVI src1 src2));
19254   match(Set dst (AndReductionV  src1 src2));
19255   match(Set dst ( OrReductionV  src1 src2));
19256   match(Set dst (XorReductionV  src1 src2));
19257   match(Set dst (MinReductionV  src1 src2));
19258   match(Set dst (MaxReductionV  src1 src2));
19259   effect(TEMP vtmp1, TEMP vtmp2);
19260   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19261   ins_encode %{
19262     int opcode = this->ideal_Opcode();
19263     int vlen = Matcher::vector_length(this, $src2);
19264     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19265   %}
19266   ins_pipe( pipe_slow );
19267 %}
19268 
19269 // =======================Long Reduction==========================================
19270 
19271 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19272   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19273   match(Set dst (AddReductionVL src1 src2));
19274   match(Set dst (MulReductionVL src1 src2));
19275   match(Set dst (AndReductionV  src1 src2));
19276   match(Set dst ( OrReductionV  src1 src2));
19277   match(Set dst (XorReductionV  src1 src2));
19278   match(Set dst (MinReductionV  src1 src2));
19279   match(Set dst (MaxReductionV  src1 src2));
19280   effect(TEMP vtmp1, TEMP vtmp2);
19281   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19282   ins_encode %{
19283     int opcode = this->ideal_Opcode();
19284     int vlen = Matcher::vector_length(this, $src2);
19285     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19286   %}
19287   ins_pipe( pipe_slow );
19288 %}
19289 
19290 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19291   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19292   match(Set dst (AddReductionVL src1 src2));
19293   match(Set dst (MulReductionVL src1 src2));
19294   match(Set dst (AndReductionV  src1 src2));
19295   match(Set dst ( OrReductionV  src1 src2));
19296   match(Set dst (XorReductionV  src1 src2));
19297   match(Set dst (MinReductionV  src1 src2));
19298   match(Set dst (MaxReductionV  src1 src2));
19299   effect(TEMP vtmp1, TEMP vtmp2);
19300   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19301   ins_encode %{
19302     int opcode = this->ideal_Opcode();
19303     int vlen = Matcher::vector_length(this, $src2);
19304     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19305   %}
19306   ins_pipe( pipe_slow );
19307 %}
19308 
19309 // =======================Float Reduction==========================================
19310 
19311 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19312   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19313   match(Set dst (AddReductionVF dst src));
19314   match(Set dst (MulReductionVF dst src));
19315   effect(TEMP dst, TEMP vtmp);
19316   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19317   ins_encode %{
19318     int opcode = this->ideal_Opcode();
19319     int vlen = Matcher::vector_length(this, $src);
19320     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19321   %}
19322   ins_pipe( pipe_slow );
19323 %}
19324 
19325 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19326   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19327   match(Set dst (AddReductionVF dst src));
19328   match(Set dst (MulReductionVF dst src));
19329   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19330   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19331   ins_encode %{
19332     int opcode = this->ideal_Opcode();
19333     int vlen = Matcher::vector_length(this, $src);
19334     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19335   %}
19336   ins_pipe( pipe_slow );
19337 %}
19338 
19339 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19340   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19341   match(Set dst (AddReductionVF dst src));
19342   match(Set dst (MulReductionVF dst src));
19343   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19344   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19345   ins_encode %{
19346     int opcode = this->ideal_Opcode();
19347     int vlen = Matcher::vector_length(this, $src);
19348     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19349   %}
19350   ins_pipe( pipe_slow );
19351 %}
19352 
19354 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19355   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19356   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19357   // src1 contains reduction identity
19358   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19359   match(Set dst (AddReductionVF src1 src2));
19360   match(Set dst (MulReductionVF src1 src2));
19361   effect(TEMP dst);
19362   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19363   ins_encode %{
19364     int opcode = this->ideal_Opcode();
19365     int vlen = Matcher::vector_length(this, $src2);
19366     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19367   %}
19368   ins_pipe( pipe_slow );
19369 %}
19370 
19371 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19372   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19373   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19374   // src1 contains reduction identity
19375   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19376   match(Set dst (AddReductionVF src1 src2));
19377   match(Set dst (MulReductionVF src1 src2));
19378   effect(TEMP dst, TEMP vtmp);
19379   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19380   ins_encode %{
19381     int opcode = this->ideal_Opcode();
19382     int vlen = Matcher::vector_length(this, $src2);
19383     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19384   %}
19385   ins_pipe( pipe_slow );
19386 %}
19387 
19388 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19389   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19390   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19391   // src1 contains reduction identity
19392   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19393   match(Set dst (AddReductionVF src1 src2));
19394   match(Set dst (MulReductionVF src1 src2));
19395   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19396   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19397   ins_encode %{
19398     int opcode = this->ideal_Opcode();
19399     int vlen = Matcher::vector_length(this, $src2);
19400     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19401   %}
19402   ins_pipe( pipe_slow );
19403 %}
19404 
19405 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19406   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19407   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19408   // src1 contains reduction identity
19409   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19410   match(Set dst (AddReductionVF src1 src2));
19411   match(Set dst (MulReductionVF src1 src2));
19412   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19413   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19414   ins_encode %{
19415     int opcode = this->ideal_Opcode();
19416     int vlen = Matcher::vector_length(this, $src2);
19417     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19418   %}
19419   ins_pipe( pipe_slow );
19420 %}
19421 
19422 // =======================Double Reduction==========================================
19423 
19424 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19425   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19426   match(Set dst (AddReductionVD dst src));
19427   match(Set dst (MulReductionVD dst src));
19428   effect(TEMP dst, TEMP vtmp);
19429   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19430   ins_encode %{
19431     int opcode = this->ideal_Opcode();
19432     int vlen = Matcher::vector_length(this, $src);
19433     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19434 %}
19435   ins_pipe( pipe_slow );
19436 %}
19437 
19438 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19439   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19440   match(Set dst (AddReductionVD dst src));
19441   match(Set dst (MulReductionVD dst src));
19442   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19443   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19444   ins_encode %{
19445     int opcode = this->ideal_Opcode();
19446     int vlen = Matcher::vector_length(this, $src);
19447     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19448   %}
19449   ins_pipe( pipe_slow );
19450 %}
19451 
19452 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19453   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19454   match(Set dst (AddReductionVD dst src));
19455   match(Set dst (MulReductionVD dst src));
19456   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19457   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19458   ins_encode %{
19459     int opcode = this->ideal_Opcode();
19460     int vlen = Matcher::vector_length(this, $src);
19461     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19462   %}
19463   ins_pipe( pipe_slow );
19464 %}
19465 
19466 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19467   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19468   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19469   // src1 contains reduction identity
19470   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19471   match(Set dst (AddReductionVD src1 src2));
19472   match(Set dst (MulReductionVD src1 src2));
19473   effect(TEMP dst);
19474   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19475   ins_encode %{
19476     int opcode = this->ideal_Opcode();
19477     int vlen = Matcher::vector_length(this, $src2);
19478     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19479 %}
19480   ins_pipe( pipe_slow );
19481 %}
19482 
19483 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19484   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19485   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19486   // src1 contains reduction identity
19487   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19488   match(Set dst (AddReductionVD src1 src2));
19489   match(Set dst (MulReductionVD src1 src2));
19490   effect(TEMP dst, TEMP vtmp);
19491   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19492   ins_encode %{
19493     int opcode = this->ideal_Opcode();
19494     int vlen = Matcher::vector_length(this, $src2);
19495     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19496   %}
19497   ins_pipe( pipe_slow );
19498 %}
19499 
19500 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19501   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19502   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19503   // src1 contains reduction identity
19504   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19505   match(Set dst (AddReductionVD src1 src2));
19506   match(Set dst (MulReductionVD src1 src2));
19507   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19508   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19509   ins_encode %{
19510     int opcode = this->ideal_Opcode();
19511     int vlen = Matcher::vector_length(this, $src2);
19512     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19513   %}
19514   ins_pipe( pipe_slow );
19515 %}
19516 
19517 // =======================Byte Reduction==========================================
19518 
19519 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19520   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19521   match(Set dst (AddReductionVI src1 src2));
19522   match(Set dst (AndReductionV  src1 src2));
19523   match(Set dst ( OrReductionV  src1 src2));
19524   match(Set dst (XorReductionV  src1 src2));
19525   match(Set dst (MinReductionV  src1 src2));
19526   match(Set dst (MaxReductionV  src1 src2));
19527   effect(TEMP vtmp1, TEMP vtmp2);
19528   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19529   ins_encode %{
19530     int opcode = this->ideal_Opcode();
19531     int vlen = Matcher::vector_length(this, $src2);
19532     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19533   %}
19534   ins_pipe( pipe_slow );
19535 %}
19536 
19537 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19538   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19539   match(Set dst (AddReductionVI src1 src2));
19540   match(Set dst (AndReductionV  src1 src2));
19541   match(Set dst ( OrReductionV  src1 src2));
19542   match(Set dst (XorReductionV  src1 src2));
19543   match(Set dst (MinReductionV  src1 src2));
19544   match(Set dst (MaxReductionV  src1 src2));
19545   effect(TEMP vtmp1, TEMP vtmp2);
19546   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19547   ins_encode %{
19548     int opcode = this->ideal_Opcode();
19549     int vlen = Matcher::vector_length(this, $src2);
19550     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19551   %}
19552   ins_pipe( pipe_slow );
19553 %}
19554 
19555 // =======================Short Reduction==========================================
19556 
19557 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19558   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19559   match(Set dst (AddReductionVI src1 src2));
19560   match(Set dst (MulReductionVI src1 src2));
19561   match(Set dst (AndReductionV  src1 src2));
19562   match(Set dst ( OrReductionV  src1 src2));
19563   match(Set dst (XorReductionV  src1 src2));
19564   match(Set dst (MinReductionV  src1 src2));
19565   match(Set dst (MaxReductionV  src1 src2));
19566   effect(TEMP vtmp1, TEMP vtmp2);
19567   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19568   ins_encode %{
19569     int opcode = this->ideal_Opcode();
19570     int vlen = Matcher::vector_length(this, $src2);
19571     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19572   %}
19573   ins_pipe( pipe_slow );
19574 %}
19575 
19576 // =======================Mul Reduction==========================================
19577 
19578 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19579   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19580             Matcher::vector_length(n->in(2)) <= 32); // src2
19581   match(Set dst (MulReductionVI src1 src2));
19582   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19583   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19584   ins_encode %{
19585     int opcode = this->ideal_Opcode();
19586     int vlen = Matcher::vector_length(this, $src2);
19587     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19588   %}
19589   ins_pipe( pipe_slow );
19590 %}
19591 
19592 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19593   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19594             Matcher::vector_length(n->in(2)) == 64); // src2
19595   match(Set dst (MulReductionVI src1 src2));
19596   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19597   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19598   ins_encode %{
19599     int opcode = this->ideal_Opcode();
19600     int vlen = Matcher::vector_length(this, $src2);
19601     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19602   %}
19603   ins_pipe( pipe_slow );
19604 %}
19605 
19606 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
19608 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19609                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19610   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19611             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19612              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19613             Matcher::vector_length(n->in(2)) == 2);
19614   match(Set dst (MinReductionV src1 src2));
19615   match(Set dst (MaxReductionV src1 src2));
19616   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19617   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19618   ins_encode %{
19619     assert(UseAVX > 0, "sanity");
19620 
19621     int opcode = this->ideal_Opcode();
19622     int vlen = Matcher::vector_length(this, $src2);
19623     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19624                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19625   %}
19626   ins_pipe( pipe_slow );
19627 %}
19628 
19629 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19630                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19631   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19632             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19633              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19634             Matcher::vector_length(n->in(2)) >= 4);
19635   match(Set dst (MinReductionV src1 src2));
19636   match(Set dst (MaxReductionV src1 src2));
19637   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19638   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19639   ins_encode %{
19640     assert(UseAVX > 0, "sanity");
19641 
19642     int opcode = this->ideal_Opcode();
19643     int vlen = Matcher::vector_length(this, $src2);
19644     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19645                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19646   %}
19647   ins_pipe( pipe_slow );
19648 %}
19649 
19650 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19651                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19652   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19653             Matcher::vector_length(n->in(2)) == 2);
19654   match(Set dst (MinReductionV dst src));
19655   match(Set dst (MaxReductionV dst src));
19656   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19657   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19658   ins_encode %{
19659     assert(UseAVX > 0, "sanity");
19660 
19661     int opcode = this->ideal_Opcode();
19662     int vlen = Matcher::vector_length(this, $src);
19663     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19664                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19665   %}
19666   ins_pipe( pipe_slow );
19667 %}
19668 
19669 
19670 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19671                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19672   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19673             Matcher::vector_length(n->in(2)) >= 4);
19674   match(Set dst (MinReductionV dst src));
19675   match(Set dst (MaxReductionV dst src));
19676   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19677   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19678   ins_encode %{
19679     assert(UseAVX > 0, "sanity");
19680 
19681     int opcode = this->ideal_Opcode();
19682     int vlen = Matcher::vector_length(this, $src);
19683     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19684                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19685   %}
19686   ins_pipe( pipe_slow );
19687 %}
19688 
19689 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19690   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19691             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19692              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19693             Matcher::vector_length(n->in(2)) == 2);
19694   match(Set dst (MinReductionV src1 src2));
19695   match(Set dst (MaxReductionV src1 src2));
19696   effect(TEMP dst, TEMP xtmp1);
19697   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19698   ins_encode %{
19699     int opcode = this->ideal_Opcode();
19700     int vlen = Matcher::vector_length(this, $src2);
19701     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19702                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19703   %}
19704   ins_pipe( pipe_slow );
19705 %}
19706 
19707 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19708   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19709             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19710              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19711             Matcher::vector_length(n->in(2)) >= 4);
19712   match(Set dst (MinReductionV src1 src2));
19713   match(Set dst (MaxReductionV src1 src2));
19714   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19715   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19716   ins_encode %{
19717     int opcode = this->ideal_Opcode();
19718     int vlen = Matcher::vector_length(this, $src2);
19719     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19720                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19721   %}
19722   ins_pipe( pipe_slow );
19723 %}
19724 
19725 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19726   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19727             Matcher::vector_length(n->in(2)) == 2);
19728   match(Set dst (MinReductionV dst src));
19729   match(Set dst (MaxReductionV dst src));
19730   effect(TEMP dst, TEMP xtmp1);
19731   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19732   ins_encode %{
19733     int opcode = this->ideal_Opcode();
19734     int vlen = Matcher::vector_length(this, $src);
19735     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19736                          $xtmp1$$XMMRegister);
19737   %}
19738   ins_pipe( pipe_slow );
19739 %}
19740 
19741 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19742   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19743             Matcher::vector_length(n->in(2)) >= 4);
19744   match(Set dst (MinReductionV dst src));
19745   match(Set dst (MaxReductionV dst src));
19746   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19747   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19748   ins_encode %{
19749     int opcode = this->ideal_Opcode();
19750     int vlen = Matcher::vector_length(this, $src);
19751     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19752                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19753   %}
19754   ins_pipe( pipe_slow );
19755 %}
19756 
//--------------------Min/Max Double Reduction --------------------
19758 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19759                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19760   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19761             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19762              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19763             Matcher::vector_length(n->in(2)) == 2);
19764   match(Set dst (MinReductionV src1 src2));
19765   match(Set dst (MaxReductionV src1 src2));
19766   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19767   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19768   ins_encode %{
19769     assert(UseAVX > 0, "sanity");
19770 
19771     int opcode = this->ideal_Opcode();
19772     int vlen = Matcher::vector_length(this, $src2);
19773     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19774                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19775   %}
19776   ins_pipe( pipe_slow );
19777 %}
19778 
19779 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19780                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19781   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19782             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19783              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19784             Matcher::vector_length(n->in(2)) >= 4);
19785   match(Set dst (MinReductionV src1 src2));
19786   match(Set dst (MaxReductionV src1 src2));
19787   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19788   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19789   ins_encode %{
19790     assert(UseAVX > 0, "sanity");
19791 
19792     int opcode = this->ideal_Opcode();
19793     int vlen = Matcher::vector_length(this, $src2);
19794     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19795                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19796   %}
19797   ins_pipe( pipe_slow );
19798 %}
19799 
19800 
19801 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19802                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19803   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19804             Matcher::vector_length(n->in(2)) == 2);
19805   match(Set dst (MinReductionV dst src));
19806   match(Set dst (MaxReductionV dst src));
19807   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19808   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19809   ins_encode %{
19810     assert(UseAVX > 0, "sanity");
19811 
19812     int opcode = this->ideal_Opcode();
19813     int vlen = Matcher::vector_length(this, $src);
19814     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19815                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19816   %}
19817   ins_pipe( pipe_slow );
19818 %}
19819 
19820 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19821                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19822   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19823             Matcher::vector_length(n->in(2)) >= 4);
19824   match(Set dst (MinReductionV dst src));
19825   match(Set dst (MaxReductionV dst src));
19826   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19827   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19828   ins_encode %{
19829     assert(UseAVX > 0, "sanity");
19830 
19831     int opcode = this->ideal_Opcode();
19832     int vlen = Matcher::vector_length(this, $src);
19833     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19834                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19835   %}
19836   ins_pipe( pipe_slow );
19837 %}
19838 
19839 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19840   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19841             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19842              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19843             Matcher::vector_length(n->in(2)) == 2);
19844   match(Set dst (MinReductionV src1 src2));
19845   match(Set dst (MaxReductionV src1 src2));
19846   effect(TEMP dst, TEMP xtmp1);
19847   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19848   ins_encode %{
19849     int opcode = this->ideal_Opcode();
19850     int vlen = Matcher::vector_length(this, $src2);
19851     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19852                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
19853   %}
19854   ins_pipe( pipe_slow );
19855 %}
19856 
19857 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19858   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19859             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19860              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19861             Matcher::vector_length(n->in(2)) >= 4);
19862   match(Set dst (MinReductionV src1 src2));
19863   match(Set dst (MaxReductionV src1 src2));
19864   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19865   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19866   ins_encode %{
19867     int opcode = this->ideal_Opcode();
19868     int vlen = Matcher::vector_length(this, $src2);
19869     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19870                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19871   %}
19872   ins_pipe( pipe_slow );
19873 %}
19874 
19875 
19876 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
19877   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19878             Matcher::vector_length(n->in(2)) == 2);
19879   match(Set dst (MinReductionV dst src));
19880   match(Set dst (MaxReductionV dst src));
19881   effect(TEMP dst, TEMP xtmp1);
19882   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19883   ins_encode %{
19884     int opcode = this->ideal_Opcode();
19885     int vlen = Matcher::vector_length(this, $src);
19886     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19887                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19888   %}
19889   ins_pipe( pipe_slow );
19890 %}
19891 
19892 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19893   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19894             Matcher::vector_length(n->in(2)) >= 4);
19895   match(Set dst (MinReductionV dst src));
19896   match(Set dst (MaxReductionV dst src));
19897   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19898   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19899   ins_encode %{
19900     int opcode = this->ideal_Opcode();
19901     int vlen = Matcher::vector_length(this, $src);
19902     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19903                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19904   %}
19905   ins_pipe( pipe_slow );
19906 %}
19907 
19908 // ====================VECTOR ARITHMETIC=======================================
19909 
19910 // --------------------------------- ADD --------------------------------------
19911 
19912 // Bytes vector add
19913 instruct vaddB(vec dst, vec src) %{
19914   predicate(UseAVX == 0);
19915   match(Set dst (AddVB dst src));
19916   format %{ "paddb   $dst,$src\t! add packedB" %}
19917   ins_encode %{
19918     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19919   %}
19920   ins_pipe( pipe_slow );
19921 %}
19922 
19923 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19924   predicate(UseAVX > 0);
19925   match(Set dst (AddVB src1 src2));
19926   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
19927   ins_encode %{
19928     int vlen_enc = vector_length_encoding(this);
19929     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19930   %}
19931   ins_pipe( pipe_slow );
19932 %}
19933 
19934 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19935   predicate((UseAVX > 0) &&
19936             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19937   match(Set dst (AddVB src (LoadVector mem)));
19938   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
19939   ins_encode %{
19940     int vlen_enc = vector_length_encoding(this);
19941     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19942   %}
19943   ins_pipe( pipe_slow );
19944 %}
19945 
19946 // Shorts/Chars vector add
19947 instruct vaddS(vec dst, vec src) %{
19948   predicate(UseAVX == 0);
19949   match(Set dst (AddVS dst src));
19950   format %{ "paddw   $dst,$src\t! add packedS" %}
19951   ins_encode %{
19952     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19953   %}
19954   ins_pipe( pipe_slow );
19955 %}
19956 
19957 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19958   predicate(UseAVX > 0);
19959   match(Set dst (AddVS src1 src2));
19960   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
19961   ins_encode %{
19962     int vlen_enc = vector_length_encoding(this);
19963     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19964   %}
19965   ins_pipe( pipe_slow );
19966 %}
19967 
19968 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19969   predicate((UseAVX > 0) &&
19970             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19971   match(Set dst (AddVS src (LoadVector mem)));
19972   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
19973   ins_encode %{
19974     int vlen_enc = vector_length_encoding(this);
19975     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19976   %}
19977   ins_pipe( pipe_slow );
19978 %}
19979 
19980 // Integers vector add
19981 instruct vaddI(vec dst, vec src) %{
19982   predicate(UseAVX == 0);
19983   match(Set dst (AddVI dst src));
19984   format %{ "paddd   $dst,$src\t! add packedI" %}
19985   ins_encode %{
19986     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19987   %}
19988   ins_pipe( pipe_slow );
19989 %}
19990 
19991 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19992   predicate(UseAVX > 0);
19993   match(Set dst (AddVI src1 src2));
19994   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
19995   ins_encode %{
19996     int vlen_enc = vector_length_encoding(this);
19997     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19998   %}
19999   ins_pipe( pipe_slow );
20000 %}
20001 
20002 
20003 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20004   predicate((UseAVX > 0) &&
20005             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20006   match(Set dst (AddVI src (LoadVector mem)));
20007   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20008   ins_encode %{
20009     int vlen_enc = vector_length_encoding(this);
20010     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20011   %}
20012   ins_pipe( pipe_slow );
20013 %}
20014 
20015 // Longs vector add
20016 instruct vaddL(vec dst, vec src) %{
20017   predicate(UseAVX == 0);
20018   match(Set dst (AddVL dst src));
20019   format %{ "paddq   $dst,$src\t! add packedL" %}
20020   ins_encode %{
20021     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20022   %}
20023   ins_pipe( pipe_slow );
20024 %}
20025 
20026 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20027   predicate(UseAVX > 0);
20028   match(Set dst (AddVL src1 src2));
20029   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20030   ins_encode %{
20031     int vlen_enc = vector_length_encoding(this);
20032     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20033   %}
20034   ins_pipe( pipe_slow );
20035 %}
20036 
20037 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20038   predicate((UseAVX > 0) &&
20039             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20040   match(Set dst (AddVL src (LoadVector mem)));
20041   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20042   ins_encode %{
20043     int vlen_enc = vector_length_encoding(this);
20044     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20045   %}
20046   ins_pipe( pipe_slow );
20047 %}
20048 
20049 // Floats vector add
20050 instruct vaddF(vec dst, vec src) %{
20051   predicate(UseAVX == 0);
20052   match(Set dst (AddVF dst src));
20053   format %{ "addps   $dst,$src\t! add packedF" %}
20054   ins_encode %{
20055     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20056   %}
20057   ins_pipe( pipe_slow );
20058 %}
20059 
20060 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20061   predicate(UseAVX > 0);
20062   match(Set dst (AddVF src1 src2));
20063   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20064   ins_encode %{
20065     int vlen_enc = vector_length_encoding(this);
20066     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20067   %}
20068   ins_pipe( pipe_slow );
20069 %}
20070 
20071 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20072   predicate((UseAVX > 0) &&
20073             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20074   match(Set dst (AddVF src (LoadVector mem)));
20075   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20076   ins_encode %{
20077     int vlen_enc = vector_length_encoding(this);
20078     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20079   %}
20080   ins_pipe( pipe_slow );
20081 %}
20082 
20083 // Doubles vector add
20084 instruct vaddD(vec dst, vec src) %{
20085   predicate(UseAVX == 0);
20086   match(Set dst (AddVD dst src));
20087   format %{ "addpd   $dst,$src\t! add packedD" %}
20088   ins_encode %{
20089     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20090   %}
20091   ins_pipe( pipe_slow );
20092 %}
20093 
20094 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20095   predicate(UseAVX > 0);
20096   match(Set dst (AddVD src1 src2));
20097   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20098   ins_encode %{
20099     int vlen_enc = vector_length_encoding(this);
20100     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20101   %}
20102   ins_pipe( pipe_slow );
20103 %}
20104 
20105 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20106   predicate((UseAVX > 0) &&
20107             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20108   match(Set dst (AddVD src (LoadVector mem)));
20109   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20110   ins_encode %{
20111     int vlen_enc = vector_length_encoding(this);
20112     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20113   %}
20114   ins_pipe( pipe_slow );
20115 %}
20116 
20117 // --------------------------------- SUB --------------------------------------
20118 
20119 // Bytes vector sub
20120 instruct vsubB(vec dst, vec src) %{
20121   predicate(UseAVX == 0);
20122   match(Set dst (SubVB dst src));
20123   format %{ "psubb   $dst,$src\t! sub packedB" %}
20124   ins_encode %{
20125     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20126   %}
20127   ins_pipe( pipe_slow );
20128 %}
20129 
20130 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20131   predicate(UseAVX > 0);
20132   match(Set dst (SubVB src1 src2));
20133   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20134   ins_encode %{
20135     int vlen_enc = vector_length_encoding(this);
20136     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20137   %}
20138   ins_pipe( pipe_slow );
20139 %}
20140 
20141 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20142   predicate((UseAVX > 0) &&
20143             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20144   match(Set dst (SubVB src (LoadVector mem)));
20145   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20146   ins_encode %{
20147     int vlen_enc = vector_length_encoding(this);
20148     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20149   %}
20150   ins_pipe( pipe_slow );
20151 %}
20152 
20153 // Shorts/Chars vector sub
20154 instruct vsubS(vec dst, vec src) %{
20155   predicate(UseAVX == 0);
20156   match(Set dst (SubVS dst src));
20157   format %{ "psubw   $dst,$src\t! sub packedS" %}
20158   ins_encode %{
20159     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20160   %}
20161   ins_pipe( pipe_slow );
20162 %}
20163 
20164 
20165 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20166   predicate(UseAVX > 0);
20167   match(Set dst (SubVS src1 src2));
20168   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20169   ins_encode %{
20170     int vlen_enc = vector_length_encoding(this);
20171     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20172   %}
20173   ins_pipe( pipe_slow );
20174 %}
20175 
20176 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20177   predicate((UseAVX > 0) &&
20178             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20179   match(Set dst (SubVS src (LoadVector mem)));
20180   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20181   ins_encode %{
20182     int vlen_enc = vector_length_encoding(this);
20183     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20184   %}
20185   ins_pipe( pipe_slow );
20186 %}
20187 
20188 // Integers vector sub
20189 instruct vsubI(vec dst, vec src) %{
20190   predicate(UseAVX == 0);
20191   match(Set dst (SubVI dst src));
20192   format %{ "psubd   $dst,$src\t! sub packedI" %}
20193   ins_encode %{
20194     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20195   %}
20196   ins_pipe( pipe_slow );
20197 %}
20198 
20199 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20200   predicate(UseAVX > 0);
20201   match(Set dst (SubVI src1 src2));
20202   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20203   ins_encode %{
20204     int vlen_enc = vector_length_encoding(this);
20205     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20206   %}
20207   ins_pipe( pipe_slow );
20208 %}
20209 
20210 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20211   predicate((UseAVX > 0) &&
20212             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20213   match(Set dst (SubVI src (LoadVector mem)));
20214   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20215   ins_encode %{
20216     int vlen_enc = vector_length_encoding(this);
20217     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20218   %}
20219   ins_pipe( pipe_slow );
20220 %}
20221 
20222 // Longs vector sub
20223 instruct vsubL(vec dst, vec src) %{
20224   predicate(UseAVX == 0);
20225   match(Set dst (SubVL dst src));
20226   format %{ "psubq   $dst,$src\t! sub packedL" %}
20227   ins_encode %{
20228     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20229   %}
20230   ins_pipe( pipe_slow );
20231 %}
20232 
20233 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20234   predicate(UseAVX > 0);
20235   match(Set dst (SubVL src1 src2));
20236   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20237   ins_encode %{
20238     int vlen_enc = vector_length_encoding(this);
20239     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20240   %}
20241   ins_pipe( pipe_slow );
20242 %}
20243 
20244 
20245 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20246   predicate((UseAVX > 0) &&
20247             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20248   match(Set dst (SubVL src (LoadVector mem)));
20249   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20250   ins_encode %{
20251     int vlen_enc = vector_length_encoding(this);
20252     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20253   %}
20254   ins_pipe( pipe_slow );
20255 %}
20256 
20257 // Floats vector sub
20258 instruct vsubF(vec dst, vec src) %{
20259   predicate(UseAVX == 0);
20260   match(Set dst (SubVF dst src));
20261   format %{ "subps   $dst,$src\t! sub packedF" %}
20262   ins_encode %{
20263     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20264   %}
20265   ins_pipe( pipe_slow );
20266 %}
20267 
20268 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20269   predicate(UseAVX > 0);
20270   match(Set dst (SubVF src1 src2));
20271   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20272   ins_encode %{
20273     int vlen_enc = vector_length_encoding(this);
20274     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20275   %}
20276   ins_pipe( pipe_slow );
20277 %}
20278 
20279 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20280   predicate((UseAVX > 0) &&
20281             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20282   match(Set dst (SubVF src (LoadVector mem)));
20283   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20284   ins_encode %{
20285     int vlen_enc = vector_length_encoding(this);
20286     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20287   %}
20288   ins_pipe( pipe_slow );
20289 %}
20290 
20291 // Doubles vector sub
20292 instruct vsubD(vec dst, vec src) %{
20293   predicate(UseAVX == 0);
20294   match(Set dst (SubVD dst src));
20295   format %{ "subpd   $dst,$src\t! sub packedD" %}
20296   ins_encode %{
20297     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20298   %}
20299   ins_pipe( pipe_slow );
20300 %}
20301 
20302 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20303   predicate(UseAVX > 0);
20304   match(Set dst (SubVD src1 src2));
20305   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20306   ins_encode %{
20307     int vlen_enc = vector_length_encoding(this);
20308     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20309   %}
20310   ins_pipe( pipe_slow );
20311 %}
20312 
20313 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20314   predicate((UseAVX > 0) &&
20315             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20316   match(Set dst (SubVD src (LoadVector mem)));
20317   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20318   ins_encode %{
20319     int vlen_enc = vector_length_encoding(this);
20320     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20321   %}
20322   ins_pipe( pipe_slow );
20323 %}
20324 
20325 // --------------------------------- MUL --------------------------------------
20326 
20327 // Byte vector mul
20328 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20329   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20330   match(Set dst (MulVB src1 src2));
20331   effect(TEMP dst, TEMP xtmp);
20332   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20333   ins_encode %{
20334     assert(UseSSE > 3, "required");
20335     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20336     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20337     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20338     __ psllw($dst$$XMMRegister, 8);
20339     __ psrlw($dst$$XMMRegister, 8);
20340     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20341   %}
20342   ins_pipe( pipe_slow );
20343 %}
20344 
20345 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20346   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20347   match(Set dst (MulVB src1 src2));
20348   effect(TEMP dst, TEMP xtmp);
20349   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20350   ins_encode %{
20351     assert(UseSSE > 3, "required");
20352     // Odd-index elements
20353     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20354     __ psrlw($dst$$XMMRegister, 8);
20355     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20356     __ psrlw($xtmp$$XMMRegister, 8);
20357     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20358     __ psllw($dst$$XMMRegister, 8);
20359     // Even-index elements
20360     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20361     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20362     __ psllw($xtmp$$XMMRegister, 8);
20363     __ psrlw($xtmp$$XMMRegister, 8);
20364     // Combine
20365     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20366   %}
20367   ins_pipe( pipe_slow );
20368 %}
20369 
20370 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20371   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20372   match(Set dst (MulVB src1 src2));
20373   effect(TEMP xtmp1, TEMP xtmp2);
20374   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20375   ins_encode %{
20376     int vlen_enc = vector_length_encoding(this);
20377     // Odd-index elements
20378     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20379     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20380     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20381     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20382     // Even-index elements
20383     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20384     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20385     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20386     // Combine
20387     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20388   %}
20389   ins_pipe( pipe_slow );
20390 %}
20391 
20392 // Shorts/Chars vector mul
20393 instruct vmulS(vec dst, vec src) %{
20394   predicate(UseAVX == 0);
20395   match(Set dst (MulVS dst src));
20396   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20397   ins_encode %{
20398     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20399   %}
20400   ins_pipe( pipe_slow );
20401 %}
20402 
20403 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20404   predicate(UseAVX > 0);
20405   match(Set dst (MulVS src1 src2));
20406   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20407   ins_encode %{
20408     int vlen_enc = vector_length_encoding(this);
20409     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20410   %}
20411   ins_pipe( pipe_slow );
20412 %}
20413 
20414 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20415   predicate((UseAVX > 0) &&
20416             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20417   match(Set dst (MulVS src (LoadVector mem)));
20418   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20419   ins_encode %{
20420     int vlen_enc = vector_length_encoding(this);
20421     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20422   %}
20423   ins_pipe( pipe_slow );
20424 %}
20425 
20426 // Integers vector mul
20427 instruct vmulI(vec dst, vec src) %{
20428   predicate(UseAVX == 0);
20429   match(Set dst (MulVI dst src));
20430   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20431   ins_encode %{
20432     assert(UseSSE > 3, "required");
20433     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20434   %}
20435   ins_pipe( pipe_slow );
20436 %}
20437 
20438 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20439   predicate(UseAVX > 0);
20440   match(Set dst (MulVI src1 src2));
20441   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20442   ins_encode %{
20443     int vlen_enc = vector_length_encoding(this);
20444     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20445   %}
20446   ins_pipe( pipe_slow );
20447 %}
20448 
20449 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20450   predicate((UseAVX > 0) &&
20451             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20452   match(Set dst (MulVI src (LoadVector mem)));
20453   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20454   ins_encode %{
20455     int vlen_enc = vector_length_encoding(this);
20456     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20457   %}
20458   ins_pipe( pipe_slow );
20459 %}
20460 
20461 // Longs vector mul
20462 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20463   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20464              VM_Version::supports_avx512dq()) ||
20465             VM_Version::supports_avx512vldq());
20466   match(Set dst (MulVL src1 src2));
20467   ins_cost(500);
20468   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20469   ins_encode %{
20470     assert(UseAVX > 2, "required");
20471     int vlen_enc = vector_length_encoding(this);
20472     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20473   %}
20474   ins_pipe( pipe_slow );
20475 %}
20476 
20477 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20478   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20479              VM_Version::supports_avx512dq()) ||
20480             (Matcher::vector_length_in_bytes(n) > 8 &&
20481              VM_Version::supports_avx512vldq()));
20482   match(Set dst (MulVL src (LoadVector mem)));
20483   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20484   ins_cost(500);
20485   ins_encode %{
20486     assert(UseAVX > 2, "required");
20487     int vlen_enc = vector_length_encoding(this);
20488     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20489   %}
20490   ins_pipe( pipe_slow );
20491 %}
20492 
20493 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20494   predicate(UseAVX == 0);
20495   match(Set dst (MulVL src1 src2));
20496   ins_cost(500);
20497   effect(TEMP dst, TEMP xtmp);
20498   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20499   ins_encode %{
20500     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only the low 32 bits of each are needed
20502     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20503     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20504     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20505     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20506     __ psllq($dst$$XMMRegister, 32);
20507     // Get the lo-lo products
20508     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20509     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20510     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20511   %}
20512   ins_pipe( pipe_slow );
20513 %}
20514 
20515 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20516   predicate(UseAVX > 0 &&
20517             ((Matcher::vector_length_in_bytes(n) == 64 &&
20518               !VM_Version::supports_avx512dq()) ||
20519              (Matcher::vector_length_in_bytes(n) < 64 &&
20520               !VM_Version::supports_avx512vldq())));
20521   match(Set dst (MulVL src1 src2));
20522   effect(TEMP xtmp1, TEMP xtmp2);
20523   ins_cost(500);
20524   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20525   ins_encode %{
20526     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only the low 32 bits of each are needed
20528     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20529     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20530     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20531     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20532     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20533     // Get the lo-lo products
20534     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20535     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20536   %}
20537   ins_pipe( pipe_slow );
20538 %}
20539 
20540 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20541   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20542   match(Set dst (MulVL src1 src2));
20543   ins_cost(100);
20544   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20545   ins_encode %{
20546     int vlen_enc = vector_length_encoding(this);
20547     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20548   %}
20549   ins_pipe( pipe_slow );
20550 %}
20551 
20552 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20553   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20554   match(Set dst (MulVL src1 src2));
20555   ins_cost(100);
20556   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20557   ins_encode %{
20558     int vlen_enc = vector_length_encoding(this);
20559     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20560   %}
20561   ins_pipe( pipe_slow );
20562 %}
20563 
20564 // Floats vector mul
20565 instruct vmulF(vec dst, vec src) %{
20566   predicate(UseAVX == 0);
20567   match(Set dst (MulVF dst src));
20568   format %{ "mulps   $dst,$src\t! mul packedF" %}
20569   ins_encode %{
20570     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20571   %}
20572   ins_pipe( pipe_slow );
20573 %}
20574 
20575 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20576   predicate(UseAVX > 0);
20577   match(Set dst (MulVF src1 src2));
20578   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20579   ins_encode %{
20580     int vlen_enc = vector_length_encoding(this);
20581     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20582   %}
20583   ins_pipe( pipe_slow );
20584 %}
20585 
20586 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20587   predicate((UseAVX > 0) &&
20588             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20589   match(Set dst (MulVF src (LoadVector mem)));
20590   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20591   ins_encode %{
20592     int vlen_enc = vector_length_encoding(this);
20593     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20594   %}
20595   ins_pipe( pipe_slow );
20596 %}
20597 
20598 // Doubles vector mul
20599 instruct vmulD(vec dst, vec src) %{
20600   predicate(UseAVX == 0);
20601   match(Set dst (MulVD dst src));
20602   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20603   ins_encode %{
20604     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20605   %}
20606   ins_pipe( pipe_slow );
20607 %}
20608 
20609 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20610   predicate(UseAVX > 0);
20611   match(Set dst (MulVD src1 src2));
20612   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20613   ins_encode %{
20614     int vlen_enc = vector_length_encoding(this);
20615     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20616   %}
20617   ins_pipe( pipe_slow );
20618 %}
20619 
20620 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20621   predicate((UseAVX > 0) &&
20622             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20623   match(Set dst (MulVD src (LoadVector mem)));
20624   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20625   ins_encode %{
20626     int vlen_enc = vector_length_encoding(this);
20627     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20628   %}
20629   ins_pipe( pipe_slow );
20630 %}
20631 
20632 // --------------------------------- DIV --------------------------------------
20633 
20634 // Floats vector div
20635 instruct vdivF(vec dst, vec src) %{
20636   predicate(UseAVX == 0);
20637   match(Set dst (DivVF dst src));
20638   format %{ "divps   $dst,$src\t! div packedF" %}
20639   ins_encode %{
20640     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20641   %}
20642   ins_pipe( pipe_slow );
20643 %}
20644 
20645 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20646   predicate(UseAVX > 0);
20647   match(Set dst (DivVF src1 src2));
20648   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20649   ins_encode %{
20650     int vlen_enc = vector_length_encoding(this);
20651     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20652   %}
20653   ins_pipe( pipe_slow );
20654 %}
20655 
20656 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20657   predicate((UseAVX > 0) &&
20658             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20659   match(Set dst (DivVF src (LoadVector mem)));
20660   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20661   ins_encode %{
20662     int vlen_enc = vector_length_encoding(this);
20663     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20664   %}
20665   ins_pipe( pipe_slow );
20666 %}
20667 
20668 // Doubles vector div
20669 instruct vdivD(vec dst, vec src) %{
20670   predicate(UseAVX == 0);
20671   match(Set dst (DivVD dst src));
20672   format %{ "divpd   $dst,$src\t! div packedD" %}
20673   ins_encode %{
20674     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20675   %}
20676   ins_pipe( pipe_slow );
20677 %}
20678 
20679 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20680   predicate(UseAVX > 0);
20681   match(Set dst (DivVD src1 src2));
20682   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20683   ins_encode %{
20684     int vlen_enc = vector_length_encoding(this);
20685     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20686   %}
20687   ins_pipe( pipe_slow );
20688 %}
20689 
20690 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20691   predicate((UseAVX > 0) &&
20692             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20693   match(Set dst (DivVD src (LoadVector mem)));
20694   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20695   ins_encode %{
20696     int vlen_enc = vector_length_encoding(this);
20697     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20698   %}
20699   ins_pipe( pipe_slow );
20700 %}
20701 
20702 // ------------------------------ MinMax ---------------------------------------
20703 
20704 // Byte, Short, Int vector Min/Max
20705 instruct minmax_reg_sse(vec dst, vec src) %{
20706   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20707             UseAVX == 0);
20708   match(Set dst (MinV dst src));
20709   match(Set dst (MaxV dst src));
20710   format %{ "vector_minmax  $dst,$src\t!  " %}
20711   ins_encode %{
20712     assert(UseSSE >= 4, "required");
20713 
20714     int opcode = this->ideal_Opcode();
20715     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20716     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20717   %}
20718   ins_pipe( pipe_slow );
20719 %}
20720 
20721 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20722   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20723             UseAVX > 0);
20724   match(Set dst (MinV src1 src2));
20725   match(Set dst (MaxV src1 src2));
20726   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20727   ins_encode %{
20728     int opcode = this->ideal_Opcode();
20729     int vlen_enc = vector_length_encoding(this);
20730     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20731 
20732     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20733   %}
20734   ins_pipe( pipe_slow );
20735 %}
20736 
20737 // Long vector Min/Max
20738 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20739   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20740             UseAVX == 0);
20741   match(Set dst (MinV dst src));
20742   match(Set dst (MaxV src dst));
20743   effect(TEMP dst, TEMP tmp);
20744   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20745   ins_encode %{
20746     assert(UseSSE >= 4, "required");
20747 
20748     int opcode = this->ideal_Opcode();
20749     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20750     assert(elem_bt == T_LONG, "sanity");
20751 
20752     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20753   %}
20754   ins_pipe( pipe_slow );
20755 %}
20756 
20757 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20758   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20759             UseAVX > 0 && !VM_Version::supports_avx512vl());
20760   match(Set dst (MinV src1 src2));
20761   match(Set dst (MaxV src1 src2));
20762   effect(TEMP dst);
20763   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20764   ins_encode %{
20765     int vlen_enc = vector_length_encoding(this);
20766     int opcode = this->ideal_Opcode();
20767     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20768     assert(elem_bt == T_LONG, "sanity");
20769 
20770     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20771   %}
20772   ins_pipe( pipe_slow );
20773 %}
20774 
20775 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20776   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20777             Matcher::vector_element_basic_type(n) == T_LONG);
20778   match(Set dst (MinV src1 src2));
20779   match(Set dst (MaxV src1 src2));
20780   format %{ "vector_minmaxL  $dst,$src1,src2\t! " %}
20781   ins_encode %{
20782     assert(UseAVX > 2, "required");
20783 
20784     int vlen_enc = vector_length_encoding(this);
20785     int opcode = this->ideal_Opcode();
20786     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20787     assert(elem_bt == T_LONG, "sanity");
20788 
20789     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20790   %}
20791   ins_pipe( pipe_slow );
20792 %}
20793 
20794 // Float/Double vector Min/Max
20795 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20796   predicate(VM_Version::supports_avx10_2() &&
20797             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20798   match(Set dst (MinV a b));
20799   match(Set dst (MaxV a b));
20800   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20801   ins_encode %{
20802     int vlen_enc = vector_length_encoding(this);
20803     int opcode = this->ideal_Opcode();
20804     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20805     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20806   %}
20807   ins_pipe( pipe_slow );
20808 %}
20809 
20810 // Float/Double vector Min/Max
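// Java Math.min/max semantics differ from raw x86 minps/maxps: the x86
// instructions return the second operand when an input is NaN and treat
// -0.0 as equal to +0.0, whereas Java must propagate NaN and order
// -0.0 < +0.0. The blend-based sequences below (and their TEMP registers)
// bridge that gap; the AVX10.2 rule above maps onto the new vminmax
// instructions, which provide the required semantics directly.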
20811 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20812   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20813             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20814             UseAVX > 0);
20815   match(Set dst (MinV a b));
20816   match(Set dst (MaxV a b));
20817   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20818   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20819   ins_encode %{
20820     assert(UseAVX > 0, "required");
20821 
20822     int opcode = this->ideal_Opcode();
20823     int vlen_enc = vector_length_encoding(this);
20824     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20825 
20826     __ vminmax_fp(opcode, elem_bt,
20827                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20828                   $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20829   %}
20830   ins_pipe( pipe_slow );
20831 %}
20832 
20833 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20834   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20835             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20836   match(Set dst (MinV a b));
20837   match(Set dst (MaxV a b));
20838   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20839   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20840   ins_encode %{
20841     assert(UseAVX > 2, "required");
20842 
20843     int opcode = this->ideal_Opcode();
20844     int vlen_enc = vector_length_encoding(this);
20845     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20846 
20847     __ evminmax_fp(opcode, elem_bt,
20848                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20849                    $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20850   %}
20851   ins_pipe( pipe_slow );
20852 %}
20853 
20854 // ------------------------------ Unsigned vector Min/Max ----------------------
20855 
20856 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20857   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20858   match(Set dst (UMinV a b));
20859   match(Set dst (UMaxV a b));
20860   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20861   ins_encode %{
20862     int opcode = this->ideal_Opcode();
20863     int vlen_enc = vector_length_encoding(this);
20864     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20865     assert(is_integral_type(elem_bt), "");
20866     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20867   %}
20868   ins_pipe( pipe_slow );
20869 %}
20870 
20871 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20872   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20873   match(Set dst (UMinV a (LoadVector b)));
20874   match(Set dst (UMaxV a (LoadVector b)));
20875   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20876   ins_encode %{
20877     int opcode = this->ideal_Opcode();
20878     int vlen_enc = vector_length_encoding(this);
20879     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20880     assert(is_integral_type(elem_bt), "");
20881     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20882   %}
20883   ins_pipe( pipe_slow );
20884 %}
20885 
20886 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20887   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20888   match(Set dst (UMinV a b));
20889   match(Set dst (UMaxV a b));
20890   effect(TEMP xtmp1, TEMP xtmp2);
20891   format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
20892   ins_encode %{
20893     int opcode = this->ideal_Opcode();
20894     int vlen_enc = vector_length_encoding(this);
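    // There is no 64-bit unsigned min/max instruction before AVX512VL, so
    // vpuminmaxq emulates it (conceptually: flip the sign bits of both
    // operands, do a signed compare, then blend), using $xtmp1 and $xtmp2
    // as scratch.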
20895     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20896   %}
20897   ins_pipe( pipe_slow );
20898 %}
20899 
20900 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20901   match(Set dst (UMinV (Binary dst src2) mask));
20902   match(Set dst (UMaxV (Binary dst src2) mask));
20903   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20904   ins_encode %{
20905     int vlen_enc = vector_length_encoding(this);
20906     BasicType bt = Matcher::vector_element_basic_type(this);
20907     int opc = this->ideal_Opcode();
20908     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20909                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20910   %}
20911   ins_pipe( pipe_slow );
20912 %}
20913 
20914 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20915   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20916   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20917   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20918   ins_encode %{
20919     int vlen_enc = vector_length_encoding(this);
20920     BasicType bt = Matcher::vector_element_basic_type(this);
20921     int opc = this->ideal_Opcode();
20922     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20923                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20924   %}
20925   ins_pipe( pipe_slow );
20926 %}
20927 
20928 // --------------------------------- Signum/CopySign ---------------------------
20929 
20930 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20931   match(Set dst (SignumF dst (Binary zero one)));
20932   effect(KILL cr);
20933   format %{ "signumF $dst, $dst" %}
20934   ins_encode %{
20935     int opcode = this->ideal_Opcode();
20936     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20937   %}
20938   ins_pipe( pipe_slow );
20939 %}
20940 
20941 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20942   match(Set dst (SignumD dst (Binary zero one)));
20943   effect(KILL cr);
20944   format %{ "signumD $dst, $dst" %}
20945   ins_encode %{
20946     int opcode = this->ideal_Opcode();
20947     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20948   %}
20949   ins_pipe( pipe_slow );
20950 %}
20951 
20952 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20953   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20954   match(Set dst (SignumVF src (Binary zero one)));
20955   match(Set dst (SignumVD src (Binary zero one)));
20956   effect(TEMP dst, TEMP xtmp1);
20957   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20958   ins_encode %{
20959     int opcode = this->ideal_Opcode();
20960     int vec_enc = vector_length_encoding(this);
20961     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20962                          $xtmp1$$XMMRegister, vec_enc);
20963   %}
20964   ins_pipe( pipe_slow );
20965 %}
20966 
20967 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20968   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20969   match(Set dst (SignumVF src (Binary zero one)));
20970   match(Set dst (SignumVD src (Binary zero one)));
20971   effect(TEMP dst, TEMP ktmp1);
20972   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20973   ins_encode %{
20974     int opcode = this->ideal_Opcode();
20975     int vec_enc = vector_length_encoding(this);
20976     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20977                           $ktmp1$$KRegister, vec_enc);
20978   %}
20979   ins_pipe( pipe_slow );
20980 %}
20981 
20982 // ---------------------------------------
// For copySign, use 0xE4 as the truth-table immediate (imm8) for vpternlog
20984 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20985 // C (xmm2) is set to 0x7FFFFFFF
// Wherever a bit of xmm2 is 0 (the sign-bit position), pick the bit from B (the sign operand)
// Wherever a bit of xmm2 is 1 (the magnitude bits), pick the bit from A (the magnitude operand)
20988 //
20989 // A B C Result
20990 // 0 0 0 0
20991 // 0 0 1 0
20992 // 0 1 0 1
20993 // 0 1 1 0
20994 // 1 0 0 0
20995 // 1 0 1 1
20996 // 1 1 0 1
20997 // 1 1 1 1
20998 //
// Result, read from the high bit (A,B,C = 1,1,1) down to the low bit (0,0,0), is 0b11100100 = 0xE4
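//
// A minimal C sketch (illustrative only, not part of the matcher) showing how
// that immediate is derived: bit i of the imm8 is the desired output for the
// input triple i = (A << 2) | (B << 1) | C:
//
//   uint8_t imm = 0;
//   for (int i = 0; i < 8; i++) {
//     int A = (i >> 2) & 1, B = (i >> 1) & 1, C = i & 1;
//     imm |= (uint8_t)((C ? A : B) << i);  // C selects: 1 -> A, 0 -> B
//   }
//   // imm == 0xE4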
21000 // ---------------------------------------
21001 
21002 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21003   match(Set dst (CopySignF dst src));
21004   effect(TEMP tmp1, TEMP tmp2);
21005   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21006   ins_encode %{
21007     __ movl($tmp2$$Register, 0x7FFFFFFF);
21008     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21009     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21010   %}
21011   ins_pipe( pipe_slow );
21012 %}
21013 
21014 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21015   match(Set dst (CopySignD dst (Binary src zero)));
21016   ins_cost(100);
21017   effect(TEMP tmp1, TEMP tmp2);
21018   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21019   ins_encode %{
21020     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21021     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21022     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21023   %}
21024   ins_pipe( pipe_slow );
21025 %}
21026 
21027 //----------------------------- CompressBits/ExpandBits ------------------------
21028 
21029 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21030   predicate(n->bottom_type()->isa_int());
21031   match(Set dst (CompressBits src mask));
21032   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21033   ins_encode %{
21034     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21035   %}
21036   ins_pipe( pipe_slow );
21037 %}
21038 
21039 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21040   predicate(n->bottom_type()->isa_int());
21041   match(Set dst (ExpandBits src mask));
21042   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21043   ins_encode %{
21044     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21045   %}
21046   ins_pipe( pipe_slow );
21047 %}
21048 
21049 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21050   predicate(n->bottom_type()->isa_int());
21051   match(Set dst (CompressBits src (LoadI mask)));
21052   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21053   ins_encode %{
21054     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21055   %}
21056   ins_pipe( pipe_slow );
21057 %}
21058 
21059 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21060   predicate(n->bottom_type()->isa_int());
21061   match(Set dst (ExpandBits src (LoadI mask)));
21062   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21063   ins_encode %{
21064     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21065   %}
21066   ins_pipe( pipe_slow );
21067 %}
21068 
21069 // --------------------------------- Sqrt --------------------------------------
21070 
21071 instruct vsqrtF_reg(vec dst, vec src) %{
21072   match(Set dst (SqrtVF src));
21073   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21074   ins_encode %{
21075     assert(UseAVX > 0, "required");
21076     int vlen_enc = vector_length_encoding(this);
21077     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21078   %}
21079   ins_pipe( pipe_slow );
21080 %}
21081 
21082 instruct vsqrtF_mem(vec dst, memory mem) %{
21083   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21084   match(Set dst (SqrtVF (LoadVector mem)));
21085   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21086   ins_encode %{
21087     assert(UseAVX > 0, "required");
21088     int vlen_enc = vector_length_encoding(this);
21089     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21090   %}
21091   ins_pipe( pipe_slow );
21092 %}
21093 
21094 // Floating point vector sqrt
21095 instruct vsqrtD_reg(vec dst, vec src) %{
21096   match(Set dst (SqrtVD src));
21097   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21098   ins_encode %{
21099     assert(UseAVX > 0, "required");
21100     int vlen_enc = vector_length_encoding(this);
21101     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21102   %}
21103   ins_pipe( pipe_slow );
21104 %}
21105 
21106 instruct vsqrtD_mem(vec dst, memory mem) %{
21107   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21108   match(Set dst (SqrtVD (LoadVector mem)));
21109   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21110   ins_encode %{
21111     assert(UseAVX > 0, "required");
21112     int vlen_enc = vector_length_encoding(this);
21113     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21114   %}
21115   ins_pipe( pipe_slow );
21116 %}
21117 
21118 // ------------------------------ Shift ---------------------------------------
21119 
21120 // Left and right shift count vectors are the same on x86
21121 // (only lowest bits of xmm reg are used for count).
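// For example, PSLLW/PSLLD/PSLLQ all interpret the count operand as the
// unsigned integer in the low 64 bits of the XMM register, so the single
// movdl below serves both LShiftCntV and RShiftCntV.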
21122 instruct vshiftcnt(vec dst, rRegI cnt) %{
21123   match(Set dst (LShiftCntV cnt));
21124   match(Set dst (RShiftCntV cnt));
21125   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21126   ins_encode %{
21127     __ movdl($dst$$XMMRegister, $cnt$$Register);
21128   %}
21129   ins_pipe( pipe_slow );
21130 %}
21131 
21132 // Byte vector shift
21133 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21134   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21135   match(Set dst ( LShiftVB src shift));
21136   match(Set dst ( RShiftVB src shift));
21137   match(Set dst (URShiftVB src shift));
21138   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21139   format %{"vector_byte_shift $dst,$src,$shift" %}
21140   ins_encode %{
21141     assert(UseSSE > 3, "required");
21142     int opcode = this->ideal_Opcode();
21143     bool sign = (opcode != Op_URShiftVB);
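    // Widen bytes to words, shift at word granularity, then mask and pack
    // back down to bytes (x86 has no byte-granular vector shift).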
21144     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21145     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21146     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21147     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21148     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21149   %}
21150   ins_pipe( pipe_slow );
21151 %}
21152 
21153 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21154   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21155             UseAVX <= 1);
21156   match(Set dst ( LShiftVB src shift));
21157   match(Set dst ( RShiftVB src shift));
21158   match(Set dst (URShiftVB src shift));
21159   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21160   format %{"vector_byte_shift $dst,$src,$shift" %}
21161   ins_encode %{
21162     assert(UseSSE > 3, "required");
21163     int opcode = this->ideal_Opcode();
21164     bool sign = (opcode != Op_URShiftVB);
21165     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21166     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21167     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21168     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21169     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21170     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21171     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21172     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21173     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21174   %}
21175   ins_pipe( pipe_slow );
21176 %}
21177 
21178 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21179   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21180             UseAVX > 1);
21181   match(Set dst ( LShiftVB src shift));
21182   match(Set dst ( RShiftVB src shift));
21183   match(Set dst (URShiftVB src shift));
21184   effect(TEMP dst, TEMP tmp);
21185   format %{"vector_byte_shift $dst,$src,$shift" %}
21186   ins_encode %{
21187     int opcode = this->ideal_Opcode();
21188     bool sign = (opcode != Op_URShiftVB);
21189     int vlen_enc = Assembler::AVX_256bit;
21190     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21191     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21192     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21193     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21194     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21195   %}
21196   ins_pipe( pipe_slow );
21197 %}
21198 
21199 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21200   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21201   match(Set dst ( LShiftVB src shift));
21202   match(Set dst ( RShiftVB src shift));
21203   match(Set dst (URShiftVB src shift));
21204   effect(TEMP dst, TEMP tmp);
21205   format %{"vector_byte_shift $dst,$src,$shift" %}
21206   ins_encode %{
21207     assert(UseAVX > 1, "required");
21208     int opcode = this->ideal_Opcode();
21209     bool sign = (opcode != Op_URShiftVB);
21210     int vlen_enc = Assembler::AVX_256bit;
21211     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21212     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21213     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21214     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21215     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21216     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21217     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21218     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21219     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21220   %}
21221   ins_pipe( pipe_slow );
21222 %}
21223 
21224 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21225   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21226   match(Set dst ( LShiftVB src shift));
21227   match(Set dst  (RShiftVB src shift));
21228   match(Set dst (URShiftVB src shift));
21229   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21230   format %{"vector_byte_shift $dst,$src,$shift" %}
21231   ins_encode %{
21232     assert(UseAVX > 2, "required");
21233     int opcode = this->ideal_Opcode();
21234     bool sign = (opcode != Op_URShiftVB);
21235     int vlen_enc = Assembler::AVX_512bit;
21236     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21237     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21238     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21239     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21240     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21241     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21242     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21243     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21244     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21245     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21246     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21247     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21248   %}
21249   ins_pipe( pipe_slow );
21250 %}
21251 
// A logical right shift of a short vector would produce an incorrect Java
// result for negative data, because Java code converts a short value to an
// int with sign extension before shifting. Char vectors are fine, though,
// since chars are unsigned values.
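// A worked example of the mismatch, for a short lane holding -1 shifted
// logically right by 2:
//   Java:   (short)(((int)(short)0xFFFF) >>> 2) = (short)0x3FFFFFFF = -1
//   psrlw:   0xFFFF >>> 2                       = 0x3FFF            = 16383
//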
// Shorts/Chars vector shift
21257 instruct vshiftS(vec dst, vec src, vec shift) %{
21258   predicate(!n->as_ShiftV()->is_var_shift());
21259   match(Set dst ( LShiftVS src shift));
21260   match(Set dst ( RShiftVS src shift));
21261   match(Set dst (URShiftVS src shift));
21262   effect(TEMP dst, USE src, USE shift);
21263   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21264   ins_encode %{
21265     int opcode = this->ideal_Opcode();
21266     if (UseAVX > 0) {
21267       int vlen_enc = vector_length_encoding(this);
21268       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21269     } else {
21270       int vlen = Matcher::vector_length(this);
21271       if (vlen == 2) {
21272         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21273         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21274       } else if (vlen == 4) {
21275         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21276         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21277       } else {
21278         assert (vlen == 8, "sanity");
21279         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21280         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21281       }
21282     }
21283   %}
21284   ins_pipe( pipe_slow );
21285 %}
21286 
// Integers vector shift
21288 instruct vshiftI(vec dst, vec src, vec shift) %{
21289   predicate(!n->as_ShiftV()->is_var_shift());
21290   match(Set dst ( LShiftVI src shift));
21291   match(Set dst ( RShiftVI src shift));
21292   match(Set dst (URShiftVI src shift));
21293   effect(TEMP dst, USE src, USE shift);
21294   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21295   ins_encode %{
21296     int opcode = this->ideal_Opcode();
21297     if (UseAVX > 0) {
21298       int vlen_enc = vector_length_encoding(this);
21299       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21300     } else {
21301       int vlen = Matcher::vector_length(this);
21302       if (vlen == 2) {
21303         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21304         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21305       } else {
21306         assert(vlen == 4, "sanity");
21307         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21308         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21309       }
21310     }
21311   %}
21312   ins_pipe( pipe_slow );
21313 %}
21314 
// Integers vector constant shift
21316 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21317   match(Set dst (LShiftVI src (LShiftCntV shift)));
21318   match(Set dst (RShiftVI src (RShiftCntV shift)));
21319   match(Set dst (URShiftVI src (RShiftCntV shift)));
21320   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21321   ins_encode %{
21322     int opcode = this->ideal_Opcode();
21323     if (UseAVX > 0) {
21324       int vector_len = vector_length_encoding(this);
21325       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21326     } else {
21327       int vlen = Matcher::vector_length(this);
21328       if (vlen == 2) {
21329         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21330         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21331       } else {
21332         assert(vlen == 4, "sanity");
21333         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21334         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21335       }
21336     }
21337   %}
21338   ins_pipe( pipe_slow );
21339 %}
21340 
21341 // Longs vector shift
21342 instruct vshiftL(vec dst, vec src, vec shift) %{
21343   predicate(!n->as_ShiftV()->is_var_shift());
21344   match(Set dst ( LShiftVL src shift));
21345   match(Set dst (URShiftVL src shift));
21346   effect(TEMP dst, USE src, USE shift);
21347   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21348   ins_encode %{
21349     int opcode = this->ideal_Opcode();
21350     if (UseAVX > 0) {
21351       int vlen_enc = vector_length_encoding(this);
21352       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21353     } else {
21354       assert(Matcher::vector_length(this) == 2, "");
21355       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21356       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21357     }
21358   %}
21359   ins_pipe( pipe_slow );
21360 %}
21361 
21362 // Longs vector constant shift
21363 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21364   match(Set dst (LShiftVL src (LShiftCntV shift)));
21365   match(Set dst (URShiftVL src (RShiftCntV shift)));
21366   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21367   ins_encode %{
21368     int opcode = this->ideal_Opcode();
21369     if (UseAVX > 0) {
21370       int vector_len = vector_length_encoding(this);
21371       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21372     } else {
21373       assert(Matcher::vector_length(this) == 2, "");
21374       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21375       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21376     }
21377   %}
21378   ins_pipe( pipe_slow );
21379 %}
21380 
21381 // -------------------ArithmeticRightShift -----------------------------------
21382 // Long vector arithmetic right shift
21383 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21384   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21385   match(Set dst (RShiftVL src shift));
21386   effect(TEMP dst, TEMP tmp);
21387   format %{ "vshiftq $dst,$src,$shift" %}
21388   ins_encode %{
21389     uint vlen = Matcher::vector_length(this);
21390     if (vlen == 2) {
21391       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21392       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21393       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21394       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21395       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21396       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21397     } else {
21398       assert(vlen == 4, "sanity");
21399       assert(UseAVX > 1, "required");
21400       int vlen_enc = Assembler::AVX_256bit;
21401       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21402       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21403       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21404       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21405       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21406     }
21407   %}
21408   ins_pipe( pipe_slow );
21409 %}
21410 
21411 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21412   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21413   match(Set dst (RShiftVL src shift));
21414   format %{ "vshiftq $dst,$src,$shift" %}
21415   ins_encode %{
21416     int vlen_enc = vector_length_encoding(this);
21417     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21418   %}
21419   ins_pipe( pipe_slow );
21420 %}
21421 
21422 // ------------------- Variable Shift -----------------------------
21423 // Byte variable shift
21424 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21425   predicate(Matcher::vector_length(n) <= 8 &&
21426             n->as_ShiftV()->is_var_shift() &&
21427             !VM_Version::supports_avx512bw());
21428   match(Set dst ( LShiftVB src shift));
21429   match(Set dst ( RShiftVB src shift));
21430   match(Set dst (URShiftVB src shift));
21431   effect(TEMP dst, TEMP vtmp);
21432   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21433   ins_encode %{
21434     assert(UseAVX >= 2, "required");
21435 
21436     int opcode = this->ideal_Opcode();
21437     int vlen_enc = Assembler::AVX_128bit;
21438     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21439     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21440   %}
21441   ins_pipe( pipe_slow );
21442 %}
21443 
21444 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21445   predicate(Matcher::vector_length(n) == 16 &&
21446             n->as_ShiftV()->is_var_shift() &&
21447             !VM_Version::supports_avx512bw());
21448   match(Set dst ( LShiftVB src shift));
21449   match(Set dst ( RShiftVB src shift));
21450   match(Set dst (URShiftVB src shift));
21451   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21452   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21453   ins_encode %{
21454     assert(UseAVX >= 2, "required");
21455 
21456     int opcode = this->ideal_Opcode();
21457     int vlen_enc = Assembler::AVX_128bit;
21458     // Shift lower half and get word result in dst
21459     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21460 
21461     // Shift upper half and get word result in vtmp1
21462     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21463     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21464     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21465 
21466     // Merge and down convert the two word results to byte in dst
21467     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21468   %}
21469   ins_pipe( pipe_slow );
21470 %}
21471 
21472 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21473   predicate(Matcher::vector_length(n) == 32 &&
21474             n->as_ShiftV()->is_var_shift() &&
21475             !VM_Version::supports_avx512bw());
21476   match(Set dst ( LShiftVB src shift));
21477   match(Set dst ( RShiftVB src shift));
21478   match(Set dst (URShiftVB src shift));
21479   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21480   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21481   ins_encode %{
21482     assert(UseAVX >= 2, "required");
21483 
21484     int opcode = this->ideal_Opcode();
21485     int vlen_enc = Assembler::AVX_128bit;
21486     // Process lower 128 bits and get result in dst
21487     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21488     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21489     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21490     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21491     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21492 
21493     // Process higher 128 bits and get result in vtmp3
21494     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21495     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21496     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21497     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21498     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21499     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21500     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21501 
21502     // Merge the two results in dst
21503     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21504   %}
21505   ins_pipe( pipe_slow );
21506 %}
21507 
21508 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21509   predicate(Matcher::vector_length(n) <= 32 &&
21510             n->as_ShiftV()->is_var_shift() &&
21511             VM_Version::supports_avx512bw());
21512   match(Set dst ( LShiftVB src shift));
21513   match(Set dst ( RShiftVB src shift));
21514   match(Set dst (URShiftVB src shift));
21515   effect(TEMP dst, TEMP vtmp);
21516   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21517   ins_encode %{
21518     assert(UseAVX > 2, "required");
21519 
21520     int opcode = this->ideal_Opcode();
21521     int vlen_enc = vector_length_encoding(this);
21522     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21523   %}
21524   ins_pipe( pipe_slow );
21525 %}
21526 
21527 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21528   predicate(Matcher::vector_length(n) == 64 &&
21529             n->as_ShiftV()->is_var_shift() &&
21530             VM_Version::supports_avx512bw());
21531   match(Set dst ( LShiftVB src shift));
21532   match(Set dst ( RShiftVB src shift));
21533   match(Set dst (URShiftVB src shift));
21534   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21535   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21536   ins_encode %{
21537     assert(UseAVX > 2, "required");
21538 
21539     int opcode = this->ideal_Opcode();
21540     int vlen_enc = Assembler::AVX_256bit;
21541     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21542     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21543     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21544     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21545     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21546   %}
21547   ins_pipe( pipe_slow );
21548 %}
21549 
21550 // Short variable shift
21551 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21552   predicate(Matcher::vector_length(n) <= 8 &&
21553             n->as_ShiftV()->is_var_shift() &&
21554             !VM_Version::supports_avx512bw());
21555   match(Set dst ( LShiftVS src shift));
21556   match(Set dst ( RShiftVS src shift));
21557   match(Set dst (URShiftVS src shift));
21558   effect(TEMP dst, TEMP vtmp);
21559   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21560   ins_encode %{
21561     assert(UseAVX >= 2, "required");
21562 
21563     int opcode = this->ideal_Opcode();
21564     bool sign = (opcode != Op_URShiftVS);
21565     int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21568     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21569     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21570     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21571     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21572   %}
21573   ins_pipe( pipe_slow );
21574 %}
21575 
21576 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21577   predicate(Matcher::vector_length(n) == 16 &&
21578             n->as_ShiftV()->is_var_shift() &&
21579             !VM_Version::supports_avx512bw());
21580   match(Set dst ( LShiftVS src shift));
21581   match(Set dst ( RShiftVS src shift));
21582   match(Set dst (URShiftVS src shift));
21583   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21584   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21585   ins_encode %{
21586     assert(UseAVX >= 2, "required");
21587 
21588     int opcode = this->ideal_Opcode();
21589     bool sign = (opcode != Op_URShiftVS);
21590     int vlen_enc = Assembler::AVX_256bit;
21591     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21592     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21593     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21594     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21595     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21596 
21597     // Shift upper half, with result in dst using vtmp1 as TEMP
21598     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21599     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21600     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21601     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21602     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21603     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21604 
21605     // Merge lower and upper half result into dst
21606     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21607     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21608   %}
21609   ins_pipe( pipe_slow );
21610 %}
21611 
21612 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21613   predicate(n->as_ShiftV()->is_var_shift() &&
21614             VM_Version::supports_avx512bw());
21615   match(Set dst ( LShiftVS src shift));
21616   match(Set dst ( RShiftVS src shift));
21617   match(Set dst (URShiftVS src shift));
21618   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21619   ins_encode %{
21620     assert(UseAVX > 2, "required");
21621 
21622     int opcode = this->ideal_Opcode();
21623     int vlen_enc = vector_length_encoding(this);
21624     if (!VM_Version::supports_avx512vl()) {
21625       vlen_enc = Assembler::AVX_512bit;
21626     }
21627     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21628   %}
21629   ins_pipe( pipe_slow );
21630 %}
21631 
// Integer variable shift
21633 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21634   predicate(n->as_ShiftV()->is_var_shift());
21635   match(Set dst ( LShiftVI src shift));
21636   match(Set dst ( RShiftVI src shift));
21637   match(Set dst (URShiftVI src shift));
21638   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21639   ins_encode %{
21640     assert(UseAVX >= 2, "required");
21641 
21642     int opcode = this->ideal_Opcode();
21643     int vlen_enc = vector_length_encoding(this);
21644     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21645   %}
21646   ins_pipe( pipe_slow );
21647 %}
21648 
// Long variable shift
21650 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21651   predicate(n->as_ShiftV()->is_var_shift());
21652   match(Set dst ( LShiftVL src shift));
21653   match(Set dst (URShiftVL src shift));
21654   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21655   ins_encode %{
21656     assert(UseAVX >= 2, "required");
21657 
21658     int opcode = this->ideal_Opcode();
21659     int vlen_enc = vector_length_encoding(this);
21660     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21661   %}
21662   ins_pipe( pipe_slow );
21663 %}
21664 
// Long variable arithmetic right shift
21666 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21667   predicate(Matcher::vector_length(n) <= 4 &&
21668             n->as_ShiftV()->is_var_shift() &&
21669             UseAVX == 2);
21670   match(Set dst (RShiftVL src shift));
21671   effect(TEMP dst, TEMP vtmp);
21672   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21673   ins_encode %{
21674     int opcode = this->ideal_Opcode();
21675     int vlen_enc = vector_length_encoding(this);
21676     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21677                  $vtmp$$XMMRegister);
21678   %}
21679   ins_pipe( pipe_slow );
21680 %}
21681 
21682 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21683   predicate(n->as_ShiftV()->is_var_shift() &&
21684             UseAVX > 2);
21685   match(Set dst (RShiftVL src shift));
21686   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21687   ins_encode %{
21688     int opcode = this->ideal_Opcode();
21689     int vlen_enc = vector_length_encoding(this);
21690     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21691   %}
21692   ins_pipe( pipe_slow );
21693 %}
21694 
21695 // --------------------------------- AND --------------------------------------
21696 
21697 instruct vand(vec dst, vec src) %{
21698   predicate(UseAVX == 0);
21699   match(Set dst (AndV dst src));
21700   format %{ "pand    $dst,$src\t! and vectors" %}
21701   ins_encode %{
21702     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21703   %}
21704   ins_pipe( pipe_slow );
21705 %}
21706 
21707 instruct vand_reg(vec dst, vec src1, vec src2) %{
21708   predicate(UseAVX > 0);
21709   match(Set dst (AndV src1 src2));
21710   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21711   ins_encode %{
21712     int vlen_enc = vector_length_encoding(this);
21713     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21714   %}
21715   ins_pipe( pipe_slow );
21716 %}
21717 
21718 instruct vand_mem(vec dst, vec src, memory mem) %{
21719   predicate((UseAVX > 0) &&
21720             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21721   match(Set dst (AndV src (LoadVector mem)));
21722   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21723   ins_encode %{
21724     int vlen_enc = vector_length_encoding(this);
21725     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21726   %}
21727   ins_pipe( pipe_slow );
21728 %}
21729 
21730 // --------------------------------- OR ---------------------------------------
21731 
21732 instruct vor(vec dst, vec src) %{
21733   predicate(UseAVX == 0);
21734   match(Set dst (OrV dst src));
21735   format %{ "por     $dst,$src\t! or vectors" %}
21736   ins_encode %{
21737     __ por($dst$$XMMRegister, $src$$XMMRegister);
21738   %}
21739   ins_pipe( pipe_slow );
21740 %}
21741 
21742 instruct vor_reg(vec dst, vec src1, vec src2) %{
21743   predicate(UseAVX > 0);
21744   match(Set dst (OrV src1 src2));
21745   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21746   ins_encode %{
21747     int vlen_enc = vector_length_encoding(this);
21748     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21749   %}
21750   ins_pipe( pipe_slow );
21751 %}
21752 
21753 instruct vor_mem(vec dst, vec src, memory mem) %{
21754   predicate((UseAVX > 0) &&
21755             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21756   match(Set dst (OrV src (LoadVector mem)));
21757   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21758   ins_encode %{
21759     int vlen_enc = vector_length_encoding(this);
21760     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21761   %}
21762   ins_pipe( pipe_slow );
21763 %}
21764 
21765 // --------------------------------- XOR --------------------------------------
21766 
21767 instruct vxor(vec dst, vec src) %{
21768   predicate(UseAVX == 0);
21769   match(Set dst (XorV dst src));
21770   format %{ "pxor    $dst,$src\t! xor vectors" %}
21771   ins_encode %{
21772     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21773   %}
21774   ins_pipe( pipe_slow );
21775 %}
21776 
21777 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21778   predicate(UseAVX > 0);
21779   match(Set dst (XorV src1 src2));
21780   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21781   ins_encode %{
21782     int vlen_enc = vector_length_encoding(this);
21783     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21784   %}
21785   ins_pipe( pipe_slow );
21786 %}
21787 
21788 instruct vxor_mem(vec dst, vec src, memory mem) %{
21789   predicate((UseAVX > 0) &&
21790             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21791   match(Set dst (XorV src (LoadVector mem)));
21792   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21793   ins_encode %{
21794     int vlen_enc = vector_length_encoding(this);
21795     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21796   %}
21797   ins_pipe( pipe_slow );
21798 %}
21799 
21800 // --------------------------------- VectorCast --------------------------------------
21801 
21802 instruct vcastBtoX(vec dst, vec src) %{
21803   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21804   match(Set dst (VectorCastB2X src));
21805   format %{ "vector_cast_b2x $dst,$src\t!" %}
21806   ins_encode %{
21807     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21808     int vlen_enc = vector_length_encoding(this);
21809     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21810   %}
21811   ins_pipe( pipe_slow );
21812 %}
21813 
21814 instruct vcastBtoD(legVec dst, legVec src) %{
21815   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21816   match(Set dst (VectorCastB2X src));
21817   format %{ "vector_cast_b2x $dst,$src\t!" %}
21818   ins_encode %{
21819     int vlen_enc = vector_length_encoding(this);
21820     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21821   %}
21822   ins_pipe( pipe_slow );
21823 %}
21824 
21825 instruct castStoX(vec dst, vec src) %{
21826   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21827             Matcher::vector_length(n->in(1)) <= 8 && // src
21828             Matcher::vector_element_basic_type(n) == T_BYTE);
21829   match(Set dst (VectorCastS2X src));
21830   format %{ "vector_cast_s2x $dst,$src" %}
21831   ins_encode %{
21832     assert(UseAVX > 0, "required");
21833 
21834     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21835     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21836   %}
21837   ins_pipe( pipe_slow );
21838 %}
21839 
21840 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21841   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21842             Matcher::vector_length(n->in(1)) == 16 && // src
21843             Matcher::vector_element_basic_type(n) == T_BYTE);
21844   effect(TEMP dst, TEMP vtmp);
21845   match(Set dst (VectorCastS2X src));
21846   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21847   ins_encode %{
21848     assert(UseAVX > 0, "required");
21849 
21850     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21851     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21852     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21853     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21854   %}
21855   ins_pipe( pipe_slow );
21856 %}
21857 
21858 instruct vcastStoX_evex(vec dst, vec src) %{
21859   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21860             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21861   match(Set dst (VectorCastS2X src));
21862   format %{ "vector_cast_s2x $dst,$src\t!" %}
21863   ins_encode %{
21864     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21865     int src_vlen_enc = vector_length_encoding(this, $src);
21866     int vlen_enc = vector_length_encoding(this);
21867     switch (to_elem_bt) {
21868       case T_BYTE:
21869         if (!VM_Version::supports_avx512vl()) {
21870           vlen_enc = Assembler::AVX_512bit;
21871         }
21872         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21873         break;
21874       case T_INT:
21875         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21876         break;
21877       case T_FLOAT:
21878         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21879         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21880         break;
21881       case T_LONG:
21882         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21883         break;
21884       case T_DOUBLE: {
21885         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21886         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21887         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21888         break;
21889       }
21890       default:
21891         ShouldNotReachHere();
21892     }
21893   %}
21894   ins_pipe( pipe_slow );
21895 %}
21896 
21897 instruct castItoX(vec dst, vec src) %{
21898   predicate(UseAVX <= 2 &&
21899             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21900             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21901   match(Set dst (VectorCastI2X src));
21902   format %{ "vector_cast_i2x $dst,$src" %}
21903   ins_encode %{
21904     assert(UseAVX > 0, "required");
21905 
21906     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21907     int vlen_enc = vector_length_encoding(this, $src);
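    // Masking first makes every lane fit the narrow type, so the unsigned
    // saturating packs below behave as plain truncation.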
21908 
21909     if (to_elem_bt == T_BYTE) {
21910       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21911       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21912       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21913     } else {
21914       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21915       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21916       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21917     }
21918   %}
21919   ins_pipe( pipe_slow );
21920 %}
21921 
21922 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21923   predicate(UseAVX <= 2 &&
21924             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21925             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21926   match(Set dst (VectorCastI2X src));
21927   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21928   effect(TEMP dst, TEMP vtmp);
21929   ins_encode %{
21930     assert(UseAVX > 0, "required");
21931 
21932     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21933     int vlen_enc = vector_length_encoding(this, $src);
21934 
21935     if (to_elem_bt == T_BYTE) {
21936       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21937       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21938       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21939       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21940     } else {
21941       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21942       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21943       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21944       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21945     }
21946   %}
21947   ins_pipe( pipe_slow );
21948 %}
21949 
21950 instruct vcastItoX_evex(vec dst, vec src) %{
21951   predicate(UseAVX > 2 ||
21952             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21953   match(Set dst (VectorCastI2X src));
21954   format %{ "vector_cast_i2x $dst,$src\t!" %}
21955   ins_encode %{
21956     assert(UseAVX > 0, "required");
21957 
21958     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21959     int src_vlen_enc = vector_length_encoding(this, $src);
21960     int dst_vlen_enc = vector_length_encoding(this);
21961     switch (dst_elem_bt) {
21962       case T_BYTE:
21963         if (!VM_Version::supports_avx512vl()) {
21964           src_vlen_enc = Assembler::AVX_512bit;
21965         }
21966         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21967         break;
21968       case T_SHORT:
21969         if (!VM_Version::supports_avx512vl()) {
21970           src_vlen_enc = Assembler::AVX_512bit;
21971         }
21972         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21973         break;
21974       case T_FLOAT:
21975         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21976         break;
21977       case T_LONG:
21978         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21979         break;
21980       case T_DOUBLE:
21981         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21982         break;
21983       default:
21984         ShouldNotReachHere();
21985     }
21986   %}
21987   ins_pipe( pipe_slow );
21988 %}
21989 
21990 instruct vcastLtoBS(vec dst, vec src) %{
21991   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21992             UseAVX <= 2);
21993   match(Set dst (VectorCastL2X src));
21994   format %{ "vector_cast_l2x  $dst,$src" %}
21995   ins_encode %{
21996     assert(UseAVX > 0, "required");
21997 
21998     int vlen = Matcher::vector_length_in_bytes(this, $src);
21999     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22000     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22001                                                       : ExternalAddress(vector_int_to_short_mask());
22002     if (vlen <= 16) {
22003       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22004       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22005       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22006     } else {
22007       assert(vlen <= 32, "required");
22008       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22009       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22010       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22011       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22012     }
22013     if (to_elem_bt == T_BYTE) {
22014       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22015     }
22016   %}
22017   ins_pipe( pipe_slow );
22018 %}
22019 
22020 instruct vcastLtoX_evex(vec dst, vec src) %{
22021   predicate(UseAVX > 2 ||
22022             (Matcher::vector_element_basic_type(n) == T_INT ||
22023              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22024              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22025   match(Set dst (VectorCastL2X src));
22026   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22027   ins_encode %{
22028     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22029     int vlen = Matcher::vector_length_in_bytes(this, $src);
22030     int vlen_enc = vector_length_encoding(this, $src);
22031     switch (to_elem_bt) {
22032       case T_BYTE:
22033         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22034           vlen_enc = Assembler::AVX_512bit;
22035         }
22036         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22037         break;
22038       case T_SHORT:
22039         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22040           vlen_enc = Assembler::AVX_512bit;
22041         }
22042         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22043         break;
22044       case T_INT:
22045         if (vlen == 8) {
22046           if ($dst$$XMMRegister != $src$$XMMRegister) {
22047             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22048           }
22049         } else if (vlen == 16) {
22050           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22051         } else if (vlen == 32) {
22052           if (UseAVX > 2) {
22053             if (!VM_Version::supports_avx512vl()) {
22054               vlen_enc = Assembler::AVX_512bit;
22055             }
22056             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22057           } else {
22058             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22059             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22060           }
22061         } else { // vlen == 64
22062           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22063         }
22064         break;
22065       case T_FLOAT:
22066         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22067         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22068         break;
22069       case T_DOUBLE:
22070         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22071         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22072         break;
22074       default: assert(false, "%s", type2name(to_elem_bt));
22075     }
22076   %}
22077   ins_pipe( pipe_slow );
22078 %}
22079 
22080 instruct vcastFtoD_reg(vec dst, vec src) %{
22081   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22082   match(Set dst (VectorCastF2X src));
22083   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22084   ins_encode %{
22085     int vlen_enc = vector_length_encoding(this);
22086     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22087   %}
22088   ins_pipe( pipe_slow );
22089 %}
22090 
22092 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22093   predicate(!VM_Version::supports_avx10_2() &&
22094             !VM_Version::supports_avx512vl() &&
22095             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22096             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22097             is_integral_type(Matcher::vector_element_basic_type(n)));
22098   match(Set dst (VectorCastF2X src));
22099   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22100   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22101   ins_encode %{
22102     int vlen_enc = vector_length_encoding(this, $src);
22103     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the explicit scratch register once needed to load addresses wider
    // than 32 bits in register-indirect addressing mode: stub constants live in the code
    // cache, and ReservedCodeCacheSize is currently capped at 2G. Targets are free to raise
    // that cap, but a code cache larger than 2G is unreasonable in practice, and with the
    // cap in place we save a temporary register allocation, which in the limiting case can
    // prevent spilling in blocks with high register pressure.
22111     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22112                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22113                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22114   %}
22115   ins_pipe( pipe_slow );
22116 %}
22117 
22118 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22119   predicate(!VM_Version::supports_avx10_2() &&
22120             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22121             is_integral_type(Matcher::vector_element_basic_type(n)));
22122   match(Set dst (VectorCastF2X src));
22123   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22124   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22125   ins_encode %{
22126     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22127     if (to_elem_bt == T_LONG) {
22128       int vlen_enc = vector_length_encoding(this);
22129       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22130                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22131                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22132     } else {
22133       int vlen_enc = vector_length_encoding(this, $src);
22134       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22135                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22136                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22137     }
22138   %}
22139   ins_pipe( pipe_slow );
22140 %}
22141 
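// AVX10.2 adds saturating float-to-integer converts, so these rules need neither
// temporaries nor a NaN/overflow fix-up (contrast the TEMP/KILL lists above);
// the memory form folds the vector load into the convert.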
22142 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22143   predicate(VM_Version::supports_avx10_2() &&
22144             is_integral_type(Matcher::vector_element_basic_type(n)));
22145   match(Set dst (VectorCastF2X src));
22146   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22147   ins_encode %{
22148     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22149     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22150     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22151   %}
22152   ins_pipe( pipe_slow );
22153 %}
22154 
22155 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22156   predicate(VM_Version::supports_avx10_2() &&
22157             is_integral_type(Matcher::vector_element_basic_type(n)));
22158   match(Set dst (VectorCastF2X (LoadVector src)));
22159   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22160   ins_encode %{
22161     int vlen = Matcher::vector_length(this);
22162     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22163     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22164     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22165   %}
22166   ins_pipe( pipe_slow );
22167 %}
22168 
22169 instruct vcastDtoF_reg(vec dst, vec src) %{
22170   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22171   match(Set dst (VectorCastD2X src));
22172   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22173   ins_encode %{
22174     int vlen_enc = vector_length_encoding(this, $src);
22175     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22176   %}
22177   ins_pipe( pipe_slow );
22178 %}
22179 
22180 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22181   predicate(!VM_Version::supports_avx10_2() &&
22182             !VM_Version::supports_avx512vl() &&
22183             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22184             is_integral_type(Matcher::vector_element_basic_type(n)));
22185   match(Set dst (VectorCastD2X src));
22186   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22187   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22188   ins_encode %{
22189     int vlen_enc = vector_length_encoding(this, $src);
22190     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22191     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22192                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22193                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22194   %}
22195   ins_pipe( pipe_slow );
22196 %}
22197 
22198 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22199   predicate(!VM_Version::supports_avx10_2() &&
22200             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22201             is_integral_type(Matcher::vector_element_basic_type(n)));
22202   match(Set dst (VectorCastD2X src));
22203   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22204   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22205   ins_encode %{
22206     int vlen_enc = vector_length_encoding(this, $src);
22207     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22208     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22209                               ExternalAddress(vector_float_signflip());
22210     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22211                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22212   %}
22213   ins_pipe( pipe_slow );
22214 %}
22215 
22216 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22217   predicate(VM_Version::supports_avx10_2() &&
22218             is_integral_type(Matcher::vector_element_basic_type(n)));
22219   match(Set dst (VectorCastD2X src));
22220   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22221   ins_encode %{
22222     int vlen_enc = vector_length_encoding(this, $src);
22223     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22224     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22225   %}
22226   ins_pipe( pipe_slow );
22227 %}
22228 
22229 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22230   predicate(VM_Version::supports_avx10_2() &&
22231             is_integral_type(Matcher::vector_element_basic_type(n)));
22232   match(Set dst (VectorCastD2X (LoadVector src)));
22233   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22234   ins_encode %{
22235     int vlen = Matcher::vector_length(this);
22236     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22237     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22238     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22239   %}
22240   ins_pipe( pipe_slow );
22241 %}
22242 
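// Zero-extending (unsigned) widening casts from byte/short/int sources; a single
// rule covers them all, with vector_unsigned_cast dispatching on the from/to
// element types.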
22243 instruct vucast(vec dst, vec src) %{
22244   match(Set dst (VectorUCastB2X src));
22245   match(Set dst (VectorUCastS2X src));
22246   match(Set dst (VectorUCastI2X src));
22247   format %{ "vector_ucast $dst,$src\t!" %}
22248   ins_encode %{
22249     assert(UseAVX > 0, "required");
22250 
22251     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22252     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22253     int vlen_enc = vector_length_encoding(this);
22254     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22255   %}
22256   ins_pipe( pipe_slow );
22257 %}
22258 
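// Vector Math.round. The helpers temporarily switch MXCSR to the constant loaded
// here: 0x3F80 is the default MXCSR value (0x1F80) with the rounding-control bits
// set to round-down, matching Math.round's floor(x + 0.5) semantics. With
// EnableX86ECoreOpts the six exception-flag bits are additionally pre-set
// (0x3FBF), presumably so the MXCSR update is cheaper on E-cores.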
22259 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22260   predicate(!VM_Version::supports_avx512vl() &&
22261             Matcher::vector_length_in_bytes(n) < 64 &&
22262             Matcher::vector_element_basic_type(n) == T_INT);
22263   match(Set dst (RoundVF src));
22264   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22265   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22266   ins_encode %{
22267     int vlen_enc = vector_length_encoding(this);
22268     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22269     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22270                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22271                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22272   %}
22273   ins_pipe( pipe_slow );
22274 %}
22275 
22276 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22277   predicate((VM_Version::supports_avx512vl() ||
22278              Matcher::vector_length_in_bytes(n) == 64) &&
22279              Matcher::vector_element_basic_type(n) == T_INT);
22280   match(Set dst (RoundVF src));
22281   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22282   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22283   ins_encode %{
22284     int vlen_enc = vector_length_encoding(this);
22285     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22286     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22287                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22288                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22289   %}
22290   ins_pipe( pipe_slow );
22291 %}
22292 
22293 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22294   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22295   match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22297   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22298   ins_encode %{
22299     int vlen_enc = vector_length_encoding(this);
22300     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22301     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22302                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22303                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22304   %}
22305   ins_pipe( pipe_slow );
22306 %}
22307 
22308 // --------------------------------- VectorMaskCmp --------------------------------------
22309 
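// VectorMaskCmp compares src1 and src2 element-wise according to cond and yields
// a lane mask. The AVX rules produce the mask directly in a vector register; the
// EVEX rules compare into an opmask (k) register and, when the result type is
// not a true vector mask, expand it back into a vector of all-ones/all-zeroes
// elements via a masked move of vector_all_bits_set.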
22310 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22311   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22312             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22313             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22314             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22315   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22316   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22317   ins_encode %{
22318     int vlen_enc = vector_length_encoding(this, $src1);
22319     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22320     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22321       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22322     } else {
22323       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22324     }
22325   %}
22326   ins_pipe( pipe_slow );
22327 %}
22328 
22329 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22330   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22331             n->bottom_type()->isa_vectmask() == nullptr &&
22332             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22333   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22334   effect(TEMP ktmp);
22335   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22336   ins_encode %{
22337     int vlen_enc = Assembler::AVX_512bit;
22338     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22339     KRegister mask = k0; // The comparison itself is not being masked.
22340     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22341       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22342       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22343     } else {
22344       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22345       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22346     }
22347   %}
22348   ins_pipe( pipe_slow );
22349 %}
22350 
22351 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22352   predicate(n->bottom_type()->isa_vectmask() &&
22353             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22354   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22355   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22356   ins_encode %{
22357     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22358     int vlen_enc = vector_length_encoding(this, $src1);
22359     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22360     KRegister mask = k0; // The comparison itself is not being masked.
22361     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22362       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22363     } else {
22364       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22365     }
22366   %}
22367   ins_pipe( pipe_slow );
22368 %}
22369 
22370 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22371   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22372             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22373             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22374             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22375             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22376             (n->in(2)->get_int() == BoolTest::eq ||
22377              n->in(2)->get_int() == BoolTest::lt ||
22378              n->in(2)->get_int() == BoolTest::gt)); // cond
22379   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22380   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22381   ins_encode %{
22382     int vlen_enc = vector_length_encoding(this, $src1);
22383     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22384     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22385     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22386   %}
22387   ins_pipe( pipe_slow );
22388 %}
22389 
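// eq/lt/gt have direct packed-compare encodings (rule above); ne/le/ge do not,
// so the rule below emits the complementary compare and negates the result,
// with xtmp holding the inversion constant.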
22390 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22391   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22392             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22393             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22394             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22395             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22396             (n->in(2)->get_int() == BoolTest::ne ||
22397              n->in(2)->get_int() == BoolTest::le ||
22398              n->in(2)->get_int() == BoolTest::ge)); // cond
22399   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22400   effect(TEMP dst, TEMP xtmp);
22401   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22402   ins_encode %{
22403     int vlen_enc = vector_length_encoding(this, $src1);
22404     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22405     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22406     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22407   %}
22408   ins_pipe( pipe_slow );
22409 %}
22410 
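// Unsigned compare without AVX-512: there are no unsigned packed compares, so the
// sign bit of both operands is flipped (xor with a broadcast of high_bit_set) and
// a signed compare is used instead; x <u y iff (x ^ signbit) <s (y ^ signbit).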
22411 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22412   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22413             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22414             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22415             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22416             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22417   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22418   effect(TEMP dst, TEMP xtmp);
22419   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22420   ins_encode %{
22421     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22422     int vlen_enc = vector_length_encoding(this, $src1);
22423     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22424     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22425 
22426     if (vlen_enc == Assembler::AVX_128bit) {
22427       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22428     } else {
22429       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22430     }
22431     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22432     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22433     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22434   %}
22435   ins_pipe( pipe_slow );
22436 %}
22437 
22438 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22439   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22440              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22441              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22442   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22443   effect(TEMP ktmp);
22444   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22445   ins_encode %{
22446     assert(UseAVX > 2, "required");
22447 
22448     int vlen_enc = vector_length_encoding(this, $src1);
22449     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22450     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22451     KRegister mask = k0; // The comparison itself is not being masked.
22452     bool merge = false;
22453     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22454 
22455     switch (src1_elem_bt) {
22456       case T_INT: {
22457         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22458         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22459         break;
22460       }
22461       case T_LONG: {
22462         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22463         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22464         break;
22465       }
22466       default: assert(false, "%s", type2name(src1_elem_bt));
22467     }
22468   %}
22469   ins_pipe( pipe_slow );
22470 %}
22471 
22473 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22474   predicate(n->bottom_type()->isa_vectmask() &&
22475             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22476   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22477   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22478   ins_encode %{
22479     assert(UseAVX > 2, "required");
22480     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22481 
22482     int vlen_enc = vector_length_encoding(this, $src1);
22483     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22484     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22485     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22486 
    // Emit the compare appropriate to the source element type.
22488     switch (src1_elem_bt) {
22489       case T_BYTE: {
22490         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22491         break;
22492       }
22493       case T_SHORT: {
22494         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22495         break;
22496       }
22497       case T_INT: {
22498         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22499         break;
22500       }
22501       case T_LONG: {
22502         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22503         break;
22504       }
22505       default: assert(false, "%s", type2name(src1_elem_bt));
22506     }
22507   %}
22508   ins_pipe( pipe_slow );
22509 %}
22510 
22511 // Extract
22512 
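// Extract a single element at a constant index into a scalar register. For
// vectors up to 128 bits the element is read directly from src; for 256/512-bit
// vectors the 128-bit lane containing it is first copied into vtmp (get_lane)
// and the element is then taken from that lane (get_elem).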
22513 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22514   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22515   match(Set dst (ExtractI src idx));
22516   match(Set dst (ExtractS src idx));
22517   match(Set dst (ExtractB src idx));
22518   format %{ "extractI $dst,$src,$idx\t!" %}
22519   ins_encode %{
22520     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22521 
22522     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22523     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22524   %}
22525   ins_pipe( pipe_slow );
22526 %}
22527 
22528 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22529   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22530             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22531   match(Set dst (ExtractI src idx));
22532   match(Set dst (ExtractS src idx));
22533   match(Set dst (ExtractB src idx));
22534   effect(TEMP vtmp);
22535   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22536   ins_encode %{
22537     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22538 
22539     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22540     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22541     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22542   %}
22543   ins_pipe( pipe_slow );
22544 %}
22545 
22546 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22547   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22548   match(Set dst (ExtractL src idx));
22549   format %{ "extractL $dst,$src,$idx\t!" %}
22550   ins_encode %{
22551     assert(UseSSE >= 4, "required");
22552     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22553 
22554     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22555   %}
22556   ins_pipe( pipe_slow );
22557 %}
22558 
22559 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22560   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22561             Matcher::vector_length(n->in(1)) == 8);  // src
22562   match(Set dst (ExtractL src idx));
22563   effect(TEMP vtmp);
22564   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22565   ins_encode %{
22566     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22567 
22568     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22569     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22570   %}
22571   ins_pipe( pipe_slow );
22572 %}
22573 
22574 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22575   predicate(Matcher::vector_length(n->in(1)) <= 4);
22576   match(Set dst (ExtractF src idx));
22577   effect(TEMP dst, TEMP vtmp);
22578   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22579   ins_encode %{
22580     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22581 
22582     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22583   %}
22584   ins_pipe( pipe_slow );
22585 %}
22586 
22587 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22588   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22589             Matcher::vector_length(n->in(1)/*src*/) == 16);
22590   match(Set dst (ExtractF src idx));
22591   effect(TEMP vtmp);
22592   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22593   ins_encode %{
22594     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22595 
22596     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22597     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22598   %}
22599   ins_pipe( pipe_slow );
22600 %}
22601 
22602 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22603   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22604   match(Set dst (ExtractD src idx));
22605   format %{ "extractD $dst,$src,$idx\t!" %}
22606   ins_encode %{
22607     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22608 
22609     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22610   %}
22611   ins_pipe( pipe_slow );
22612 %}
22613 
22614 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22615   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22616             Matcher::vector_length(n->in(1)) == 8);  // src
22617   match(Set dst (ExtractD src idx));
22618   effect(TEMP vtmp);
22619   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22620   ins_encode %{
22621     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22622 
22623     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22624     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22625   %}
22626   ins_pipe( pipe_slow );
22627 %}
22628 
22629 // --------------------------------- Vector Blend --------------------------------------
22630 
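// Per-element select: lanes with the mask bit clear take src1, lanes with it set
// take src2. SSE4.1 pblendvb reads its mask implicitly from xmm0, hence the
// rxmm0 TEMP in the no-AVX rule. With EnableX86ECoreOpts the variable blend is
// decomposed into vpandn/vpand/vpor, presumably cheaper than vpblendvb on
// E-cores.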
22631 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22632   predicate(UseAVX == 0);
22633   match(Set dst (VectorBlend (Binary dst src) mask));
22634   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22635   effect(TEMP tmp);
22636   ins_encode %{
22637     assert(UseSSE >= 4, "required");
22638 
22639     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22640       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22641     }
22642     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22643   %}
22644   ins_pipe( pipe_slow );
22645 %}
22646 
22647 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22648   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22649             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22650             Matcher::vector_length_in_bytes(n) <= 32 &&
22651             is_integral_type(Matcher::vector_element_basic_type(n)));
22652   match(Set dst (VectorBlend (Binary src1 src2) mask));
22653   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22654   ins_encode %{
22655     int vlen_enc = vector_length_encoding(this);
22656     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22657   %}
22658   ins_pipe( pipe_slow );
22659 %}
22660 
22661 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22662   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22663             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22664             Matcher::vector_length_in_bytes(n) <= 32 &&
22665             !is_integral_type(Matcher::vector_element_basic_type(n)));
22666   match(Set dst (VectorBlend (Binary src1 src2) mask));
22667   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22668   ins_encode %{
22669     int vlen_enc = vector_length_encoding(this);
22670     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22671   %}
22672   ins_pipe( pipe_slow );
22673 %}
22674 
22675 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22676   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22677             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22678             Matcher::vector_length_in_bytes(n) <= 32);
22679   match(Set dst (VectorBlend (Binary src1 src2) mask));
22680   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22681   effect(TEMP vtmp, TEMP dst);
22682   ins_encode %{
22683     int vlen_enc = vector_length_encoding(this);
22684     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22685     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22686     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22687   %}
22688   ins_pipe( pipe_slow );
22689 %}
22690 
22691 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22692   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22693             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22694   match(Set dst (VectorBlend (Binary src1 src2) mask));
22695   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22696   effect(TEMP ktmp);
22697   ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
22700     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22701     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22702   %}
22703   ins_pipe( pipe_slow );
22704 %}
22705 
22707 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22708   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22709             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22710              VM_Version::supports_avx512bw()));
22711   match(Set dst (VectorBlend (Binary src1 src2) mask));
22712   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22713   ins_encode %{
22714     int vlen_enc = vector_length_encoding(this);
22715     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22716     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22717   %}
22718   ins_pipe( pipe_slow );
22719 %}
22720 
22721 // --------------------------------- ABS --------------------------------------
22722 // a = |a|
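// Byte/short/int absolute value maps onto [v]pabs*; there is no pabsq below
// AVX-512, so the long form always uses evpabsq, widening to a 512-bit encoding
// when AVX512VL is unavailable.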
22723 instruct vabsB_reg(vec dst, vec src) %{
22724   match(Set dst (AbsVB  src));
22725   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22726   ins_encode %{
22727     uint vlen = Matcher::vector_length(this);
22728     if (vlen <= 16) {
22729       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22730     } else {
22731       int vlen_enc = vector_length_encoding(this);
22732       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22733     }
22734   %}
22735   ins_pipe( pipe_slow );
22736 %}
22737 
22738 instruct vabsS_reg(vec dst, vec src) %{
22739   match(Set dst (AbsVS  src));
22740   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22741   ins_encode %{
22742     uint vlen = Matcher::vector_length(this);
22743     if (vlen <= 8) {
22744       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22745     } else {
22746       int vlen_enc = vector_length_encoding(this);
22747       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22748     }
22749   %}
22750   ins_pipe( pipe_slow );
22751 %}
22752 
22753 instruct vabsI_reg(vec dst, vec src) %{
22754   match(Set dst (AbsVI  src));
22755   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22756   ins_encode %{
22757     uint vlen = Matcher::vector_length(this);
22758     if (vlen <= 4) {
22759       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22760     } else {
22761       int vlen_enc = vector_length_encoding(this);
22762       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22763     }
22764   %}
22765   ins_pipe( pipe_slow );
22766 %}
22767 
22768 instruct vabsL_reg(vec dst, vec src) %{
22769   match(Set dst (AbsVL  src));
22770   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22771   ins_encode %{
22772     assert(UseAVX > 2, "required");
22773     int vlen_enc = vector_length_encoding(this);
22774     if (!VM_Version::supports_avx512vl()) {
22775       vlen_enc = Assembler::AVX_512bit;
22776     }
22777     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22778   %}
22779   ins_pipe( pipe_slow );
22780 %}
22781 
22782 // --------------------------------- ABSNEG --------------------------------------
22783 
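// AbsVF/NegVF and AbsVD/NegVD share one rule per type; the ideal opcode selects
// the sign-bit mask the helper applies (clear the sign bit for abs, flip it for
// neg).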
22784 instruct vabsnegF(vec dst, vec src) %{
22785   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22786   match(Set dst (AbsVF src));
22787   match(Set dst (NegVF src));
22788   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22789   ins_cost(150);
22790   ins_encode %{
22791     int opcode = this->ideal_Opcode();
22792     int vlen = Matcher::vector_length(this);
22793     if (vlen == 2) {
22794       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22795     } else {
22796       assert(vlen == 8 || vlen == 16, "required");
22797       int vlen_enc = vector_length_encoding(this);
22798       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22799     }
22800   %}
22801   ins_pipe( pipe_slow );
22802 %}
22803 
22804 instruct vabsneg4F(vec dst) %{
22805   predicate(Matcher::vector_length(n) == 4);
22806   match(Set dst (AbsVF dst));
22807   match(Set dst (NegVF dst));
22808   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22809   ins_cost(150);
22810   ins_encode %{
22811     int opcode = this->ideal_Opcode();
22812     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22813   %}
22814   ins_pipe( pipe_slow );
22815 %}
22816 
22817 instruct vabsnegD(vec dst, vec src) %{
22818   match(Set dst (AbsVD  src));
22819   match(Set dst (NegVD  src));
22820   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22821   ins_encode %{
22822     int opcode = this->ideal_Opcode();
22823     uint vlen = Matcher::vector_length(this);
22824     if (vlen == 2) {
22825       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22826     } else {
22827       int vlen_enc = vector_length_encoding(this);
22828       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22829     }
22830   %}
22831   ins_pipe( pipe_slow );
22832 %}
22833 
22834 //------------------------------------- VectorTest --------------------------------------------
22835 
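// VectorTest sets the condition flags from a whole-vector test. Vector operands
// go through ptest-style sequences (needing a vector TEMP below 16 bytes); short
// opmask operands are copied to a GPR and tested against (1 << masklen) - 1,
// while masks of 8 (with AVX512DQ) or more bits use kortest directly.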
22836 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22837   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22838   match(Set cr (VectorTest src1 src2));
22839   effect(TEMP vtmp);
22840   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22841   ins_encode %{
22842     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22843     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22844     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22845   %}
22846   ins_pipe( pipe_slow );
22847 %}
22848 
22849 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22850   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22851   match(Set cr (VectorTest src1 src2));
22852   format %{ "vptest_ge16  $src1, $src2\n\t" %}
22853   ins_encode %{
22854     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22855     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22856     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22857   %}
22858   ins_pipe( pipe_slow );
22859 %}
22860 
22861 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22862   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22863              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22864             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22865   match(Set cr (VectorTest src1 src2));
22866   effect(TEMP tmp);
22867   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22868   ins_encode %{
22869     uint masklen = Matcher::vector_length(this, $src1);
22870     __ kmovwl($tmp$$Register, $src1$$KRegister);
22871     __ andl($tmp$$Register, (1 << masklen) - 1);
22872     __ cmpl($tmp$$Register, (1 << masklen) - 1);
22873   %}
22874   ins_pipe( pipe_slow );
22875 %}
22876 
22877 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22878   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22879              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22880             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22881   match(Set cr (VectorTest src1 src2));
22882   effect(TEMP tmp);
22883   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22884   ins_encode %{
22885     uint masklen = Matcher::vector_length(this, $src1);
22886     __ kmovwl($tmp$$Register, $src1$$KRegister);
22887     __ andl($tmp$$Register, (1 << masklen) - 1);
22888   %}
22889   ins_pipe( pipe_slow );
22890 %}
22891 
22892 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
22893   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22894             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22895   match(Set cr (VectorTest src1 src2));
22896   format %{ "ktest_ge8  $src1, $src2\n\t" %}
22897   ins_encode %{
22898     uint masklen = Matcher::vector_length(this, $src1);
22899     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22900   %}
22901   ins_pipe( pipe_slow );
22902 %}
22903 
22904 //------------------------------------- LoadMask --------------------------------------------
22905 
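// VectorLoadMask widens a boolean vector (one byte per lane, 0 or 1) into a lane
// mask: either a vector whose elements are all-zeroes/all-ones, or an opmask
// register when the node's type is a true vector mask.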
22906 instruct loadMask(legVec dst, legVec src) %{
22907   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22908   match(Set dst (VectorLoadMask src));
22909   effect(TEMP dst);
22910   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
22911   ins_encode %{
22912     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22913     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22914     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22915   %}
22916   ins_pipe( pipe_slow );
22917 %}
22918 
22919 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
22920   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22921   match(Set dst (VectorLoadMask src));
22922   effect(TEMP xtmp);
22923   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22924   ins_encode %{
22925     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22926                         true, Assembler::AVX_512bit);
22927   %}
22928   ins_pipe( pipe_slow );
22929 %}
22930 
22931 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
22932   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22933   match(Set dst (VectorLoadMask src));
22934   effect(TEMP xtmp);
22935   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22936   ins_encode %{
22937     int vlen_enc = vector_length_encoding(in(1));
22938     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22939                         false, vlen_enc);
22940   %}
22941   ins_pipe( pipe_slow );
22942 %}
22943 
22944 //------------------------------------- StoreMask --------------------------------------------
22945 
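// VectorStoreMask is the inverse of LoadMask: it narrows a lane mask back to one
// byte per element with value 0 or 1; the pabsb at the end of each sequence turns
// all-ones (-1) lanes into 1.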
22946 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
22947   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22948   match(Set dst (VectorStoreMask src size));
22949   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22950   ins_encode %{
22951     int vlen = Matcher::vector_length(this);
22952     if (vlen <= 16 && UseAVX <= 2) {
22953       assert(UseSSE >= 3, "required");
22954       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22955     } else {
22956       assert(UseAVX > 0, "required");
22957       int src_vlen_enc = vector_length_encoding(this, $src);
22958       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22959     }
22960   %}
22961   ins_pipe( pipe_slow );
22962 %}
22963 
22964 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
22965   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22966   match(Set dst (VectorStoreMask src size));
22967   effect(TEMP_DEF dst, TEMP xtmp);
22968   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22969   ins_encode %{
22970     int vlen_enc = Assembler::AVX_128bit;
22971     int vlen = Matcher::vector_length(this);
22972     if (vlen <= 8) {
22973       assert(UseSSE >= 3, "required");
22974       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22975       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22976       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22977     } else {
22978       assert(UseAVX > 0, "required");
22979       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22980       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22981       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22982     }
22983   %}
22984   ins_pipe( pipe_slow );
22985 %}
22986 
22987 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
22988   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22989   match(Set dst (VectorStoreMask src size));
22990   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22991   effect(TEMP_DEF dst, TEMP xtmp);
22992   ins_encode %{
22993     int vlen_enc = Assembler::AVX_128bit;
22994     int vlen = Matcher::vector_length(this);
22995     if (vlen <= 4) {
22996       assert(UseSSE >= 3, "required");
22997       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22998       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22999       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23000       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23001     } else {
23002       assert(UseAVX > 0, "required");
23003       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23004       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23005       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23006       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23007       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23008     }
23009   %}
23010   ins_pipe( pipe_slow );
23011 %}
23012 
23013 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23014   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23015   match(Set dst (VectorStoreMask src size));
23016   effect(TEMP_DEF dst, TEMP xtmp);
23017   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23018   ins_encode %{
23019     assert(UseSSE >= 3, "required");
23020     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23021     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23022     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23023     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23024     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23025   %}
23026   ins_pipe( pipe_slow );
23027 %}
23028 
23029 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23030   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23031   match(Set dst (VectorStoreMask src size));
23032   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23033   effect(TEMP_DEF dst, TEMP vtmp);
23034   ins_encode %{
23035     int vlen_enc = Assembler::AVX_128bit;
23036     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23037     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23038     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23039     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23040     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23041     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23042     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23043   %}
23044   ins_pipe( pipe_slow );
23045 %}
23046 
23047 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23048   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23049   match(Set dst (VectorStoreMask src size));
23050   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23051   ins_encode %{
23052     int src_vlen_enc = vector_length_encoding(this, $src);
23053     int dst_vlen_enc = vector_length_encoding(this);
23054     if (!VM_Version::supports_avx512vl()) {
23055       src_vlen_enc = Assembler::AVX_512bit;
23056     }
23057     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23058     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23059   %}
23060   ins_pipe( pipe_slow );
23061 %}
23062 
23063 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23064   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23065   match(Set dst (VectorStoreMask src size));
23066   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23067   ins_encode %{
23068     int src_vlen_enc = vector_length_encoding(this, $src);
23069     int dst_vlen_enc = vector_length_encoding(this);
23070     if (!VM_Version::supports_avx512vl()) {
23071       src_vlen_enc = Assembler::AVX_512bit;
23072     }
23073     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23074     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23075   %}
23076   ins_pipe( pipe_slow );
23077 %}
23078 
23079 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23080   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23081   match(Set dst (VectorStoreMask mask size));
23082   effect(TEMP_DEF dst);
23083   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23084   ins_encode %{
23085     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23086     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23087                  false, Assembler::AVX_512bit, noreg);
23088     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23089   %}
23090   ins_pipe( pipe_slow );
23091 %}
23092 
23093 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23094   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23095   match(Set dst (VectorStoreMask mask size));
23096   effect(TEMP_DEF dst);
23097   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23098   ins_encode %{
23099     int dst_vlen_enc = vector_length_encoding(this);
23100     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23101     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23102   %}
23103   ins_pipe( pipe_slow );
23104 %}
23105 
23106 instruct vmaskcast_evex(kReg dst) %{
23107   match(Set dst (VectorMaskCast dst));
23108   ins_cost(0);
23109   format %{ "vector_mask_cast $dst" %}
23110   ins_encode %{
23111     // empty
23112   %}
23113   ins_pipe(empty);
23114 %}
23115 
23116 instruct vmaskcast(vec dst) %{
23117   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23118   match(Set dst (VectorMaskCast dst));
23119   ins_cost(0);
23120   format %{ "vector_mask_cast $dst" %}
23121   ins_encode %{
23122     // empty
23123   %}
23124   ins_pipe(empty);
23125 %}
23126 
23127 instruct vmaskcast_avx(vec dst, vec src) %{
23128   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23129   match(Set dst (VectorMaskCast src));
23130   format %{ "vector_mask_cast $dst, $src" %}
23131   ins_encode %{
23132     int vlen = Matcher::vector_length(this);
23133     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23134     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23135     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23136   %}
23137   ins_pipe(pipe_slow);
23138 %}
23139 
23140 //-------------------------------- Load Iota Indices ----------------------------------
23141 
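// VectorLoadConst loads the index sequence 0, 1, 2, ... from a stub-generated
// constant table. PopulateIndex below computes (src1 + i) per lane by
// broadcasting src1 and adding the iota vector; only a stride (src2) of 1 is
// supported, as the immI_1 operand and assert enforce.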
23142 instruct loadIotaIndices(vec dst, immI_0 src) %{
23143   match(Set dst (VectorLoadConst src));
23144   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23145   ins_encode %{
23146      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23147      BasicType bt = Matcher::vector_element_basic_type(this);
23148      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23149   %}
23150   ins_pipe( pipe_slow );
23151 %}
23152 
23153 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23154   match(Set dst (PopulateIndex src1 src2));
23155   effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst, $src1, $src2\t! using $vtmp as TEMP" %}
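  // PopulateIndex computes dst[i] = src1 + i * src2 with the stride pinned
  // to 1 (see the assert below), i.e. dst[i] = src1 + i: broadcast the
  // scalar start value, then add the iota indices. For src1 == 10 and an
  // 8-element vector this yields [10, 11, 12, ..., 17].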
23157   ins_encode %{
23158      assert($src2$$constant == 1, "required");
23159      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23160      int vlen_enc = vector_length_encoding(this);
23161      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23162      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23163      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23164      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23165   %}
23166   ins_pipe( pipe_slow );
23167 %}
23168 
23169 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23170   match(Set dst (PopulateIndex src1 src2));
23171   effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst, $src1, $src2\t! using $vtmp as TEMP" %}
23173   ins_encode %{
23174      assert($src2$$constant == 1, "required");
23175      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23176      int vlen_enc = vector_length_encoding(this);
23177      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23178      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23179      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23180      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23181   %}
23182   ins_pipe( pipe_slow );
23183 %}
23184 
23185 //-------------------------------- Rearrange ----------------------------------
23186 
23187 // LoadShuffle/Rearrange for Byte
23188 instruct rearrangeB(vec dst, vec shuffle) %{
23189   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23190             Matcher::vector_length(n) < 32);
23191   match(Set dst (VectorRearrange dst shuffle));
23192   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23193   ins_encode %{
23194     assert(UseSSE >= 4, "required");
23195     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23196   %}
23197   ins_pipe( pipe_slow );
23198 %}
23199 
23200 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23201   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23202             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23203   match(Set dst (VectorRearrange src shuffle));
23204   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23205   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23206   ins_encode %{
23207     assert(UseAVX >= 2, "required");
23208     // Swap src into vtmp1
23209     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
23211     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
23213     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for shuffle entries that come from the other lane
23215     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23216     // Perform the blend
23217     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23218   %}
23219   ins_pipe( pipe_slow );
23220 %}
23221 
23222 
23223 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23224   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23225             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23226   match(Set dst (VectorRearrange src shuffle));
23227   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23229   ins_encode %{
23230     int vlen_enc = vector_length_encoding(this);
23231     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23232                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23233                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23234   %}
23235   ins_pipe( pipe_slow );
23236 %}
23237 
23238 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23239   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23240             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23241   match(Set dst (VectorRearrange src shuffle));
23242   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23243   ins_encode %{
23244     int vlen_enc = vector_length_encoding(this);
23245     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23246   %}
23247   ins_pipe( pipe_slow );
23248 %}
23249 
23250 // LoadShuffle/Rearrange for Short
23251 
23252 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23253   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23254             !VM_Version::supports_avx512bw());
23255   match(Set dst (VectorLoadShuffle src));
23256   effect(TEMP dst, TEMP vtmp);
23257   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23258   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask, since only
    // a byte shuffle instruction is available on these platforms.
23261     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23262     if (UseAVX == 0) {
23263       assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle index by two to get a byte index
23265       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23266       __ psllw($vtmp$$XMMRegister, 1);
23267 
23268       // Duplicate to create 2 copies of byte index
23269       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23270       __ psllw($dst$$XMMRegister, 8);
23271       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23272 
23273       // Add one to get alternate byte index
23274       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23275       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23276     } else {
23277       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23278       int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle index by two to get a byte index
23280       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23281 
23282       // Duplicate to create 2 copies of byte index
23283       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23284       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23285 
23286       // Add one to get alternate byte index
23287       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23288     }
23289   %}
23290   ins_pipe( pipe_slow );
23291 %}
23292 
23293 instruct rearrangeS(vec dst, vec shuffle) %{
23294   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23295             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23296   match(Set dst (VectorRearrange dst shuffle));
23297   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23298   ins_encode %{
23299     assert(UseSSE >= 4, "required");
23300     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23301   %}
23302   ins_pipe( pipe_slow );
23303 %}
23304 
23305 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23306   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23307             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23308   match(Set dst (VectorRearrange src shuffle));
23309   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23310   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23311   ins_encode %{
23312     assert(UseAVX >= 2, "required");
23313     // Swap src into vtmp1
23314     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
23316     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
23318     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for shuffle entries that come from the other lane
23320     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23321     // Perform the blend
23322     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23323   %}
23324   ins_pipe( pipe_slow );
23325 %}
23326 
23327 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23328   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23329             VM_Version::supports_avx512bw());
23330   match(Set dst (VectorRearrange src shuffle));
23331   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23332   ins_encode %{
23333     int vlen_enc = vector_length_encoding(this);
23334     if (!VM_Version::supports_avx512vl()) {
23335       vlen_enc = Assembler::AVX_512bit;
23336     }
23337     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23338   %}
23339   ins_pipe( pipe_slow );
23340 %}
23341 
23342 // LoadShuffle/Rearrange for Integer and Float
23343 
23344 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23345   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23346             Matcher::vector_length(n) == 4 && UseAVX == 0);
23347   match(Set dst (VectorLoadShuffle src));
23348   effect(TEMP dst, TEMP vtmp);
23349   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23350   ins_encode %{
23351     assert(UseSSE >= 4, "required");
23352 
    // Create a byte shuffle mask from the int shuffle mask, since only
    // a byte shuffle instruction is available on these platforms.
23355 
    // Duplicate and multiply each shuffle index by 4
23357     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23358     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23359     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23360     __ psllw($vtmp$$XMMRegister, 2);
23361 
23362     // Duplicate again to create 4 copies of byte index
23363     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23364     __ psllw($dst$$XMMRegister, 8);
23365     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23366 
23367     // Add 3,2,1,0 to get alternate byte index
23368     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23369     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23370   %}
23371   ins_pipe( pipe_slow );
23372 %}
23373 
23374 instruct rearrangeI(vec dst, vec shuffle) %{
23375   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23376             UseAVX == 0);
23377   match(Set dst (VectorRearrange dst shuffle));
23378   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23379   ins_encode %{
23380     assert(UseSSE >= 4, "required");
23381     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23382   %}
23383   ins_pipe( pipe_slow );
23384 %}
23385 
23386 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23387   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23388             UseAVX > 0);
23389   match(Set dst (VectorRearrange src shuffle));
23390   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23391   ins_encode %{
23392     int vlen_enc = vector_length_encoding(this);
23393     BasicType bt = Matcher::vector_element_basic_type(this);
23394     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23395   %}
23396   ins_pipe( pipe_slow );
23397 %}
23398 
23399 // LoadShuffle/Rearrange for Long and Double
23400 
23401 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23402   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23403             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23404   match(Set dst (VectorLoadShuffle src));
23405   effect(TEMP dst, TEMP vtmp);
23406   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23407   ins_encode %{
23408     assert(UseAVX >= 2, "required");
23409 
23410     int vlen_enc = vector_length_encoding(this);
    // Create a double-word shuffle mask from the long shuffle mask, since only
    // a double-word shuffle instruction is available on these platforms.
23413 
    // Multiply each shuffle index by two to get a double-word index
23415     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23416 
    // Duplicate each double-word index
23418     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23419     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23420 
    // Add one to get the alternate double-word index
23422     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23423   %}
23424   ins_pipe( pipe_slow );
23425 %}
23426 
23427 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23428   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23429             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23430   match(Set dst (VectorRearrange src shuffle));
23431   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23432   ins_encode %{
23433     assert(UseAVX >= 2, "required");
23434 
23435     int vlen_enc = vector_length_encoding(this);
23436     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23437   %}
23438   ins_pipe( pipe_slow );
23439 %}
23440 
23441 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23442   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23443             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23444   match(Set dst (VectorRearrange src shuffle));
23445   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23446   ins_encode %{
23447     assert(UseAVX > 2, "required");
23448 
23449     int vlen_enc = vector_length_encoding(this);
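    // vpermq with a vector shuffle operand has no 128-bit form, so promote
    // to the 256-bit encoding; the upper lanes are simply ignored.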
23450     if (vlen_enc == Assembler::AVX_128bit) {
23451       vlen_enc = Assembler::AVX_256bit;
23452     }
23453     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23454   %}
23455   ins_pipe( pipe_slow );
23456 %}
23457 
23458 // --------------------------------- FMA --------------------------------------
23459 // a * b + c
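// The multiply and the add are fused and rounded only once, matching the
// single-rounding semantics that Math.fma requires on the Java side.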
23460 
23461 instruct vfmaF_reg(vec a, vec b, vec c) %{
23462   match(Set c (FmaVF  c (Binary a b)));
23463   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23464   ins_cost(150);
23465   ins_encode %{
23466     assert(UseFMA, "not enabled");
23467     int vlen_enc = vector_length_encoding(this);
23468     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23469   %}
23470   ins_pipe( pipe_slow );
23471 %}
23472 
23473 instruct vfmaF_mem(vec a, memory b, vec c) %{
23474   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23475   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23476   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23477   ins_cost(150);
23478   ins_encode %{
23479     assert(UseFMA, "not enabled");
23480     int vlen_enc = vector_length_encoding(this);
23481     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23482   %}
23483   ins_pipe( pipe_slow );
23484 %}
23485 
23486 instruct vfmaD_reg(vec a, vec b, vec c) %{
23487   match(Set c (FmaVD  c (Binary a b)));
23488   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23489   ins_cost(150);
23490   ins_encode %{
23491     assert(UseFMA, "not enabled");
23492     int vlen_enc = vector_length_encoding(this);
23493     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23494   %}
23495   ins_pipe( pipe_slow );
23496 %}
23497 
23498 instruct vfmaD_mem(vec a, memory b, vec c) %{
23499   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23500   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23501   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23502   ins_cost(150);
23503   ins_encode %{
23504     assert(UseFMA, "not enabled");
23505     int vlen_enc = vector_length_encoding(this);
23506     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23507   %}
23508   ins_pipe( pipe_slow );
23509 %}
23510 
23511 // --------------------------------- Vector Multiply Add --------------------------------------
23512 
23513 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23514   predicate(UseAVX == 0);
23515   match(Set dst (MulAddVS2VI dst src1));
23516   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23517   ins_encode %{
23518     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23519   %}
23520   ins_pipe( pipe_slow );
23521 %}
23522 
23523 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23524   predicate(UseAVX > 0);
23525   match(Set dst (MulAddVS2VI src1 src2));
23526   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23527   ins_encode %{
23528     int vlen_enc = vector_length_encoding(this);
23529     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23530   %}
23531   ins_pipe( pipe_slow );
23532 %}
23533 
23534 // --------------------------------- Vector Multiply Add Add ----------------------------------
23535 
23536 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23537   predicate(VM_Version::supports_avx512_vnni());
23538   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23539   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23540   ins_encode %{
23541     assert(UseAVX > 2, "required");
23542     int vlen_enc = vector_length_encoding(this);
23543     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23544   %}
23545   ins_pipe( pipe_slow );
23546   ins_cost(10);
23547 %}
23548 
23549 // --------------------------------- PopCount --------------------------------------
23550 
23551 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23552   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23553   match(Set dst (PopCountVI src));
23554   match(Set dst (PopCountVL src));
23555   format %{ "vector_popcount_integral $dst, $src" %}
23556   ins_encode %{
23558     int vlen_enc = vector_length_encoding(this, $src);
23559     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23560     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23561   %}
23562   ins_pipe( pipe_slow );
23563 %}
23564 
23565 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23566   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23567   match(Set dst (PopCountVI src mask));
23568   match(Set dst (PopCountVL src mask));
23569   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23570   ins_encode %{
23571     int vlen_enc = vector_length_encoding(this, $src);
23572     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23573     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23574     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23575   %}
23576   ins_pipe( pipe_slow );
23577 %}
23578 
23579 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23580   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23581   match(Set dst (PopCountVI src));
23582   match(Set dst (PopCountVL src));
23583   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23584   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23585   ins_encode %{
23587     int vlen_enc = vector_length_encoding(this, $src);
23588     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23589     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23590                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23591   %}
23592   ins_pipe( pipe_slow );
23593 %}
23594 
23595 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23596 
23597 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23598   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23599                                               Matcher::vector_length_in_bytes(n->in(1))));
23600   match(Set dst (CountTrailingZerosV src));
23601   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23602   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23604   ins_encode %{
23605     int vlen_enc = vector_length_encoding(this, $src);
23606     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23607     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23608                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23609   %}
23610   ins_pipe( pipe_slow );
23611 %}
23612 
23613 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23614   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23615             VM_Version::supports_avx512cd() &&
23616             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23617   match(Set dst (CountTrailingZerosV src));
23618   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23619   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23621   ins_encode %{
23622     int vlen_enc = vector_length_encoding(this, $src);
23623     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23624     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23625                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23626   %}
23627   ins_pipe( pipe_slow );
23628 %}
23629 
23630 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23631   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23632   match(Set dst (CountTrailingZerosV src));
23633   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23634   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23636   ins_encode %{
23637     int vlen_enc = vector_length_encoding(this, $src);
23638     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23639     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23640                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23641                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23642   %}
23643   ins_pipe( pipe_slow );
23644 %}
23645 
23646 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23647   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23648   match(Set dst (CountTrailingZerosV src));
23649   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23650   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23651   ins_encode %{
23652     int vlen_enc = vector_length_encoding(this, $src);
23653     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23654     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23655                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23656   %}
23657   ins_pipe( pipe_slow );
23658 %}
23659 
23660 
23661 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23662 
23663 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23664   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23665   effect(TEMP dst);
23666   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23667   ins_encode %{
23668     int vector_len = vector_length_encoding(this);
23669     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23670   %}
23671   ins_pipe( pipe_slow );
23672 %}
23673 
23674 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23675   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23676   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23677   effect(TEMP dst);
23678   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23679   ins_encode %{
23680     int vector_len = vector_length_encoding(this);
23681     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23682   %}
23683   ins_pipe( pipe_slow );
23684 %}
23685 
23686 // --------------------------------- Rotation Operations ----------------------------------
23687 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23688   match(Set dst (RotateLeftV src shift));
23689   match(Set dst (RotateRightV src shift));
23690   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23691   ins_encode %{
23692     int opcode      = this->ideal_Opcode();
23693     int vector_len  = vector_length_encoding(this);
23694     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23695     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23696   %}
23697   ins_pipe( pipe_slow );
23698 %}
23699 
23700 instruct vprorate(vec dst, vec src, vec shift) %{
23701   match(Set dst (RotateLeftV src shift));
23702   match(Set dst (RotateRightV src shift));
23703   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23704   ins_encode %{
23705     int opcode      = this->ideal_Opcode();
23706     int vector_len  = vector_length_encoding(this);
23707     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23708     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23709   %}
23710   ins_pipe( pipe_slow );
23711 %}
23712 
23713 // ---------------------------------- Masked Operations ------------------------------------
23714 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23715   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23716   match(Set dst (LoadVectorMasked mem mask));
23717   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23718   ins_encode %{
23719     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23720     int vlen_enc = vector_length_encoding(this);
23721     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23722   %}
23723   ins_pipe( pipe_slow );
23724 %}
23725 
23726 
23727 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23728   predicate(n->in(3)->bottom_type()->isa_vectmask());
23729   match(Set dst (LoadVectorMasked mem mask));
23730   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23731   ins_encode %{
23732     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
23733     int vector_len = vector_length_encoding(this);
23734     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23735   %}
23736   ins_pipe( pipe_slow );
23737 %}
23738 
23739 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23740   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23741   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23742   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23743   ins_encode %{
23744     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23745     int vlen_enc = vector_length_encoding(src_node);
23746     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23747     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23748   %}
23749   ins_pipe( pipe_slow );
23750 %}
23751 
23752 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23753   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23754   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23755   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23756   ins_encode %{
23757     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23758     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23759     int vlen_enc = vector_length_encoding(src_node);
23760     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23761   %}
23762   ins_pipe( pipe_slow );
23763 %}
23764 
23765 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23766   match(Set addr (VerifyVectorAlignment addr mask));
23767   effect(KILL cr);
23768   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23769   ins_encode %{
23770     Label Lskip;
23771     // check if masked bits of addr are zero
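    // e.g. with $mask == 0x1F, testq sets ZF iff the low five bits of
    // $addr are zero (a 32-byte-aligned access), and the stop is skipped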
23772     __ testq($addr$$Register, $mask$$constant);
23773     __ jccb(Assembler::equal, Lskip);
23774     __ stop("verify_vector_alignment found a misaligned vector memory access");
23775     __ bind(Lskip);
23776   %}
23777   ins_pipe(pipe_slow);
23778 %}
23779 
23780 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23781   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23782   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23783   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23784   ins_encode %{
23785     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23786     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23787 
23788     Label DONE;
23789     int vlen_enc = vector_length_encoding(this, $src1);
23790     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
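
    // Lanes outside $mask must not report a mismatch, so ktmp2 below is
    // loaded with the complement of $mask; kortest then sets the carry
    // flag iff ktmp2 | ktmp1 is all ones, i.e. every selected lane
    // compared equal, and $dst keeps the preloaded -1. Otherwise $dst
    // becomes the index of the first lane that differs.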
23791 
23792     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23793     __ mov64($dst$$Register, -1L);
23794     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23795     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23796     __ jccb(Assembler::carrySet, DONE);
23797     __ kmovql($dst$$Register, $ktmp1$$KRegister);
23798     __ notq($dst$$Register);
23799     __ tzcntq($dst$$Register, $dst$$Register);
23800     __ bind(DONE);
23801   %}
23802   ins_pipe( pipe_slow );
23803 %}
23804 
23805 
23806 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23807   match(Set dst (VectorMaskGen len));
23808   effect(TEMP temp, KILL cr);
23809   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23810   ins_encode %{
23811     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23812   %}
23813   ins_pipe( pipe_slow );
23814 %}
23815 
23816 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23817   match(Set dst (VectorMaskGen len));
23818   format %{ "vector_mask_gen $len \t! vector mask generator" %}
23819   effect(TEMP temp);
23820   ins_encode %{
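    // Materialize a mask with the low $len bits set:
    // 0xFF..FF >> (64 - len), e.g. len == 5 gives 0x1F.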
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23822     __ kmovql($dst$$KRegister, $temp$$Register);
23823   %}
23824   ins_pipe( pipe_slow );
23825 %}
23826 
23827 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23828   predicate(n->in(1)->bottom_type()->isa_vectmask());
23829   match(Set dst (VectorMaskToLong mask));
23830   effect(TEMP dst, KILL cr);
23831   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23832   ins_encode %{
23833     int opcode = this->ideal_Opcode();
23834     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23835     int mask_len = Matcher::vector_length(this, $mask);
23836     int mask_size = mask_len * type2aelembytes(mbt);
23837     int vlen_enc = vector_length_encoding(this, $mask);
23838     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23839                              $dst$$Register, mask_len, mask_size, vlen_enc);
23840   %}
23841   ins_pipe( pipe_slow );
23842 %}
23843 
23844 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23845   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23846   match(Set dst (VectorMaskToLong mask));
23847   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23848   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23849   ins_encode %{
23850     int opcode = this->ideal_Opcode();
23851     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23852     int mask_len = Matcher::vector_length(this, $mask);
23853     int vlen_enc = vector_length_encoding(this, $mask);
23854     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23855                              $dst$$Register, mask_len, mbt, vlen_enc);
23856   %}
23857   ins_pipe( pipe_slow );
23858 %}
23859 
23860 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23861   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23862   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23863   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23864   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23865   ins_encode %{
23866     int opcode = this->ideal_Opcode();
23867     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23868     int mask_len = Matcher::vector_length(this, $mask);
23869     int vlen_enc = vector_length_encoding(this, $mask);
23870     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23871                              $dst$$Register, mask_len, mbt, vlen_enc);
23872   %}
23873   ins_pipe( pipe_slow );
23874 %}
23875 
23876 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23877   predicate(n->in(1)->bottom_type()->isa_vectmask());
23878   match(Set dst (VectorMaskTrueCount mask));
23879   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23880   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23881   ins_encode %{
23882     int opcode = this->ideal_Opcode();
23883     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23884     int mask_len = Matcher::vector_length(this, $mask);
23885     int mask_size = mask_len * type2aelembytes(mbt);
23886     int vlen_enc = vector_length_encoding(this, $mask);
23887     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23888                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23889   %}
23890   ins_pipe( pipe_slow );
23891 %}
23892 
23893 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23894   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23895   match(Set dst (VectorMaskTrueCount mask));
23896   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23897   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23898   ins_encode %{
23899     int opcode = this->ideal_Opcode();
23900     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23901     int mask_len = Matcher::vector_length(this, $mask);
23902     int vlen_enc = vector_length_encoding(this, $mask);
23903     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23904                              $tmp$$Register, mask_len, mbt, vlen_enc);
23905   %}
23906   ins_pipe( pipe_slow );
23907 %}
23908 
23909 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23910   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23911   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23912   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23913   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23914   ins_encode %{
23915     int opcode = this->ideal_Opcode();
23916     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23917     int mask_len = Matcher::vector_length(this, $mask);
23918     int vlen_enc = vector_length_encoding(this, $mask);
23919     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23920                              $tmp$$Register, mask_len, mbt, vlen_enc);
23921   %}
23922   ins_pipe( pipe_slow );
23923 %}
23924 
23925 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23926   predicate(n->in(1)->bottom_type()->isa_vectmask());
23927   match(Set dst (VectorMaskFirstTrue mask));
23928   match(Set dst (VectorMaskLastTrue mask));
23929   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23930   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23931   ins_encode %{
23932     int opcode = this->ideal_Opcode();
23933     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23934     int mask_len = Matcher::vector_length(this, $mask);
23935     int mask_size = mask_len * type2aelembytes(mbt);
23936     int vlen_enc = vector_length_encoding(this, $mask);
23937     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23938                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23939   %}
23940   ins_pipe( pipe_slow );
23941 %}
23942 
23943 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23944   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23945   match(Set dst (VectorMaskFirstTrue mask));
23946   match(Set dst (VectorMaskLastTrue mask));
23947   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23948   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23949   ins_encode %{
23950     int opcode = this->ideal_Opcode();
23951     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23952     int mask_len = Matcher::vector_length(this, $mask);
23953     int vlen_enc = vector_length_encoding(this, $mask);
23954     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23955                              $tmp$$Register, mask_len, mbt, vlen_enc);
23956   %}
23957   ins_pipe( pipe_slow );
23958 %}
23959 
23960 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23961   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23962   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23963   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23964   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23965   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23966   ins_encode %{
23967     int opcode = this->ideal_Opcode();
23968     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23969     int mask_len = Matcher::vector_length(this, $mask);
23970     int vlen_enc = vector_length_encoding(this, $mask);
23971     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23972                              $tmp$$Register, mask_len, mbt, vlen_enc);
23973   %}
23974   ins_pipe( pipe_slow );
23975 %}
23976 
23977 // --------------------------------- Compress/Expand Operations ---------------------------
23978 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23979   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23980   match(Set dst (CompressV src mask));
23981   match(Set dst (ExpandV src mask));
23982   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23984   ins_encode %{
23985     int opcode = this->ideal_Opcode();
23986     int vlen_enc = vector_length_encoding(this);
23987     BasicType bt  = Matcher::vector_element_basic_type(this);
23988     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23989                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23990   %}
23991   ins_pipe( pipe_slow );
23992 %}
23993 
23994 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23995   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23996   match(Set dst (CompressV src mask));
23997   match(Set dst (ExpandV src mask));
23998   format %{ "vector_compress_expand $dst, $src, $mask" %}
23999   ins_encode %{
24000     int opcode = this->ideal_Opcode();
24001     int vector_len = vector_length_encoding(this);
24002     BasicType bt  = Matcher::vector_element_basic_type(this);
24003     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24004   %}
24005   ins_pipe( pipe_slow );
24006 %}
24007 
24008 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24009   match(Set dst (CompressM mask));
24010   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24011   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24012   ins_encode %{
24013     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24014     int mask_len = Matcher::vector_length(this);
24015     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24016   %}
24017   ins_pipe( pipe_slow );
24018 %}
24019 
24020 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24021 
24022 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24023   predicate(!VM_Version::supports_gfni());
24024   match(Set dst (ReverseV src));
24025   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24027   ins_encode %{
24028     int vec_enc = vector_length_encoding(this);
24029     BasicType bt = Matcher::vector_element_basic_type(this);
24030     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24031                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24032   %}
24033   ins_pipe( pipe_slow );
24034 %}
24035 
24036 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24037   predicate(VM_Version::supports_gfni());
24038   match(Set dst (ReverseV src));
24039   effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24041   ins_encode %{
24042     int vec_enc = vector_length_encoding(this);
24043     BasicType bt  = Matcher::vector_element_basic_type(this);
24044     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24045     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24046                                $xtmp$$XMMRegister);
24047   %}
24048   ins_pipe( pipe_slow );
24049 %}
24050 
24051 instruct vreverse_byte_reg(vec dst, vec src) %{
24052   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24053   match(Set dst (ReverseBytesV src));
24054   effect(TEMP dst);
24055   format %{ "vector_reverse_byte $dst, $src" %}
24056   ins_encode %{
24057     int vec_enc = vector_length_encoding(this);
24058     BasicType bt = Matcher::vector_element_basic_type(this);
24059     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24060   %}
24061   ins_pipe( pipe_slow );
24062 %}
24063 
24064 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24065   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24066   match(Set dst (ReverseBytesV src));
24067   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24069   ins_encode %{
24070     int vec_enc = vector_length_encoding(this);
24071     BasicType bt = Matcher::vector_element_basic_type(this);
24072     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24073                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24074   %}
24075   ins_pipe( pipe_slow );
24076 %}
24077 
24078 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24079 
24080 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24081   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24082                                               Matcher::vector_length_in_bytes(n->in(1))));
24083   match(Set dst (CountLeadingZerosV src));
24084   format %{ "vector_count_leading_zeros $dst, $src" %}
24085   ins_encode %{
24086      int vlen_enc = vector_length_encoding(this, $src);
24087      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24088      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24089                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24090   %}
24091   ins_pipe( pipe_slow );
24092 %}
24093 
24094 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24095   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24096                                               Matcher::vector_length_in_bytes(n->in(1))));
24097   match(Set dst (CountLeadingZerosV src mask));
24098   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24099   ins_encode %{
24100     int vlen_enc = vector_length_encoding(this, $src);
24101     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24102     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24103     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24104                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24105   %}
24106   ins_pipe( pipe_slow );
24107 %}
24108 
24109 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24110   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24111             VM_Version::supports_avx512cd() &&
24112             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24113   match(Set dst (CountLeadingZerosV src));
24114   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24116   ins_encode %{
24117     int vlen_enc = vector_length_encoding(this, $src);
24118     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24119     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24120                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24121   %}
24122   ins_pipe( pipe_slow );
24123 %}
24124 
24125 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24126   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24127   match(Set dst (CountLeadingZerosV src));
24128   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24130   ins_encode %{
24131     int vlen_enc = vector_length_encoding(this, $src);
24132     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24133     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24134                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24135                                        $rtmp$$Register, true, vlen_enc);
24136   %}
24137   ins_pipe( pipe_slow );
24138 %}
24139 
24140 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24141   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24142             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24143   match(Set dst (CountLeadingZerosV src));
24144   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24145   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24146   ins_encode %{
24147     int vlen_enc = vector_length_encoding(this, $src);
24148     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24149     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24150                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24151   %}
24152   ins_pipe( pipe_slow );
24153 %}
24154 
24155 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24156   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24157             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24158   match(Set dst (CountLeadingZerosV src));
24159   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24160   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24161   ins_encode %{
24162     int vlen_enc = vector_length_encoding(this, $src);
24163     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24164     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24165                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24166   %}
24167   ins_pipe( pipe_slow );
24168 %}
24169 
24170 // ---------------------------------- Vector Masked Operations ------------------------------------
24171 
24172 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24173   match(Set dst (AddVB (Binary dst src2) mask));
24174   match(Set dst (AddVS (Binary dst src2) mask));
24175   match(Set dst (AddVI (Binary dst src2) mask));
24176   match(Set dst (AddVL (Binary dst src2) mask));
24177   match(Set dst (AddVF (Binary dst src2) mask));
24178   match(Set dst (AddVD (Binary dst src2) mask));
24179   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24180   ins_encode %{
24181     int vlen_enc = vector_length_encoding(this);
24182     BasicType bt = Matcher::vector_element_basic_type(this);
24183     int opc = this->ideal_Opcode();
24184     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24185                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24186   %}
24187   ins_pipe( pipe_slow );
24188 %}
24189 
24190 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24191   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24192   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24193   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24194   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24195   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24196   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24197   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24198   ins_encode %{
24199     int vlen_enc = vector_length_encoding(this);
24200     BasicType bt = Matcher::vector_element_basic_type(this);
24201     int opc = this->ideal_Opcode();
24202     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24203                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24204   %}
24205   ins_pipe( pipe_slow );
24206 %}
24207 
24208 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24209   match(Set dst (XorV (Binary dst src2) mask));
24210   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24211   ins_encode %{
24212     int vlen_enc = vector_length_encoding(this);
24213     BasicType bt = Matcher::vector_element_basic_type(this);
24214     int opc = this->ideal_Opcode();
24215     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24216                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24217   %}
24218   ins_pipe( pipe_slow );
24219 %}
24220 
24221 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24222   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24223   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24224   ins_encode %{
24225     int vlen_enc = vector_length_encoding(this);
24226     BasicType bt = Matcher::vector_element_basic_type(this);
24227     int opc = this->ideal_Opcode();
24228     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24229                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24230   %}
24231   ins_pipe( pipe_slow );
24232 %}
24233 
24234 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24235   match(Set dst (OrV (Binary dst src2) mask));
24236   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24237   ins_encode %{
24238     int vlen_enc = vector_length_encoding(this);
24239     BasicType bt = Matcher::vector_element_basic_type(this);
24240     int opc = this->ideal_Opcode();
24241     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24242                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24243   %}
24244   ins_pipe( pipe_slow );
24245 %}
24246 
24247 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24248   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24249   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24250   ins_encode %{
24251     int vlen_enc = vector_length_encoding(this);
24252     BasicType bt = Matcher::vector_element_basic_type(this);
24253     int opc = this->ideal_Opcode();
24254     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24255                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24256   %}
24257   ins_pipe( pipe_slow );
24258 %}
24259 
24260 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24261   match(Set dst (AndV (Binary dst src2) mask));
24262   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24263   ins_encode %{
24264     int vlen_enc = vector_length_encoding(this);
24265     BasicType bt = Matcher::vector_element_basic_type(this);
24266     int opc = this->ideal_Opcode();
24267     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24268                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24269   %}
24270   ins_pipe( pipe_slow );
24271 %}
24272 
24273 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24274   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24275   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24276   ins_encode %{
24277     int vlen_enc = vector_length_encoding(this);
24278     BasicType bt = Matcher::vector_element_basic_type(this);
24279     int opc = this->ideal_Opcode();
24280     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24281                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24282   %}
24283   ins_pipe( pipe_slow );
24284 %}
24285 
24286 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24287   match(Set dst (SubVB (Binary dst src2) mask));
24288   match(Set dst (SubVS (Binary dst src2) mask));
24289   match(Set dst (SubVI (Binary dst src2) mask));
24290   match(Set dst (SubVL (Binary dst src2) mask));
24291   match(Set dst (SubVF (Binary dst src2) mask));
24292   match(Set dst (SubVD (Binary dst src2) mask));
24293   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24294   ins_encode %{
24295     int vlen_enc = vector_length_encoding(this);
24296     BasicType bt = Matcher::vector_element_basic_type(this);
24297     int opc = this->ideal_Opcode();
24298     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24299                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24300   %}
24301   ins_pipe( pipe_slow );
24302 %}
24303 
24304 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24305   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24306   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24307   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24308   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24309   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24310   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24311   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24312   ins_encode %{
24313     int vlen_enc = vector_length_encoding(this);
24314     BasicType bt = Matcher::vector_element_basic_type(this);
24315     int opc = this->ideal_Opcode();
24316     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24317                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24318   %}
24319   ins_pipe( pipe_slow );
24320 %}
24321 
24322 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24323   match(Set dst (MulVS (Binary dst src2) mask));
24324   match(Set dst (MulVI (Binary dst src2) mask));
24325   match(Set dst (MulVL (Binary dst src2) mask));
24326   match(Set dst (MulVF (Binary dst src2) mask));
24327   match(Set dst (MulVD (Binary dst src2) mask));
24328   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24329   ins_encode %{
24330     int vlen_enc = vector_length_encoding(this);
24331     BasicType bt = Matcher::vector_element_basic_type(this);
24332     int opc = this->ideal_Opcode();
24333     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24334                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24335   %}
24336   ins_pipe( pipe_slow );
24337 %}
24338 
24339 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24340   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24341   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24342   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24343   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24344   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24345   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24346   ins_encode %{
24347     int vlen_enc = vector_length_encoding(this);
24348     BasicType bt = Matcher::vector_element_basic_type(this);
24349     int opc = this->ideal_Opcode();
24350     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24351                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24352   %}
24353   ins_pipe( pipe_slow );
24354 %}
24355 
24356 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24357   match(Set dst (SqrtVF dst mask));
24358   match(Set dst (SqrtVD dst mask));
24359   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24360   ins_encode %{
24361     int vlen_enc = vector_length_encoding(this);
24362     BasicType bt = Matcher::vector_element_basic_type(this);
24363     int opc = this->ideal_Opcode();
24364     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24365                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24366   %}
24367   ins_pipe( pipe_slow );
24368 %}
24369 
24370 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24371   match(Set dst (DivVF (Binary dst src2) mask));
24372   match(Set dst (DivVD (Binary dst src2) mask));
24373   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24374   ins_encode %{
24375     int vlen_enc = vector_length_encoding(this);
24376     BasicType bt = Matcher::vector_element_basic_type(this);
24377     int opc = this->ideal_Opcode();
24378     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24379                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24380   %}
24381   ins_pipe( pipe_slow );
24382 %}
24383 
24384 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24385   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24386   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24387   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24388   ins_encode %{
24389     int vlen_enc = vector_length_encoding(this);
24390     BasicType bt = Matcher::vector_element_basic_type(this);
24391     int opc = this->ideal_Opcode();
24392     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24393                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24394   %}
24395   ins_pipe( pipe_slow );
24396 %}
24397 
24398 
24399 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24400   match(Set dst (RotateLeftV (Binary dst shift) mask));
24401   match(Set dst (RotateRightV (Binary dst shift) mask));
24402   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24403   ins_encode %{
24404     int vlen_enc = vector_length_encoding(this);
24405     BasicType bt = Matcher::vector_element_basic_type(this);
24406     int opc = this->ideal_Opcode();
24407     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24408                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24409   %}
24410   ins_pipe( pipe_slow );
24411 %}
24412 
24413 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24414   match(Set dst (RotateLeftV (Binary dst src2) mask));
24415   match(Set dst (RotateRightV (Binary dst src2) mask));
24416   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24417   ins_encode %{
24418     int vlen_enc = vector_length_encoding(this);
24419     BasicType bt = Matcher::vector_element_basic_type(this);
24420     int opc = this->ideal_Opcode();
24421     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24422                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24423   %}
24424   ins_pipe( pipe_slow );
24425 %}
24426 
24427 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24428   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24429   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24430   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24431   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24432   ins_encode %{
24433     int vlen_enc = vector_length_encoding(this);
24434     BasicType bt = Matcher::vector_element_basic_type(this);
24435     int opc = this->ideal_Opcode();
24436     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24437                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24438   %}
24439   ins_pipe( pipe_slow );
24440 %}
24441 
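// Masked shifts come in two flavors: the forms below take a uniform count
// broadcast into an XMM register (is_var_shift() is false), while the
// *shiftv variants pass is_varshift == true so that evmasked_op selects the
// per-lane variable-shift (vpsllv/vpsrlv/vpsrav) encodings.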
24442 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24443   predicate(!n->as_ShiftV()->is_var_shift());
24444   match(Set dst (LShiftVS (Binary dst src2) mask));
24445   match(Set dst (LShiftVI (Binary dst src2) mask));
24446   match(Set dst (LShiftVL (Binary dst src2) mask));
24447   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24448   ins_encode %{
24449     int vlen_enc = vector_length_encoding(this);
24450     BasicType bt = Matcher::vector_element_basic_type(this);
24451     int opc = this->ideal_Opcode();
24452     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24453                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24454   %}
24455   ins_pipe( pipe_slow );
24456 %}
24457 
24458 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24459   predicate(n->as_ShiftV()->is_var_shift());
24460   match(Set dst (LShiftVS (Binary dst src2) mask));
24461   match(Set dst (LShiftVI (Binary dst src2) mask));
24462   match(Set dst (LShiftVL (Binary dst src2) mask));
24463   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24464   ins_encode %{
24465     int vlen_enc = vector_length_encoding(this);
24466     BasicType bt = Matcher::vector_element_basic_type(this);
24467     int opc = this->ideal_Opcode();
24468     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24469                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24470   %}
24471   ins_pipe( pipe_slow );
24472 %}
24473 
24474 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24475   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24476   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24477   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24478   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24479   ins_encode %{
24480     int vlen_enc = vector_length_encoding(this);
24481     BasicType bt = Matcher::vector_element_basic_type(this);
24482     int opc = this->ideal_Opcode();
24483     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24484                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24485   %}
24486   ins_pipe( pipe_slow );
24487 %}
24488 
24489 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24490   predicate(!n->as_ShiftV()->is_var_shift());
24491   match(Set dst (RShiftVS (Binary dst src2) mask));
24492   match(Set dst (RShiftVI (Binary dst src2) mask));
24493   match(Set dst (RShiftVL (Binary dst src2) mask));
24494   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24495   ins_encode %{
24496     int vlen_enc = vector_length_encoding(this);
24497     BasicType bt = Matcher::vector_element_basic_type(this);
24498     int opc = this->ideal_Opcode();
24499     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24500                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24501   %}
24502   ins_pipe( pipe_slow );
24503 %}
24504 
24505 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24506   predicate(n->as_ShiftV()->is_var_shift());
24507   match(Set dst (RShiftVS (Binary dst src2) mask));
24508   match(Set dst (RShiftVI (Binary dst src2) mask));
24509   match(Set dst (RShiftVL (Binary dst src2) mask));
24510   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24511   ins_encode %{
24512     int vlen_enc = vector_length_encoding(this);
24513     BasicType bt = Matcher::vector_element_basic_type(this);
24514     int opc = this->ideal_Opcode();
24515     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24516                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24517   %}
24518   ins_pipe( pipe_slow );
24519 %}
24520 
24521 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24522   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24523   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24524   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24525   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24526   ins_encode %{
24527     int vlen_enc = vector_length_encoding(this);
24528     BasicType bt = Matcher::vector_element_basic_type(this);
24529     int opc = this->ideal_Opcode();
24530     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24531                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24532   %}
24533   ins_pipe( pipe_slow );
24534 %}
24535 
24536 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24537   predicate(!n->as_ShiftV()->is_var_shift());
24538   match(Set dst (URShiftVS (Binary dst src2) mask));
24539   match(Set dst (URShiftVI (Binary dst src2) mask));
24540   match(Set dst (URShiftVL (Binary dst src2) mask));
24541   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24542   ins_encode %{
24543     int vlen_enc = vector_length_encoding(this);
24544     BasicType bt = Matcher::vector_element_basic_type(this);
24545     int opc = this->ideal_Opcode();
24546     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24547                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24548   %}
24549   ins_pipe( pipe_slow );
24550 %}
24551 
24552 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24553   predicate(n->as_ShiftV()->is_var_shift());
24554   match(Set dst (URShiftVS (Binary dst src2) mask));
24555   match(Set dst (URShiftVI (Binary dst src2) mask));
24556   match(Set dst (URShiftVL (Binary dst src2) mask));
24557   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24558   ins_encode %{
24559     int vlen_enc = vector_length_encoding(this);
24560     BasicType bt = Matcher::vector_element_basic_type(this);
24561     int opc = this->ideal_Opcode();
24562     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24563                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24564   %}
24565   ins_pipe( pipe_slow );
24566 %}
24567 
24568 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24569   match(Set dst (MaxV (Binary dst src2) mask));
24570   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24571   ins_encode %{
24572     int vlen_enc = vector_length_encoding(this);
24573     BasicType bt = Matcher::vector_element_basic_type(this);
24574     int opc = this->ideal_Opcode();
24575     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24576                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24577   %}
24578   ins_pipe( pipe_slow );
24579 %}
24580 
24581 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24582   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24583   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24584   ins_encode %{
24585     int vlen_enc = vector_length_encoding(this);
24586     BasicType bt = Matcher::vector_element_basic_type(this);
24587     int opc = this->ideal_Opcode();
24588     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24589                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24590   %}
24591   ins_pipe( pipe_slow );
24592 %}
24593 
24594 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24595   match(Set dst (MinV (Binary dst src2) mask));
24596   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24597   ins_encode %{
24598     int vlen_enc = vector_length_encoding(this);
24599     BasicType bt = Matcher::vector_element_basic_type(this);
24600     int opc = this->ideal_Opcode();
24601     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24602                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24603   %}
24604   ins_pipe( pipe_slow );
24605 %}
24606 
24607 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24608   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24609   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24610   ins_encode %{
24611     int vlen_enc = vector_length_encoding(this);
24612     BasicType bt = Matcher::vector_element_basic_type(this);
24613     int opc = this->ideal_Opcode();
24614     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24615                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24616   %}
24617   ins_pipe( pipe_slow );
24618 %}
24619 
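// Note: rearrange passes merge == false, so masked-off lanes are zeroed
// rather than merged with the previous destination contents.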
24620 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24621   match(Set dst (VectorRearrange (Binary dst src2) mask));
24622   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24623   ins_encode %{
24624     int vlen_enc = vector_length_encoding(this);
24625     BasicType bt = Matcher::vector_element_basic_type(this);
24626     int opc = this->ideal_Opcode();
24627     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24628                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24629   %}
24630   ins_pipe( pipe_slow );
24631 %}
24632 
24633 instruct vabs_masked(vec dst, kReg mask) %{
24634   match(Set dst (AbsVB dst mask));
24635   match(Set dst (AbsVS dst mask));
24636   match(Set dst (AbsVI dst mask));
24637   match(Set dst (AbsVL dst mask));
24638   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24639   ins_encode %{
24640     int vlen_enc = vector_length_encoding(this);
24641     BasicType bt = Matcher::vector_element_basic_type(this);
24642     int opc = this->ideal_Opcode();
24643     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24644                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24645   %}
24646   ins_pipe( pipe_slow );
24647 %}
24648 
24649 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24650   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24651   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24652   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24653   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24655     int vlen_enc = vector_length_encoding(this);
24656     BasicType bt = Matcher::vector_element_basic_type(this);
24657     int opc = this->ideal_Opcode();
24658     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24659                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24660   %}
24661   ins_pipe( pipe_slow );
24662 %}
24663 
24664 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24665   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24666   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24667   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24668   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24670     int vlen_enc = vector_length_encoding(this);
24671     BasicType bt = Matcher::vector_element_basic_type(this);
24672     int opc = this->ideal_Opcode();
24673     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24674                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24675   %}
24676   ins_pipe( pipe_slow );
24677 %}
24678 
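// Masked compare: the incoming opmask predicates the comparison, so lanes
// with a clear mask bit produce a zero bit in $dst regardless of the
// comparison result.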
24679 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24680   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24681   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24682   ins_encode %{
24683     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24684     int vlen_enc = vector_length_encoding(this, $src1);
24685     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24686 
    // Dispatch on the element type to emit the matching masked compare.
24688     switch (src1_elem_bt) {
24689       case T_BYTE: {
24690         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24691         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24692         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24693         break;
24694       }
24695       case T_SHORT: {
24696         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24697         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24698         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24699         break;
24700       }
24701       case T_INT: {
24702         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24703         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24704         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24705         break;
24706       }
24707       case T_LONG: {
24708         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24709         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24710         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24711         break;
24712       }
24713       case T_FLOAT: {
24714         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24715         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24716         break;
24717       }
24718       case T_DOUBLE: {
24719         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24720         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24721         break;
24722       }
24723       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24724     }
24725   %}
24726   ins_pipe( pipe_slow );
24727 %}
24728 
24729 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24730   predicate(Matcher::vector_length(n) <= 32);
24731   match(Set dst (MaskAll src));
24732   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24733   ins_encode %{
24734     int mask_len = Matcher::vector_length(this);
24735     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24736   %}
24737   ins_pipe( pipe_slow );
24738 %}
24739 
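// For mask lengths below 8 the NOT must not disturb the unused upper bits of
// the k register; $rtmp and $ktmp are used to materialize a constant that
// limits the inversion to the low masklen bits (see the knot() helper for
// the exact sequence).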
24740 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24741   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24742   match(Set dst (XorVMask src (MaskAll cnt)));
24743   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24745   ins_encode %{
24746     uint masklen = Matcher::vector_length(this);
24747     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24748   %}
24749   ins_pipe( pipe_slow );
24750 %}
24751 
24752 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24753   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24754             (Matcher::vector_length(n) == 16) ||
24755             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24756   match(Set dst (XorVMask src (MaskAll cnt)));
24757   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24758   ins_encode %{
24759     uint masklen = Matcher::vector_length(this);
24760     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24761   %}
24762   ins_pipe( pipe_slow );
24763 %}
24764 
24765 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24766   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24767   match(Set dst (VectorLongToMask src));
24768   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24769   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24770   ins_encode %{
24771     int mask_len = Matcher::vector_length(this);
24772     int vec_enc  = vector_length_encoding(mask_len);
24773     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24774                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24775   %}
24776   ins_pipe( pipe_slow );
24777 %}
24778 
24779 
24780 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24781   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24782   match(Set dst (VectorLongToMask src));
24783   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
24785   ins_encode %{
24786     int mask_len = Matcher::vector_length(this);
24787     assert(mask_len <= 32, "invalid mask length");
24788     int vec_enc  = vector_length_encoding(mask_len);
24789     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24790                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24791   %}
24792   ins_pipe( pipe_slow );
24793 %}
24794 
24795 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24796   predicate(n->bottom_type()->isa_vectmask());
24797   match(Set dst (VectorLongToMask src));
24798   format %{ "long_to_mask_evex $dst, $src\t!" %}
24799   ins_encode %{
24800     __ kmov($dst$$KRegister, $src$$Register);
24801   %}
24802   ins_pipe( pipe_slow );
24803 %}
24804 
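// The byte-wide forms of kand/kor/kxor require AVX512DQ; without it the
// operation is widened to the 16-bit (word) forms, which is harmless because
// the bits above the mask length are ignored.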
24805 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24806   match(Set dst (AndVMask src1 src2));
24807   match(Set dst (OrVMask src1 src2));
24808   match(Set dst (XorVMask src1 src2));
24809   effect(TEMP kscratch);
24810   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24811   ins_encode %{
24812     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24813     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24814     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24815     uint masklen = Matcher::vector_length(this);
24816     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24817     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24818   %}
24819   ins_pipe( pipe_slow );
24820 %}
24821 
24822 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24823   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24824   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24825   ins_encode %{
24826     int vlen_enc = vector_length_encoding(this);
24827     BasicType bt = Matcher::vector_element_basic_type(this);
24828     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24829                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24830   %}
24831   ins_pipe( pipe_slow );
24832 %}
24833 
24834 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24835   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24836   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24837   ins_encode %{
24838     int vlen_enc = vector_length_encoding(this);
24839     BasicType bt = Matcher::vector_element_basic_type(this);
24840     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24841                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24842   %}
24843   ins_pipe( pipe_slow );
24844 %}
24845 
24846 instruct castMM(kReg dst)
24847 %{
24848   match(Set dst (CastVV dst));
24849 
24850   size(0);
24851   format %{ "# castVV of $dst" %}
24852   ins_encode(/* empty encoding */);
24853   ins_cost(0);
24854   ins_pipe(empty);
24855 %}
24856 
24857 instruct castVV(vec dst)
24858 %{
24859   match(Set dst (CastVV dst));
24860 
24861   size(0);
24862   format %{ "# castVV of $dst" %}
24863   ins_encode(/* empty encoding */);
24864   ins_cost(0);
24865   ins_pipe(empty);
24866 %}
24867 
24868 instruct castVVLeg(legVec dst)
24869 %{
24870   match(Set dst (CastVV dst));
24871 
24872   size(0);
24873   format %{ "# castVV of $dst" %}
24874   ins_encode(/* empty encoding */);
24875   ins_cost(0);
24876   ins_pipe(empty);
24877 %}
24878 
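// vfpclass immediate 0x18 selects the +infinity (0x08) and -infinity (0x10)
// classes, so the resulting mask bit is set iff $src is +/-Inf.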
24879 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24880 %{
24881   match(Set dst (IsInfiniteF src));
24882   effect(TEMP ktmp, KILL cr);
24883   format %{ "float_class_check $dst, $src" %}
24884   ins_encode %{
24885     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24886     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24887   %}
24888   ins_pipe(pipe_slow);
24889 %}
24890 
24891 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24892 %{
24893   match(Set dst (IsInfiniteD src));
24894   effect(TEMP ktmp, KILL cr);
24895   format %{ "double_class_check $dst, $src" %}
24896   ins_encode %{
24897     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24898     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24899   %}
24900   ins_pipe(pipe_slow);
24901 %}
24902 
24903 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24904 %{
24905   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24906             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24907   match(Set dst (SaturatingAddV src1 src2));
24908   match(Set dst (SaturatingSubV src1 src2));
24909   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24910   ins_encode %{
24911     int vlen_enc = vector_length_encoding(this);
24912     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24913     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24914                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24915   %}
24916   ins_pipe(pipe_slow);
24917 %}
24918 
24919 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24920 %{
24921   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24922             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24923   match(Set dst (SaturatingAddV src1 src2));
24924   match(Set dst (SaturatingSubV src1 src2));
24925   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24926   ins_encode %{
24927     int vlen_enc = vector_length_encoding(this);
24928     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24929     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24930                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24931   %}
24932   ins_pipe(pipe_slow);
24933 %}
24934 
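// x86 provides saturating add/sub only for byte and word lanes; for int and
// long lanes the EVEX path below computes the wrapped result and then fixes
// up overflowed lanes with the help of the mask-register temporaries.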
24935 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24936 %{
24937   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24938             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24939             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24940   match(Set dst (SaturatingAddV src1 src2));
24941   match(Set dst (SaturatingSubV src1 src2));
24942   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24943   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24944   ins_encode %{
24945     int vlen_enc = vector_length_encoding(this);
24946     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24947     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24948                                         $src1$$XMMRegister, $src2$$XMMRegister,
24949                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24950                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24951   %}
24952   ins_pipe(pipe_slow);
24953 %}
24954 
24955 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24956 %{
24957   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24958             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24959             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24960   match(Set dst (SaturatingAddV src1 src2));
24961   match(Set dst (SaturatingSubV src1 src2));
24962   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24963   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24964   ins_encode %{
24965     int vlen_enc = vector_length_encoding(this);
24966     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24967     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24968                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24969                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24970   %}
24971   ins_pipe(pipe_slow);
24972 %}
24973 
24974 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24975 %{
24976   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24977             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24978             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24979   match(Set dst (SaturatingAddV src1 src2));
24980   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24981   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24982   ins_encode %{
24983     int vlen_enc = vector_length_encoding(this);
24984     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24985     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24986                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24987   %}
24988   ins_pipe(pipe_slow);
24989 %}
24990 
24991 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24992 %{
24993   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24994             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24995             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24996   match(Set dst (SaturatingAddV src1 src2));
24997   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24998   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24999   ins_encode %{
25000     int vlen_enc = vector_length_encoding(this);
25001     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25002     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25003                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25004   %}
25005   ins_pipe(pipe_slow);
25006 %}
25007 
25008 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25009 %{
25010   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25011             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25012             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25013   match(Set dst (SaturatingSubV src1 src2));
25014   effect(TEMP ktmp);
25015   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25016   ins_encode %{
25017     int vlen_enc = vector_length_encoding(this);
25018     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25019     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25020                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25021   %}
25022   ins_pipe(pipe_slow);
25023 %}
25024 
25025 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25026 %{
25027   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25028             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25029             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25030   match(Set dst (SaturatingSubV src1 src2));
25031   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25032   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25033   ins_encode %{
25034     int vlen_enc = vector_length_encoding(this);
25035     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25036     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25037                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25038   %}
25039   ins_pipe(pipe_slow);
25040 %}
25041 
25042 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25043 %{
25044   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25045             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25046   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25047   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25048   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25049   ins_encode %{
25050     int vlen_enc = vector_length_encoding(this);
25051     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25052     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25053                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25054   %}
25055   ins_pipe(pipe_slow);
25056 %}
25057 
25058 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25059 %{
25060   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25061             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25062   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25063   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25064   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25065   ins_encode %{
25066     int vlen_enc = vector_length_encoding(this);
25067     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25068     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25069                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25070   %}
25071   ins_pipe(pipe_slow);
25072 %}
25073 
25074 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25075   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25076             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25077   match(Set dst (SaturatingAddV (Binary dst src) mask));
25078   match(Set dst (SaturatingSubV (Binary dst src) mask));
25079   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25080   ins_encode %{
25081     int vlen_enc = vector_length_encoding(this);
25082     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25083     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25084                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25085   %}
25086   ins_pipe( pipe_slow );
25087 %}
25088 
25089 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25090   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25091             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25092   match(Set dst (SaturatingAddV (Binary dst src) mask));
25093   match(Set dst (SaturatingSubV (Binary dst src) mask));
25094   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25095   ins_encode %{
25096     int vlen_enc = vector_length_encoding(this);
25097     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25098     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25099                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25100   %}
25101   ins_pipe( pipe_slow );
25102 %}
25103 
25104 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25105   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25106             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25107   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25108   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25109   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25110   ins_encode %{
25111     int vlen_enc = vector_length_encoding(this);
25112     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25113     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25114                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25115   %}
25116   ins_pipe( pipe_slow );
25117 %}
25118 
25119 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25120   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25121             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25122   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25123   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25124   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25125   ins_encode %{
25126     int vlen_enc = vector_length_encoding(this);
25127     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25128     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25129                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25130   %}
25131   ins_pipe( pipe_slow );
25132 %}
25133 
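// SelectFromTwoVector treats $index as per-lane selectors into the
// concatenation of $src1 and $src2; this is expected to lower to the
// two-table permute (vpermi2/vpermt2) family, see
// select_from_two_vectors_evex.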
25134 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25135 %{
25136   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25137   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25138   ins_encode %{
25139     int vlen_enc = vector_length_encoding(this);
25140     BasicType bt = Matcher::vector_element_basic_type(this);
25141     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25142   %}
25143   ins_pipe(pipe_slow);
25144 %}
25145 
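// Scalar Float16 support: the FP16 bit pattern is carried in a Java short,
// and vmovw (AVX512-FP16) moves those 16 bits between a GPR and the low word
// of an XMM register without any conversion.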
25146 instruct reinterpretS2HF(regF dst, rRegI src)
25147 %{
25148   match(Set dst (ReinterpretS2HF src));
25149   format %{ "vmovw $dst, $src" %}
25150   ins_encode %{
25151     __ vmovw($dst$$XMMRegister, $src$$Register);
25152   %}
25153   ins_pipe(pipe_slow);
25154 %}
25155 
25156 instruct reinterpretHF2S(rRegI dst, regF src)
25157 %{
25158   match(Set dst (ReinterpretHF2S src));
25159   format %{ "vmovw $dst, $src" %}
25160   ins_encode %{
25161     __ vmovw($dst$$Register, $src$$XMMRegister);
25162   %}
25163   ins_pipe(pipe_slow);
25164 %}
25165 
25166 instruct convF2HFAndS2HF(regF dst, regF src)
25167 %{
25168   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25169   format %{ "convF2HFAndS2HF $dst, $src" %}
25170   ins_encode %{
25171     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25172   %}
25173   ins_pipe(pipe_slow);
25174 %}
25175 
25176 instruct convHF2SAndHF2F(regF dst, regF src)
25177 %{
25178   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25179   format %{ "convHF2SAndHF2F $dst, $src" %}
25180   ins_encode %{
25181     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25182   %}
25183   ins_pipe(pipe_slow);
25184 %}
25185 
25186 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25187 %{
25188   match(Set dst (SqrtHF src));
25189   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25190   ins_encode %{
25191     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25192   %}
25193   ins_pipe(pipe_slow);
25194 %}
25195 
25196 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25197 %{
25198   match(Set dst (AddHF src1 src2));
25199   match(Set dst (DivHF src1 src2));
25200   match(Set dst (MulHF src1 src2));
25201   match(Set dst (SubHF src1 src2));
25202   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25203   ins_encode %{
25204     int opcode = this->ideal_Opcode();
25205     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25206   %}
25207   ins_pipe(pipe_slow);
25208 %}
25209 
25210 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25211 %{
25212   predicate(VM_Version::supports_avx10_2());
25213   match(Set dst (MaxHF src1 src2));
25214   match(Set dst (MinHF src1 src2));
25215   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25216   ins_encode %{
25217     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25218     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25219   %}
25220   ins_pipe( pipe_slow );
25221 %}
25222 
25223 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25224 %{
25225   predicate(!VM_Version::supports_avx10_2());
25226   match(Set dst (MaxHF src1 src2));
25227   match(Set dst (MinHF src1 src2));
25228   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t! using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25230   ins_encode %{
25231     int opcode = this->ideal_Opcode();
25232     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25233                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25234   %}
25235   ins_pipe( pipe_slow );
25236 %}
25237 
25238 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25239 %{
  match(Set dst (FmaHF src2 (Binary dst src1)));
25241   effect(DEF dst);
25242   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25243   ins_encode %{
25244     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25245   %}
25246   ins_pipe( pipe_slow );
25247 %}
25248 
25249 
25250 instruct vector_sqrt_HF_reg(vec dst, vec src)
25251 %{
25252   match(Set dst (SqrtVHF src));
25253   format %{ "vector_sqrt_fp16 $dst, $src" %}
25254   ins_encode %{
25255     int vlen_enc = vector_length_encoding(this);
25256     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25257   %}
25258   ins_pipe(pipe_slow);
25259 %}
25260 
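// The (VectorReinterpret (LoadVector ...)) shape below arises because
// Float16 vectors are loaded as short vectors and then reinterpreted;
// matching it here folds the load into the instruction's memory operand.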
25261 instruct vector_sqrt_HF_mem(vec dst, memory src)
25262 %{
25263   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25264   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25265   ins_encode %{
25266     int vlen_enc = vector_length_encoding(this);
25267     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25268   %}
25269   ins_pipe(pipe_slow);
25270 %}
25271 
25272 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25273 %{
25274   match(Set dst (AddVHF src1 src2));
25275   match(Set dst (DivVHF src1 src2));
25276   match(Set dst (MulVHF src1 src2));
25277   match(Set dst (SubVHF src1 src2));
25278   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25279   ins_encode %{
25280     int vlen_enc = vector_length_encoding(this);
25281     int opcode = this->ideal_Opcode();
25282     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25283   %}
25284   ins_pipe(pipe_slow);
25285 %}
25286 
25287 
25288 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25289 %{
25290   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25291   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25292   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25293   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25294   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25295   ins_encode %{
25296     int vlen_enc = vector_length_encoding(this);
25297     int opcode = this->ideal_Opcode();
25298     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25299   %}
25300   ins_pipe(pipe_slow);
25301 %}
25302 
25303 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25304 %{
25305   match(Set dst (FmaVHF src2 (Binary dst src1)));
25306   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25307   ins_encode %{
25308     int vlen_enc = vector_length_encoding(this);
25309     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25310   %}
25311   ins_pipe( pipe_slow );
25312 %}
25313 
25314 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25315 %{
25316   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25317   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25318   ins_encode %{
25319     int vlen_enc = vector_length_encoding(this);
25320     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25321   %}
25322   ins_pipe( pipe_slow );
25323 %}
25324 
25325 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25326 %{
25327   predicate(VM_Version::supports_avx10_2());
25328   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25329   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25330   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25331   ins_encode %{
25332     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25334     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25335   %}
25336   ins_pipe( pipe_slow );
25337 %}
25338 
25339 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25340 %{
25341   predicate(VM_Version::supports_avx10_2());
25342   match(Set dst (MinVHF src1 src2));
25343   match(Set dst (MaxVHF src1 src2));
25344   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25345   ins_encode %{
25346     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25348     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25349   %}
25350   ins_pipe( pipe_slow );
25351 %}
25352 
25353 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25354 %{
25355   predicate(!VM_Version::supports_avx10_2());
25356   match(Set dst (MinVHF src1 src2));
25357   match(Set dst (MaxVHF src1 src2));
25358   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t! using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25360   ins_encode %{
25361     int vlen_enc = vector_length_encoding(this);
25362     int opcode = this->ideal_Opcode();
25363     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25364                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25365   %}
25366   ins_pipe( pipe_slow );
25367 %}
25368 
25369 //----------PEEPHOLE RULES-----------------------------------------------------
25370 // These must follow all instruction definitions as they use the names
25371 // defined in the instructions definitions.
25372 //
25373 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
25375 //
25376 // peepmatch ( root_instr_name [preceding_instruction]* );
25377 //
25378 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the
// // procedure should reside in the architecture-dependent peephole file.
// // The procedure has the signature
// // MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...),
// // with the arguments being the basic block, the current node index inside
// // the block, the register allocator, a function that when invoked returns
// // the new node defined in peepreplace, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch; the procedure returns true on
// // success, else false
25387 //
25388 // peepconstraint %{
25389 // (instruction_number.operand_name relational_op instruction_number.operand_name
25390 //  [, ...] );
// // instruction numbers are zero-based, using left-to-right order in peepmatch
25392 //
25393 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25394 // // provide an instruction_number.operand_name for each operand that appears
25395 // // in the replacement instruction's match rule
25396 //
25397 // ---------VM FLAGS---------------------------------------------------------
25398 //
25399 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25400 //
25401 // Each peephole rule is given an identifying number starting with zero and
25402 // increasing by one in the order seen by the parser.  An individual peephole
25403 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25404 // on the command-line.
25405 //
25406 // ---------CURRENT LIMITATIONS----------------------------------------------
25407 //
// Only transformations inside a basic block (do we need more for peephole?)
25409 //
25410 // ---------EXAMPLE----------------------------------------------------------
25411 //
25412 // // pertinent parts of existing instructions in architecture description
25413 // instruct movI(rRegI dst, rRegI src)
25414 // %{
25415 //   match(Set dst (CopyI src));
25416 // %}
25417 //
25418 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25419 // %{
25420 //   match(Set dst (AddI dst src));
25421 //   effect(KILL cr);
25422 // %}
25423 //
25424 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25425 // %{
25426 //   match(Set dst (AddI dst src));
25427 // %}
25428 //
25429 // 1. Simple replacement
25430 // - Only match adjacent instructions in same basic block
25431 // - Only equality constraints
25432 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25433 // - Only one replacement instruction
25434 //
25435 // // Change (inc mov) to lea
25436 // peephole %{
25437 //   // lea should only be emitted when beneficial
25438 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25439 //   // increment preceded by register-register move
25440 //   peepmatch ( incI_rReg movI );
25441 //   // require that the destination register of the increment
25442 //   // match the destination register of the move
25443 //   peepconstraint ( 0.dst == 1.dst );
25444 //   // construct a replacement instruction that sets
25445 //   // the destination to ( move's source register + one )
25446 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25447 // %}
25448 //
25449 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25451 // - More flexible constraints
25452 // - More flexible transformations
25453 // - May utilise architecture-dependent API more effectively
25454 // - Currently only one replacement instruction due to adlc parsing capabilities
25455 //
25456 // // Change (inc mov) to lea
25457 // peephole %{
25458 //   // lea should only be emitted when beneficial
25459 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the function below
25461 //   peepmatch ( incI_rReg movI );
//   // the procedure responsible for performing the transformation
25463 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the procedure above
25466 //   peepreplace ( leaI_rReg_immI() );
25467 // %}
25468 
// These instructions are not matched by the matcher but are used by the peephole rules below
25470 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25471 %{
25472   predicate(false);
25473   match(Set dst (AddI src1 src2));
25474   format %{ "leal    $dst, [$src1 + $src2]" %}
25475   ins_encode %{
25476     Register dst = $dst$$Register;
25477     Register src1 = $src1$$Register;
25478     Register src2 = $src2$$Register;
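    // rbp and r13 cannot be encoded as the base of an address without an
    // explicit displacement, which would lengthen the instruction; if src1
    // is one of them, swap the operands so src2 serves as the base instead.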
25479     if (src1 != rbp && src1 != r13) {
25480       __ leal(dst, Address(src1, src2, Address::times_1));
25481     } else {
25482       assert(src2 != rbp && src2 != r13, "");
25483       __ leal(dst, Address(src2, src1, Address::times_1));
25484     }
25485   %}
25486   ins_pipe(ialu_reg_reg);
25487 %}
25488 
25489 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25490 %{
25491   predicate(false);
25492   match(Set dst (AddI src1 src2));
25493   format %{ "leal    $dst, [$src1 + $src2]" %}
25494   ins_encode %{
25495     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25496   %}
25497   ins_pipe(ialu_reg_reg);
25498 %}
25499 
25500 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25501 %{
25502   predicate(false);
25503   match(Set dst (LShiftI src shift));
25504   format %{ "leal    $dst, [$src << $shift]" %}
25505   ins_encode %{
25506     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25507     Register src = $src$$Register;
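    // A scaled index without a base register forces a 32-bit displacement
    // in the SIB encoding. For a shift by 1 (times_2) we can encode the
    // shorter src + src form instead, provided src can serve as a
    // displacement-free base (i.e. it is not rbp or r13).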
25508     if (scale == Address::times_2 && src != rbp && src != r13) {
25509       __ leal($dst$$Register, Address(src, src, Address::times_1));
25510     } else {
25511       __ leal($dst$$Register, Address(noreg, src, scale));
25512     }
25513   %}
25514   ins_pipe(ialu_reg_reg);
25515 %}
25516 
25517 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25518 %{
25519   predicate(false);
25520   match(Set dst (AddL src1 src2));
25521   format %{ "leaq    $dst, [$src1 + $src2]" %}
25522   ins_encode %{
25523     Register dst = $dst$$Register;
25524     Register src1 = $src1$$Register;
25525     Register src2 = $src2$$Register;
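    // Same operand swap as in leaI_rReg_rReg_peep: avoid rbp/r13 as the
    // base register, since they cannot be encoded without a displacement.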
25526     if (src1 != rbp && src1 != r13) {
25527       __ leaq(dst, Address(src1, src2, Address::times_1));
25528     } else {
25529       assert(src2 != rbp && src2 != r13, "");
25530       __ leaq(dst, Address(src2, src1, Address::times_1));
25531     }
25532   %}
25533   ins_pipe(ialu_reg_reg);
25534 %}
25535 
25536 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25537 %{
25538   predicate(false);
25539   match(Set dst (AddL src1 src2));
25540   format %{ "leaq    $dst, [$src1 + $src2]" %}
25541   ins_encode %{
25542     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25543   %}
25544   ins_pipe(ialu_reg_reg);
25545 %}
25546 
25547 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25548 %{
25549   predicate(false);
25550   match(Set dst (LShiftL src shift));
25551   format %{ "leaq    $dst, [$src << $shift]" %}
25552   ins_encode %{
25553     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25554     Register src = $src$$Register;
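    // Same encoding consideration as in leaI_rReg_immI2_peep: prefer the
    // src + src form over a baseless scaled index for shifts by 1.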
25555     if (scale == Address::times_2 && src != rbp && src != r13) {
25556       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25557     } else {
25558       __ leaq($dst$$Register, Address(noreg, src, scale));
25559     }
25560   %}
25561   ins_pipe(ialu_reg_reg);
25562 %}
25563 
// These peephole rules replace mov + I pairs (where I is one of {add, inc,
// dec, sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally only beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
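//
// For example (register names are illustrative), the pair
//   movl  ebx, esi      // movI:      ebx = esi
//   addl  ebx, ecx      // addI_rReg: ebx += ecx
// can be coalesced into the single instruction
//   leal  ebx, [esi + ecx]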
25570 
25571 peephole
25572 %{
25573   peeppredicate(VM_Version::supports_fast_2op_lea());
25574   peepmatch (addI_rReg);
25575   peepprocedure (lea_coalesce_reg);
25576   peepreplace (leaI_rReg_rReg_peep());
25577 %}
25578 
25579 peephole
25580 %{
25581   peeppredicate(VM_Version::supports_fast_2op_lea());
25582   peepmatch (addI_rReg_imm);
25583   peepprocedure (lea_coalesce_imm);
25584   peepreplace (leaI_rReg_immI_peep());
25585 %}
25586 
25587 peephole
25588 %{
25589   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25590                 VM_Version::is_intel_cascade_lake());
25591   peepmatch (incI_rReg);
25592   peepprocedure (lea_coalesce_imm);
25593   peepreplace (leaI_rReg_immI_peep());
25594 %}
25595 
25596 peephole
25597 %{
25598   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25599                 VM_Version::is_intel_cascade_lake());
25600   peepmatch (decI_rReg);
25601   peepprocedure (lea_coalesce_imm);
25602   peepreplace (leaI_rReg_immI_peep());
25603 %}
25604 
25605 peephole
25606 %{
25607   peeppredicate(VM_Version::supports_fast_2op_lea());
25608   peepmatch (salI_rReg_immI2);
25609   peepprocedure (lea_coalesce_imm);
25610   peepreplace (leaI_rReg_immI2_peep());
25611 %}
25612 
25613 peephole
25614 %{
25615   peeppredicate(VM_Version::supports_fast_2op_lea());
25616   peepmatch (addL_rReg);
25617   peepprocedure (lea_coalesce_reg);
25618   peepreplace (leaL_rReg_rReg_peep());
25619 %}
25620 
25621 peephole
25622 %{
25623   peeppredicate(VM_Version::supports_fast_2op_lea());
25624   peepmatch (addL_rReg_imm);
25625   peepprocedure (lea_coalesce_imm);
25626   peepreplace (leaL_rReg_immL32_peep());
25627 %}
25628 
25629 peephole
25630 %{
25631   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25632                 VM_Version::is_intel_cascade_lake());
25633   peepmatch (incL_rReg);
25634   peepprocedure (lea_coalesce_imm);
25635   peepreplace (leaL_rReg_immL32_peep());
25636 %}
25637 
25638 peephole
25639 %{
25640   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25641                 VM_Version::is_intel_cascade_lake());
25642   peepmatch (decL_rReg);
25643   peepprocedure (lea_coalesce_imm);
25644   peepreplace (leaL_rReg_immL32_peep());
25645 %}
25646 
25647 peephole
25648 %{
25649   peeppredicate(VM_Version::supports_fast_2op_lea());
25650   peepmatch (salL_rReg_immI2);
25651   peepprocedure (lea_coalesce_imm);
25652   peepreplace (leaL_rReg_immI2_peep());
25653 %}
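// As the procedure name suggests, the following rules remove leaP*
// instructions whose address computation turns out to be redundant.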
25654 
25655 peephole
25656 %{
25657   peepmatch (leaPCompressedOopOffset);
25658   peepprocedure (lea_remove_redundant);
25659 %}
25660 
25661 peephole
25662 %{
25663   peepmatch (leaP8Narrow);
25664   peepprocedure (lea_remove_redundant);
25665 %}
25666 
25667 peephole
25668 %{
25669   peepmatch (leaP32Narrow);
25670   peepprocedure (lea_remove_redundant);
25671 %}
25672 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the previous instruction.
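//
// For example (illustrative), in the sequence
//   andl  eax, ebx      // already sets ZF and SF for the result
//   testl eax, eax      // redundant: tests the same value
//   je    done
// the testl can be removed because the branch only consumes flags that the
// preceding andl has already produced for the same value.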
25675 
// int variant
25677 peephole
25678 %{
25679   peepmatch (testI_reg);
25680   peepprocedure (test_may_remove);
25681 %}
25682 
// long variant
25684 peephole
25685 %{
25686   peepmatch (testL_reg);
25687   peepprocedure (test_may_remove);
25688 %}
25689 
25690 
25691 //----------SMARTSPILL RULES---------------------------------------------------
25692 // These must follow all instruction definitions as they use the names
25693 // defined in the instructions definitions.