//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding, VM register );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)
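// (Without a REX prefix, byte-register encodings 4-7 select the legacy
// AH/CH/DH/BH registers instead of SPL/BPL/SIL/DIL.)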

// Previously RBX, RSI, and RDI were set as save-on-entry for Java code, but
// SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are SOE registers again.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the
// prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers, or 16 words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The full registers are used by SSE4.2 intrinsics, array copy stubs
// and superword operations (see the UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 are preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
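// The flags register has no ideal spill type and a Bad() VMReg: condition
// codes are killed rather than spilled across calls.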

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
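// K0 is omitted: an EVEX mask-field value of zero means "no masking", so
// k0 cannot be used as an allocatable write mask.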


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
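// RSP and R15 are deliberately omitted: RSP is the stack pointer and R15
// holds the current thread (see the ptr_r15_reg TLS class below).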
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
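// Note: alloc_class only groups these registers into an allocation chunk and
// fixes their ordering for the allocator; whether a register is actually
// allocatable is determined by the reg_class definitions.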
 1030 
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
// Class for pre-EVEX float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
// Class for EVEX float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
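// reg_class_dynamic picks one of the two classes above, based on the given
// predicate, so instruction patterns only see XMM16-31 when the CPU actually
// supports the EVEX encodings.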
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
// Class for pre-EVEX double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
// Class for EVEX double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
// Class for pre-EVEX 32-bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
// Class for EVEX 32-bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre-EVEX 64-bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for EVEX 64-bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre-EVEX 128-bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for EVEX 128-bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre-EVEX 256-bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for EVEX 256-bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for all 512-bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for restricted 512-bit vector registers
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
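// A CastLL can be matched against instruction forms taking a 32-bit immediate
// only if both of its type bounds fit in a signed 32-bit immediate; an
// unbounded side (min_jlong/max_jlong) needs no runtime check and is accepted.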
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
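// With compressed oops, R12 holds the heap base used to decode narrow oops,
// so it has to be kept out of the allocatable register masks.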
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
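  // r16-r31 are the APX extended GPRs (EGPRs); the UseAPX checks below strip
  // them from the masks on CPUs without APX support.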
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
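// For reference, assuming the standard x86-64 encodings: a direct call is
// E8 + rel32 (5 bytes), a dynamic (inline cache) call prepends a 10-byte
// movabs of the cached metadata, and vzeroupper is C5 F8 77 (3 bytes,
// accounted for by clear_avx_size()).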
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 11; // skip the 10-byte movabs (inline cache load) + 1-byte call opcode
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // A floating point compare sets CF=1 both for 'below' and for the unordered
  // case (at least one input is NaN), so the 'below' jump also covers NaN.
  // The sequence yields -1 (below or unordered), 0 (equal), +1 (above).
 1704   Label done;
 1705   __ movl(dst, -1);
 1706   __ jcc(Assembler::below, done);
 1707   __ setcc(Assembler::notEqual, dst);
 1708   __ bind(done);
 1709 }
 1710 
 1711 // Math.min()    # Math.max()
 1712 // --------------------------
 1713 // ucomis[s/d]   #
 1714 // ja   -> b     # a
 1715 // jp   -> NaN   # NaN
 1716 // jb   -> a     # b
 1717 // je            #
 1718 // |-jz -> a | b # a & b
 1719 // |    -> a     #
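// The zero case needs the bitwise blends below because +0.0 and -0.0 compare
// equal: for min, POR of the operands yields -0.0 if either input is -0.0;
// for max, PAND yields +0.0 unless both inputs are -0.0.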
 1720 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1721                             XMMRegister a, XMMRegister b,
 1722                             XMMRegister xmmt, Register rt,
 1723                             bool min, bool single) {
 1724 
 1725   Label nan, zero, below, above, done;
 1726 
 1727   if (single)
 1728     __ ucomiss(a, b);
 1729   else
 1730     __ ucomisd(a, b);
 1731 
 1732   if (dst->encoding() != (min ? b : a)->encoding())
 1733     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1734   else
 1735     __ jccb(Assembler::above, done);
 1736 
 1737   __ jccb(Assembler::parity, nan);  // PF=1
 1738   __ jccb(Assembler::below, below); // CF=1
 1739 
 1740   // equal
 1741   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1742   if (single) {
 1743     __ ucomiss(a, xmmt);
 1744     __ jccb(Assembler::equal, zero);
 1745 
 1746     __ movflt(dst, a);
 1747     __ jmp(done);
 1748   }
 1749   else {
 1750     __ ucomisd(a, xmmt);
 1751     __ jccb(Assembler::equal, zero);
 1752 
 1753     __ movdbl(dst, a);
 1754     __ jmp(done);
 1755   }
 1756 
 1757   __ bind(zero);
 1758   if (min)
 1759     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1760   else
 1761     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1762 
 1763   __ jmp(done);
 1764 
 1765   __ bind(above);
 1766   if (single)
 1767     __ movflt(dst, min ? b : a);
 1768   else
 1769     __ movdbl(dst, min ? b : a);
 1770 
 1771   __ jmp(done);
 1772 
 1773   __ bind(nan);
 1774   if (single) {
 1775     __ movl(rt, 0x7fc00000); // Float.NaN
 1776     __ movdl(dst, rt);
 1777   }
 1778   else {
 1779     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1780     __ movdq(dst, rt);
 1781   }
 1782   __ jmp(done);
 1783 
 1784   __ bind(below);
 1785   if (single)
 1786     __ movflt(dst, min ? a : b);
 1787   else
 1788     __ movdbl(dst, min ? a : b);
 1789 
 1790   __ bind(done);
 1791 }
 1792 
 1793 //=============================================================================
 1794 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1795 
 1796 int ConstantTable::calculate_table_base_offset() const {
 1797   return 0;  // absolute addressing, no offset
 1798 }
 1799 
 1800 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1801 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1802   ShouldNotReachHere();
 1803 }
 1804 
 1805 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1806   // Empty encoding
 1807 }
 1808 
 1809 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1810   return 0;
 1811 }
 1812 
 1813 #ifndef PRODUCT
 1814 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1815   st->print("# MachConstantBaseNode (empty encoding)");
 1816 }
 1817 #endif
 1818 
 1819 
 1820 //=============================================================================
 1821 #ifndef PRODUCT
 1822 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1823   Compile* C = ra_->C;
 1824 
 1825   int framesize = C->output()->frame_size_in_bytes();
 1826   int bangsize = C->output()->bang_size_in_bytes();
 1827   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for the return address, which is already pushed.
 1829   framesize -= wordSize;
 1830 
 1831   if (C->output()->need_stack_bang(bangsize)) {
 1832     framesize -= wordSize;
 1833     st->print("# stack bang (%d bytes)", bangsize);
 1834     st->print("\n\t");
 1835     st->print("pushq   rbp\t# Save rbp");
 1836     if (PreserveFramePointer) {
 1837         st->print("\n\t");
 1838         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1839     }
 1840     if (framesize) {
 1841       st->print("\n\t");
 1842       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1843     }
 1844   } else {
 1845     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1846     st->print("\n\t");
 1847     framesize -= wordSize;
 1848     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1849     if (PreserveFramePointer) {
 1850       st->print("\n\t");
 1851       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1852       if (framesize > 0) {
 1853         st->print("\n\t");
 1854         st->print("addq    rbp, #%d", framesize);
 1855       }
 1856     }
 1857   }
 1858 
 1859   if (VerifyStackAtCalls) {
 1860     st->print("\n\t");
 1861     framesize -= wordSize;
 1862     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1863 #ifdef ASSERT
 1864     st->print("\n\t");
 1865     st->print("# stack alignment check");
 1866 #endif
 1867   }
 1868   if (C->stub_function() != nullptr) {
 1869     st->print("\n\t");
 1870     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1871     st->print("\n\t");
 1872     st->print("je      fast_entry\t");
 1873     st->print("\n\t");
 1874     st->print("call    #nmethod_entry_barrier_stub\t");
 1875     st->print("\n\tfast_entry:");
 1876   }
 1877   st->cr();
 1878 }
 1879 #endif
 1880 
 1881 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1882   Compile* C = ra_->C;
 1883 
 1884   int framesize = C->output()->frame_size_in_bytes();
 1885   int bangsize = C->output()->bang_size_in_bytes();
 1886 
 1887   if (C->clinit_barrier_on_entry()) {
 1888     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1889     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1890 
 1891     Label L_skip_barrier;
 1892     Register klass = rscratch1;
 1893 
 1894     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1895     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1896 
 1897     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1898 
 1899     __ bind(L_skip_barrier);
 1900   }
 1901 
 1902   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
 1903 
 1904   C->output()->set_frame_complete(__ offset());
 1905 
 1906   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because constant-table users
    // might be emitted before MachConstantBaseNode.
 1909     ConstantTable& constant_table = C->output()->constant_table();
 1910     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1911   }
 1912 }
 1913 
 1914 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1915 {
 1916   return MachNode::size(ra_); // too many variables; just compute it
 1917                               // the hard way
 1918 }
 1919 
 1920 int MachPrologNode::reloc() const
 1921 {
 1922   return 0; // a large enough number
 1923 }
 1924 
 1925 //=============================================================================
 1926 #ifndef PRODUCT
 1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1928 {
 1929   Compile* C = ra_->C;
 1930   if (generate_vzeroupper(C)) {
 1931     st->print("vzeroupper");
 1932     st->cr(); st->print("\t");
 1933   }
 1934 
 1935   int framesize = C->output()->frame_size_in_bytes();
 1936   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the words for the return address (already pushed) and for RBP.
 1939   framesize -= 2*wordSize;
 1940 
 1941   if (framesize) {
 1942     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1943     st->print("\t");
 1944   }
 1945 
 1946   st->print_cr("popq    rbp");
 1947   if (do_polling() && C->is_method_compilation()) {
 1948     st->print("\t");
 1949     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1950                  "ja      #safepoint_stub\t"
 1951                  "# Safepoint: poll for GC");
 1952   }
 1953 }
 1954 #endif
 1955 
 1956 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1957 {
 1958   Compile* C = ra_->C;
 1959 
 1960   if (generate_vzeroupper(C)) {
 1961     // Clear upper bits of YMM registers when current compiled code uses
 1962     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1963     __ vzeroupper();
 1964   }
 1965 
 1966   int framesize = C->output()->frame_size_in_bytes();
 1967   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the words for the return address (already pushed) and for RBP.
 1970   framesize -= 2*wordSize;
 1971 
 1972   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1973 
 1974   if (framesize) {
 1975     __ addq(rsp, framesize);
 1976   }
 1977 
 1978   __ popq(rbp);
 1979 
 1980   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1981     __ reserved_stack_check();
 1982   }
 1983 
 1984   if (do_polling() && C->is_method_compilation()) {
 1985     Label dummy_label;
 1986     Label* code_stub = &dummy_label;
 1987     if (!C->output()->in_scratch_emit_size()) {
 1988       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1989       C->output()->add_stub(stub);
 1990       code_stub = &stub->entry();
 1991     }
 1992     __ relocate(relocInfo::poll_return_type);
 1993     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1994   }
 1995 }
 1996 
 1997 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1998 {
 1999   return MachNode::size(ra_); // too many variables; just compute it
 2000                               // the hard way
 2001 }
 2002 
 2003 int MachEpilogNode::reloc() const
 2004 {
 2005   return 2; // a large enough number
 2006 }
 2007 
 2008 const Pipeline* MachEpilogNode::pipeline() const
 2009 {
 2010   return MachNode::pipeline_class();
 2011 }
 2012 
 2013 //=============================================================================
 2014 
 2015 enum RC {
 2016   rc_bad,
 2017   rc_int,
 2018   rc_kreg,
 2019   rc_float,
 2020   rc_stack
 2021 };
 2022 
 2023 static enum RC rc_class(OptoReg::Name reg)
 2024 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2026 
 2027   if (OptoReg::is_stack(reg)) return rc_stack;
 2028 
 2029   VMReg r = OptoReg::as_VMReg(reg);
 2030 
 2031   if (r->is_Register()) return rc_int;
 2032 
 2033   if (r->is_KRegister()) return rc_kreg;
 2034 
 2035   assert(r->is_XMMRegister(), "must be");
 2036   return rc_float;
 2037 }
 2038 
 2039 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2040 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2041                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2042 
 2043 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2044                      int stack_offset, int reg, uint ireg, outputStream* st);
 2045 
 2046 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2047                                       int dst_offset, uint ireg, outputStream* st) {
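  // x86 has no general memory-to-memory move, so bounce through a scratch
  // register (rax or xmm0, preserved in the slack space just below RSP) or
  // use push/pop for 64-bit chunks.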
 2048   if (masm) {
 2049     switch (ireg) {
 2050     case Op_VecS:
 2051       __ movq(Address(rsp, -8), rax);
 2052       __ movl(rax, Address(rsp, src_offset));
 2053       __ movl(Address(rsp, dst_offset), rax);
 2054       __ movq(rax, Address(rsp, -8));
 2055       break;
 2056     case Op_VecD:
 2057       __ pushq(Address(rsp, src_offset));
 2058       __ popq (Address(rsp, dst_offset));
 2059       break;
 2060     case Op_VecX:
 2061       __ pushq(Address(rsp, src_offset));
 2062       __ popq (Address(rsp, dst_offset));
 2063       __ pushq(Address(rsp, src_offset+8));
 2064       __ popq (Address(rsp, dst_offset+8));
 2065       break;
 2066     case Op_VecY:
 2067       __ vmovdqu(Address(rsp, -32), xmm0);
 2068       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2069       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2070       __ vmovdqu(xmm0, Address(rsp, -32));
 2071       break;
 2072     case Op_VecZ:
 2073       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2074       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2075       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2076       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2077       break;
 2078     default:
 2079       ShouldNotReachHere();
 2080     }
 2081 #ifndef PRODUCT
 2082   } else {
 2083     switch (ireg) {
 2084     case Op_VecS:
 2085       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2086                 "movl    rax, [rsp + #%d]\n\t"
 2087                 "movl    [rsp + #%d], rax\n\t"
 2088                 "movq    rax, [rsp - #8]",
 2089                 src_offset, dst_offset);
 2090       break;
 2091     case Op_VecD:
 2092       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2093                 "popq    [rsp + #%d]",
 2094                 src_offset, dst_offset);
 2095       break;
 2096      case Op_VecX:
 2097       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2098                 "popq    [rsp + #%d]\n\t"
 2099                 "pushq   [rsp + #%d]\n\t"
 2100                 "popq    [rsp + #%d]",
 2101                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2102       break;
 2103     case Op_VecY:
 2104       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2105                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2106                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2107                 "vmovdqu xmm0, [rsp - #32]",
 2108                 src_offset, dst_offset);
 2109       break;
 2110     case Op_VecZ:
 2111       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2112                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2113                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2114                 "vmovdqu xmm0, [rsp - #64]",
 2115                 src_offset, dst_offset);
 2116       break;
 2117     default:
 2118       ShouldNotReachHere();
 2119     }
 2120 #endif
 2121   }
 2122 }
 2123 
 2124 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2125                                        PhaseRegAlloc* ra_,
 2126                                        bool do_size,
 2127                                        outputStream* st) const {
 2128   assert(masm != nullptr || st  != nullptr, "sanity");
 2129   // Get registers to move
 2130   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2131   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2132   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2133   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2134 
 2135   enum RC src_second_rc = rc_class(src_second);
 2136   enum RC src_first_rc = rc_class(src_first);
 2137   enum RC dst_second_rc = rc_class(dst_second);
 2138   enum RC dst_first_rc = rc_class(dst_first);
 2139 
 2140   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2141          "must move at least 1 register" );
 2142 
 2143   if (src_first == dst_first && src_second == dst_second) {
 2144     // Self copy, no move
 2145     return 0;
 2146   }
 2147   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2148     uint ireg = ideal_reg();
 2149     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2150     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
 2152       // mem -> mem
 2153       int src_offset = ra_->reg2offset(src_first);
 2154       int dst_offset = ra_->reg2offset(dst_first);
 2155       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
 2157       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
 2159       int stack_offset = ra_->reg2offset(dst_first);
 2160       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 2161     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 2162       int stack_offset = ra_->reg2offset(src_first);
 2163       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2164     } else {
 2165       ShouldNotReachHere();
 2166     }
 2167     return 0;
 2168   }
 2169   if (src_first_rc == rc_stack) {
 2170     // mem ->
 2171     if (dst_first_rc == rc_stack) {
 2172       // mem -> mem
 2173       assert(src_second != dst_first, "overlap");
 2174       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2175           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2176         // 64-bit
 2177         int src_offset = ra_->reg2offset(src_first);
 2178         int dst_offset = ra_->reg2offset(dst_first);
 2179         if (masm) {
 2180           __ pushq(Address(rsp, src_offset));
 2181           __ popq (Address(rsp, dst_offset));
 2182 #ifndef PRODUCT
 2183         } else {
 2184           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2185                     "popq    [rsp + #%d]",
 2186                      src_offset, dst_offset);
 2187 #endif
 2188         }
 2189       } else {
 2190         // 32-bit
 2191         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2192         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2193         // No pushl/popl, so:
 2194         int src_offset = ra_->reg2offset(src_first);
 2195         int dst_offset = ra_->reg2offset(dst_first);
 2196         if (masm) {
 2197           __ movq(Address(rsp, -8), rax);
 2198           __ movl(rax, Address(rsp, src_offset));
 2199           __ movl(Address(rsp, dst_offset), rax);
 2200           __ movq(rax, Address(rsp, -8));
 2201 #ifndef PRODUCT
 2202         } else {
 2203           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2204                     "movl    rax, [rsp + #%d]\n\t"
 2205                     "movl    [rsp + #%d], rax\n\t"
 2206                     "movq    rax, [rsp - #8]",
 2207                      src_offset, dst_offset);
 2208 #endif
 2209         }
 2210       }
 2211       return 0;
 2212     } else if (dst_first_rc == rc_int) {
 2213       // mem -> gpr
 2214       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2215           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2216         // 64-bit
 2217         int offset = ra_->reg2offset(src_first);
 2218         if (masm) {
 2219           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2220 #ifndef PRODUCT
 2221         } else {
 2222           st->print("movq    %s, [rsp + #%d]\t# spill",
 2223                      Matcher::regName[dst_first],
 2224                      offset);
 2225 #endif
 2226         }
 2227       } else {
 2228         // 32-bit
 2229         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2230         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2231         int offset = ra_->reg2offset(src_first);
 2232         if (masm) {
 2233           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2234 #ifndef PRODUCT
 2235         } else {
 2236           st->print("movl    %s, [rsp + #%d]\t# spill",
 2237                      Matcher::regName[dst_first],
 2238                      offset);
 2239 #endif
 2240         }
 2241       }
 2242       return 0;
 2243     } else if (dst_first_rc == rc_float) {
 2244       // mem -> xmm
 2245       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2246           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2247         // 64-bit
 2248         int offset = ra_->reg2offset(src_first);
 2249         if (masm) {
 2250           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2251 #ifndef PRODUCT
 2252         } else {
 2253           st->print("%s  %s, [rsp + #%d]\t# spill",
 2254                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2255                      Matcher::regName[dst_first],
 2256                      offset);
 2257 #endif
 2258         }
 2259       } else {
 2260         // 32-bit
 2261         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2262         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2263         int offset = ra_->reg2offset(src_first);
 2264         if (masm) {
 2265           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2266 #ifndef PRODUCT
 2267         } else {
 2268           st->print("movss   %s, [rsp + #%d]\t# spill",
 2269                      Matcher::regName[dst_first],
 2270                      offset);
 2271 #endif
 2272         }
 2273       }
 2274       return 0;
 2275     } else if (dst_first_rc == rc_kreg) {
 2276       // mem -> kreg
 2277       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2278           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2279         // 64-bit
 2280         int offset = ra_->reg2offset(src_first);
 2281         if (masm) {
 2282           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2283 #ifndef PRODUCT
 2284         } else {
 2285           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2286                      Matcher::regName[dst_first],
 2287                      offset);
 2288 #endif
 2289         }
 2290       }
 2291       return 0;
 2292     }
 2293   } else if (src_first_rc == rc_int) {
 2294     // gpr ->
 2295     if (dst_first_rc == rc_stack) {
 2296       // gpr -> mem
 2297       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2298           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2299         // 64-bit
 2300         int offset = ra_->reg2offset(dst_first);
 2301         if (masm) {
 2302           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2303 #ifndef PRODUCT
 2304         } else {
 2305           st->print("movq    [rsp + #%d], %s\t# spill",
 2306                      offset,
 2307                      Matcher::regName[src_first]);
 2308 #endif
 2309         }
 2310       } else {
 2311         // 32-bit
 2312         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2313         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2314         int offset = ra_->reg2offset(dst_first);
 2315         if (masm) {
 2316           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2317 #ifndef PRODUCT
 2318         } else {
 2319           st->print("movl    [rsp + #%d], %s\t# spill",
 2320                      offset,
 2321                      Matcher::regName[src_first]);
 2322 #endif
 2323         }
 2324       }
 2325       return 0;
 2326     } else if (dst_first_rc == rc_int) {
 2327       // gpr -> gpr
 2328       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2329           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2330         // 64-bit
 2331         if (masm) {
 2332           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2333                   as_Register(Matcher::_regEncode[src_first]));
 2334 #ifndef PRODUCT
 2335         } else {
 2336           st->print("movq    %s, %s\t# spill",
 2337                      Matcher::regName[dst_first],
 2338                      Matcher::regName[src_first]);
 2339 #endif
 2340         }
 2341         return 0;
 2342       } else {
 2343         // 32-bit
 2344         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2345         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2346         if (masm) {
 2347           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2348                   as_Register(Matcher::_regEncode[src_first]));
 2349 #ifndef PRODUCT
 2350         } else {
 2351           st->print("movl    %s, %s\t# spill",
 2352                      Matcher::regName[dst_first],
 2353                      Matcher::regName[src_first]);
 2354 #endif
 2355         }
 2356         return 0;
 2357       }
 2358     } else if (dst_first_rc == rc_float) {
 2359       // gpr -> xmm
 2360       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2361           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2362         // 64-bit
 2363         if (masm) {
 2364           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2365 #ifndef PRODUCT
 2366         } else {
 2367           st->print("movdq   %s, %s\t# spill",
 2368                      Matcher::regName[dst_first],
 2369                      Matcher::regName[src_first]);
 2370 #endif
 2371         }
 2372       } else {
 2373         // 32-bit
 2374         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2375         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2376         if (masm) {
 2377           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2378 #ifndef PRODUCT
 2379         } else {
 2380           st->print("movdl   %s, %s\t# spill",
 2381                      Matcher::regName[dst_first],
 2382                      Matcher::regName[src_first]);
 2383 #endif
 2384         }
 2385       }
 2386       return 0;
 2387     } else if (dst_first_rc == rc_kreg) {
 2388       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2389           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2390         // 64-bit
 2391         if (masm) {
 2392           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2393 #ifndef PRODUCT
 2394         } else {
 2395           st->print("kmovq   %s, %s\t# spill",
 2396                      Matcher::regName[dst_first],
 2397                      Matcher::regName[src_first]);
 2398 #endif
 2399         }
 2400       }
 2401       Unimplemented();
 2402       return 0;
 2403     }
 2404   } else if (src_first_rc == rc_float) {
 2405     // xmm ->
 2406     if (dst_first_rc == rc_stack) {
 2407       // xmm -> mem
 2408       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2409           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2410         // 64-bit
 2411         int offset = ra_->reg2offset(dst_first);
 2412         if (masm) {
 2413           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2414 #ifndef PRODUCT
 2415         } else {
 2416           st->print("movsd   [rsp + #%d], %s\t# spill",
 2417                      offset,
 2418                      Matcher::regName[src_first]);
 2419 #endif
 2420         }
 2421       } else {
 2422         // 32-bit
 2423         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2424         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2425         int offset = ra_->reg2offset(dst_first);
 2426         if (masm) {
 2427           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2428 #ifndef PRODUCT
 2429         } else {
 2430           st->print("movss   [rsp + #%d], %s\t# spill",
 2431                      offset,
 2432                      Matcher::regName[src_first]);
 2433 #endif
 2434         }
 2435       }
 2436       return 0;
 2437     } else if (dst_first_rc == rc_int) {
 2438       // xmm -> gpr
 2439       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2440           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2441         // 64-bit
 2442         if (masm) {
 2443           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2444 #ifndef PRODUCT
 2445         } else {
 2446           st->print("movdq   %s, %s\t# spill",
 2447                      Matcher::regName[dst_first],
 2448                      Matcher::regName[src_first]);
 2449 #endif
 2450         }
 2451       } else {
 2452         // 32-bit
 2453         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2454         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2455         if (masm) {
 2456           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2457 #ifndef PRODUCT
 2458         } else {
 2459           st->print("movdl   %s, %s\t# spill",
 2460                      Matcher::regName[dst_first],
 2461                      Matcher::regName[src_first]);
 2462 #endif
 2463         }
 2464       }
 2465       return 0;
 2466     } else if (dst_first_rc == rc_float) {
 2467       // xmm -> xmm
 2468       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2469           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2470         // 64-bit
 2471         if (masm) {
 2472           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2473 #ifndef PRODUCT
 2474         } else {
 2475           st->print("%s  %s, %s\t# spill",
 2476                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2477                      Matcher::regName[dst_first],
 2478                      Matcher::regName[src_first]);
 2479 #endif
 2480         }
 2481       } else {
 2482         // 32-bit
 2483         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2484         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2485         if (masm) {
 2486           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2487 #ifndef PRODUCT
 2488         } else {
 2489           st->print("%s  %s, %s\t# spill",
 2490                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2491                      Matcher::regName[dst_first],
 2492                      Matcher::regName[src_first]);
 2493 #endif
 2494         }
 2495       }
 2496       return 0;
 2497     } else if (dst_first_rc == rc_kreg) {
 2498       assert(false, "Illegal spilling");
 2499       return 0;
 2500     }
 2501   } else if (src_first_rc == rc_kreg) {
 2502     if (dst_first_rc == rc_stack) {
 2503       // kreg -> mem
 2504       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2505           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2506         // 64-bit
 2507         int offset = ra_->reg2offset(dst_first);
 2508         if (masm) {
 2509           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2510 #ifndef PRODUCT
 2511         } else {
 2512           st->print("kmovq   [rsp + #%d], %s\t# spill",
 2513                      offset,
 2514                      Matcher::regName[src_first]);
 2515 #endif
 2516         }
 2517       }
 2518       return 0;
 2519     } else if (dst_first_rc == rc_int) {
 2520       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2521           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2522         // 64-bit
 2523         if (masm) {
 2524           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2525 #ifndef PRODUCT
 2526         } else {
 2527           st->print("kmovq   %s, %s\t# spill",
 2528                      Matcher::regName[dst_first],
 2529                      Matcher::regName[src_first]);
 2530 #endif
 2531         }
 2532       }
 2533       Unimplemented();
 2534       return 0;
 2535     } else if (dst_first_rc == rc_kreg) {
 2536       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2537           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2538         // 64-bit
 2539         if (masm) {
 2540           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2541 #ifndef PRODUCT
 2542         } else {
 2543           st->print("kmovq   %s, %s\t# spill",
 2544                      Matcher::regName[dst_first],
 2545                      Matcher::regName[src_first]);
 2546 #endif
 2547         }
 2548       }
 2549       return 0;
 2550     } else if (dst_first_rc == rc_float) {
 2551       assert(false, "Illegal spill");
 2552       return 0;
 2553     }
 2554   }
 2555 
 2556   assert(false, "unhandled register class combination");
 2557   Unimplemented();
 2558   return 0;
 2559 }
 2560 
 2561 #ifndef PRODUCT
 2562 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2563   implementation(nullptr, ra_, false, st);
 2564 }
 2565 #endif
 2566 
 2567 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2568   implementation(masm, ra_, false, nullptr);
 2569 }
 2570 
 2571 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2572   return MachNode::size(ra_);
 2573 }
 2574 
 2575 //=============================================================================
 2576 #ifndef PRODUCT
 2577 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2578 {
 2579   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2580   int reg = ra_->get_reg_first(this);
 2581   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2582             Matcher::regName[reg], offset);
 2583 }
 2584 #endif
 2585 
 2586 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2587 {
 2588   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2589   int reg = ra_->get_encode(this);
 2590 
 2591   __ lea(as_Register(reg), Address(rsp, offset));
 2592 }
 2593 
 2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2595 {
 2596   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
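        // Size sketch: lea here encodes as prefix + opcode + ModRM + SIB (rsp
        // base) + disp, so a 1-byte REX prefix yields 5 bytes with a disp8 and
        // 8 with a disp32; the 2-byte REX2 prefix (encodings above 15) adds one.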
 2597   if (ra_->get_encode(this) > 15) {
 2598     return (offset < 0x80) ? 6 : 9; // REX2
 2599   } else {
 2600     return (offset < 0x80) ? 5 : 8; // REX
 2601   }
 2602 }
 2603 
 2604 //=============================================================================
 2605 #ifndef PRODUCT
 2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2607 {
 2608   if (UseCompressedClassPointers) {
 2609     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2610     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t# Inline cache check");
 2611   } else {
 2612     st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
 2613     st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t# Inline cache check");
 2614   }
 2615   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2616 }
 2617 #endif
 2618 
 2619 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2620 {
 2621   __ ic_check(InteriorEntryAlignment);
 2622 }
 2623 
 2624 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2625 {
 2626   return MachNode::size(ra_); // too many variables; just compute it
 2627                               // the hard way
 2628 }
 2629 
 2630 
 2631 //=============================================================================
 2632 
 2633 bool Matcher::supports_vector_calling_convention(void) {
 2634   return EnableVectorSupport;
 2635 }
 2636 
 2637 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2638   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2639 }
 2640 
 2641 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2642   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2643 }
 2644 
 2645 #ifdef ASSERT
 2646 static bool is_ndd_demotable(const MachNode* mdef) {
 2647   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2648 }
 2649 #endif
 2650 
 2651 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2652                                             int oper_index) {
 2653   if (mdef == nullptr) {
 2654     return false;
 2655   }
 2656 
 2657   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2658       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2659     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2660     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2661     return false;
 2662   }
 2663 
 2664   // A complex memory operand covers multiple incoming edges needed for
 2665   // address computation. Biasing the def towards any single address
 2666   // component will not result in NDD demotion by the assembler.
 2667   if (mdef->operand_num_edges(oper_index) != 1) {
 2668     return false;
 2669   }
 2670 
 2671   // Demotion candidate must be register mask compatible with definition.
 2672   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2673   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2674     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2675     return false;
 2676   }
 2677 
 2678   switch (oper_index) {
 2679   // The first operand of a MachNode matched by an Intel APX NDD selection
 2680   // pattern can share its assigned register with the definition operand if
 2681   // their live ranges do not overlap. In such a scenario we can demote the
 2682   // instruction to a legacy map0/map1 encoding by replacing its 4-byte
 2683   // extended EVEX prefix with the shorter REX/REX2 encoding. Demotion
 2684   // candidates are decorated with a special flag by the instruction selector.
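        // For example (illustrative): a three-operand NDD add "dst = src1 + src2"
        // carrying a 4-byte EVEX prefix can instead be emitted as the legacy
        // two-operand "add dst, src2" once dst is biased onto src1's register.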
 2685   case 1:
 2686     return is_ndd_demotable_opr1(mdef);
 2687 
 2688   // The definition operand of a commutative operation can likewise be
 2689   // biased towards the second operand.
 2690   case 2:
 2691     return is_ndd_demotable_opr2(mdef);
 2692 
 2693   // The current scheme only selects up to two biasing candidates.
 2694   default:
 2695     assert(false, "unhandled operand index: %s", mdef->Name());
 2696     break;
 2697   }
 2698 
 2699   return false;
 2700 }
 2701 
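      // Vector values are returned in XMM0; the second register of the returned
      // pair names the highest XMM0 slice covered by the ideal register type.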
 2702 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2703   assert(EnableVectorSupport, "sanity");
 2704   int lo = XMM0_num;
 2705   int hi = XMM0b_num;
 2706   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2707   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2708   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2709   return OptoRegPair(hi, lo);
 2710 }
 2711 
 2712 // Is this branch offset short enough that a short branch can be used?
 2713 //
 2714 // NOTE: If the platform does not provide any short branch variants, then
 2715 //       this method should return false for offset 0.
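      //
      // For example (illustrative): a short jcc is 2 bytes, so with br_size == 2
      // a branch-relative offset in [-126, 129] lands in the rel8 displacement
      // range [-128, 127] and qualifies for the short form.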
 2716 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2717   // The passed offset is relative to address of the branch.
 2718   // On x86 a branch displacement is calculated relative to the address
 2719   // of the next instruction.
 2720   offset -= br_size;
 2721 
 2722   // The short version of jmpConUCF2 contains multiple branches,
 2723   // making the reach slightly shorter.
 2724   if (rule == jmpConUCF2_rule)
 2725     return (-126 <= offset && offset <= 125);
 2726   return (-128 <= offset && offset <= 127);
 2727 }
 2728 
 2729 #ifdef ASSERT
 2730 // Return whether or not this register is ever used as an argument.
 2731 bool Matcher::can_be_java_arg(int reg)
 2732 {
 2733   return
 2734     reg ==  RDI_num || reg == RDI_H_num ||
 2735     reg ==  RSI_num || reg == RSI_H_num ||
 2736     reg ==  RDX_num || reg == RDX_H_num ||
 2737     reg ==  RCX_num || reg == RCX_H_num ||
 2738     reg ==   R8_num || reg ==  R8_H_num ||
 2739     reg ==   R9_num || reg ==  R9_H_num ||
 2740     reg ==  R12_num || reg == R12_H_num ||
 2741     reg == XMM0_num || reg == XMM0b_num ||
 2742     reg == XMM1_num || reg == XMM1b_num ||
 2743     reg == XMM2_num || reg == XMM2b_num ||
 2744     reg == XMM3_num || reg == XMM3b_num ||
 2745     reg == XMM4_num || reg == XMM4b_num ||
 2746     reg == XMM5_num || reg == XMM5b_num ||
 2747     reg == XMM6_num || reg == XMM6b_num ||
 2748     reg == XMM7_num || reg == XMM7b_num;
 2749 }
 2750 #endif
 2751 
 2752 uint Matcher::int_pressure_limit()
 2753 {
 2754   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2755 }
 2756 
 2757 uint Matcher::float_pressure_limit()
 2758 {
 2759   // After experimenting with different values, the following default threshold
 2760   // works best for LCM's register pressure scheduling on x64.
 2761   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2762   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2763   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2764 }
 2765 
 2766 // Register for DIVI projection of divmodI
 2767 const RegMask& Matcher::divI_proj_mask() {
 2768   return INT_RAX_REG_mask();
 2769 }
 2770 
 2771 // Register for MODI projection of divmodI
 2772 const RegMask& Matcher::modI_proj_mask() {
 2773   return INT_RDX_REG_mask();
 2774 }
 2775 
 2776 // Register for DIVL projection of divmodL
 2777 const RegMask& Matcher::divL_proj_mask() {
 2778   return LONG_RAX_REG_mask();
 2779 }
 2780 
 2781 // Register for MODL projection of divmodL
 2782 const RegMask& Matcher::modL_proj_mask() {
 2783   return LONG_RDX_REG_mask();
 2784 }
 2785 
 2786 %}
 2787 
 2788 source_hpp %{
 2789 // Header information of the source block.
 2790 // Method declarations/definitions which are used outside
 2791 // the ad-scope can conveniently be defined here.
 2792 //
 2793 // To keep related declarations/definitions/uses close together,
 2794 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2795 
 2796 #include "runtime/vm_version.hpp"
 2797 
 2798 class NativeJump;
 2799 
 2800 class CallStubImpl {
 2801 
 2802   //--------------------------------------------------------------
 2803   //---<  Used for optimization in Compile::shorten_branches  >---
 2804   //--------------------------------------------------------------
 2805 
 2806  public:
 2807   // Size of call trampoline stub.
 2808   static uint size_call_trampoline() {
 2809     return 0; // no call trampolines on this platform
 2810   }
 2811 
 2812   // number of relocations needed by a call trampoline stub
 2813   static uint reloc_call_trampoline() {
 2814     return 0; // no call trampolines on this platform
 2815   }
 2816 };
 2817 
 2818 class HandlerImpl {
 2819 
 2820  public:
 2821 
 2822   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2823 
 2824   static uint size_deopt_handler() {
 2825     // one call (E8 + rel32 = 5 bytes) and one short jmp (EB + rel8 = 2 bytes).
 2826     return 7;
 2827   }
 2828 };
 2829 
 2830 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2831   switch (bytes) {
 2832     case  4: // fall-through
 2833     case  8: // fall-through
 2834     case 16: return Assembler::AVX_128bit;
 2835     case 32: return Assembler::AVX_256bit;
 2836     case 64: return Assembler::AVX_512bit;
 2837 
 2838     default: {
 2839       ShouldNotReachHere();
 2840       return Assembler::AVX_NoVec;
 2841     }
 2842   }
 2843 }
 2844 
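      // Convenience overloads: derive the encoding from a vector node's length
      // in bytes, or from the node feeding a machine operand.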
 2845 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2846   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2847 }
 2848 
 2849 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2850   uint def_idx = use->operand_index(opnd);
 2851   Node* def = use->in(def_idx);
 2852   return vector_length_encoding(def);
 2853 }
 2854 
 2855 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2856   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2857          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2858 }
 2859 
 2860 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2861   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2862            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2863 }
 2864 
 2865 class Node::PD {
 2866 public:
 2867   enum NodeFlags : uint64_t {
 2868     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2869     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2870     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2871     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2872     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2873     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2874     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2875     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2876     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2877     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2878     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2879     Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
 2880     Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
 2881     _last_flag                = Flag_ndd_demotable_opr2
 2882   };
 2883 };
 2884 
 2885 %} // end source_hpp
 2886 
 2887 source %{
 2888 
 2889 #include "opto/addnode.hpp"
 2890 #include "c2_intelJccErratum_x86.hpp"
 2891 
 2892 void PhaseOutput::pd_perform_mach_node_analysis() {
 2893   if (VM_Version::has_intel_jcc_erratum()) {
 2894     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2895     _buf_sizes._code += extra_padding;
 2896   }
 2897 }
 2898 
 2899 int MachNode::pd_alignment_required() const {
 2900   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2901     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2902     return IntelJccErratum::largest_jcc_size() + 1;
 2903   } else {
 2904     return 1;
 2905   }
 2906 }
 2907 
 2908 int MachNode::compute_padding(int current_offset) const {
 2909   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2910     Compile* C = Compile::current();
 2911     PhaseOutput* output = C->output();
 2912     Block* block = output->block();
 2913     int index = output->index();
 2914     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2915   } else {
 2916     return 0;
 2917   }
 2918 }
 2919 
 2920 // Emit deopt handler code.
 2921 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2922 
 2923   // Note that the code buffer's insts_mark is always relative to insts.
 2924   // That's why we must use the macroassembler to generate a handler.
 2925   address base = __ start_a_stub(size_deopt_handler());
 2926   if (base == nullptr) {
 2927     ciEnv::current()->record_failure("CodeCache is full");
 2928     return 0;  // CodeBuffer::expand failed
 2929   }
 2930   int offset = __ offset();
 2931 
 2932   Label start;
 2933   __ bind(start);
 2934 
 2935   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2936 
 2937   int entry_offset = __ offset();
 2938 
 2939   __ jmp(start);
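        // Note: entering the handler at entry_offset executes this jmp back to
        // the call, so the call's pushed return address points at entry_offset.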
 2940 
 2941   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2942   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2943          "out of bounds read in post-call NOP check");
 2944   __ end_a_stub();
 2945   return entry_offset;
 2946 }
 2947 
 2948 static Assembler::Width widthForType(BasicType bt) {
 2949   if (bt == T_BYTE) {
 2950     return Assembler::B;
 2951   } else if (bt == T_SHORT) {
 2952     return Assembler::W;
 2953   } else if (bt == T_INT) {
 2954     return Assembler::D;
 2955   } else {
 2956     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2957     return Assembler::Q;
 2958   }
 2959 }
 2960 
 2961 //=============================================================================
 2962 
 2963 // Float masks come from different places depending on platform.
 2964 static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2965 static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2966 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2967 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2968 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2969 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2970 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2971 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2972 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2973 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2974 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2975 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2976 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2977 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2978 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2979 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2980 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2981 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
 2982 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
 2983 
 2984 //=============================================================================
 2985 bool Matcher::match_rule_supported(int opcode) {
 2986   if (!has_match_rule(opcode)) {
 2987     return false; // no match rule present
 2988   }
 2989   switch (opcode) {
 2990     case Op_AbsVL:
 2991     case Op_StoreVectorScatter:
 2992       if (UseAVX < 3) {
 2993         return false;
 2994       }
 2995       break;
 2996     case Op_PopCountI:
 2997     case Op_PopCountL:
 2998       if (!UsePopCountInstruction) {
 2999         return false;
 3000       }
 3001       break;
 3002     case Op_PopCountVI:
 3003       if (UseAVX < 2) {
 3004         return false;
 3005       }
 3006       break;
 3007     case Op_CompressV:
 3008     case Op_ExpandV:
 3009     case Op_PopCountVL:
 3010       if (UseAVX < 2) {
 3011         return false;
 3012       }
 3013       break;
 3014     case Op_MulVI:
 3015       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3016         return false;
 3017       }
 3018       break;
 3019     case Op_MulVL:
 3020       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3021         return false;
 3022       }
 3023       break;
 3024     case Op_MulReductionVL:
 3025       if (!VM_Version::supports_avx512dq()) {
 3026         return false;
 3027       }
 3028       break;
 3029     case Op_AbsVB:
 3030     case Op_AbsVS:
 3031     case Op_AbsVI:
 3032     case Op_AddReductionVI:
 3033     case Op_AndReductionV:
 3034     case Op_OrReductionV:
 3035     case Op_XorReductionV:
 3036       if (UseSSE < 3) { // requires at least SSSE3
 3037         return false;
 3038       }
 3039       break;
 3040     case Op_MaxHF:
 3041     case Op_MinHF:
 3042       if (!VM_Version::supports_avx512vlbw()) {
 3043         return false;
 3044       } // fallthrough
 3045     case Op_AddHF:
 3046     case Op_DivHF:
 3047     case Op_FmaHF:
 3048     case Op_MulHF:
 3049     case Op_ReinterpretS2HF:
 3050     case Op_ReinterpretHF2S:
 3051     case Op_SubHF:
 3052     case Op_SqrtHF:
 3053       if (!VM_Version::supports_avx512_fp16()) {
 3054         return false;
 3055       }
 3056       break;
 3057     case Op_VectorLoadShuffle:
 3058     case Op_VectorRearrange:
 3059     case Op_MulReductionVI:
 3060       if (UseSSE < 4) { // requires at least SSE4
 3061         return false;
 3062       }
 3063       break;
 3064     case Op_IsInfiniteF:
 3065     case Op_IsInfiniteD:
 3066       if (!VM_Version::supports_avx512dq()) {
 3067         return false;
 3068       }
 3069       break;
 3070     case Op_SqrtVD:
 3071     case Op_SqrtVF:
 3072     case Op_VectorMaskCmp:
 3073     case Op_VectorCastB2X:
 3074     case Op_VectorCastS2X:
 3075     case Op_VectorCastI2X:
 3076     case Op_VectorCastL2X:
 3077     case Op_VectorCastF2X:
 3078     case Op_VectorCastD2X:
 3079     case Op_VectorUCastB2X:
 3080     case Op_VectorUCastS2X:
 3081     case Op_VectorUCastI2X:
 3082     case Op_VectorMaskCast:
 3083       if (UseAVX < 1) { // enabled for AVX only
 3084         return false;
 3085       }
 3086       break;
 3087     case Op_PopulateIndex:
 3088       if (UseAVX < 2) {
 3089         return false;
 3090       }
 3091       break;
 3092     case Op_RoundVF:
 3093       if (UseAVX < 2) { // enabled for AVX2 only
 3094         return false;
 3095       }
 3096       break;
 3097     case Op_RoundVD:
 3098       if (UseAVX < 3) {
 3099         return false;  // enabled for AVX3 only
 3100       }
 3101       break;
 3102     case Op_CompareAndSwapL:
 3103     case Op_CompareAndSwapP:
 3104       break;
 3105     case Op_StrIndexOf:
 3106       if (!UseSSE42Intrinsics) {
 3107         return false;
 3108       }
 3109       break;
 3110     case Op_StrIndexOfChar:
 3111       if (!UseSSE42Intrinsics) {
 3112         return false;
 3113       }
 3114       break;
 3115     case Op_OnSpinWait:
 3116       if (!VM_Version::supports_on_spin_wait()) {
 3117         return false;
 3118       }
 3119       break;
 3120     case Op_MulVB:
 3121     case Op_LShiftVB:
 3122     case Op_RShiftVB:
 3123     case Op_URShiftVB:
 3124     case Op_VectorInsert:
 3125     case Op_VectorLoadMask:
 3126     case Op_VectorStoreMask:
 3127     case Op_VectorBlend:
 3128       if (UseSSE < 4) {
 3129         return false;
 3130       }
 3131       break;
 3132     case Op_MaxD:
 3133     case Op_MaxF:
 3134     case Op_MinD:
 3135     case Op_MinF:
 3136       if (UseAVX < 1) { // enabled for AVX only
 3137         return false;
 3138       }
 3139       break;
 3140     case Op_CacheWB:
 3141     case Op_CacheWBPreSync:
 3142     case Op_CacheWBPostSync:
 3143       if (!VM_Version::supports_data_cache_line_flush()) {
 3144         return false;
 3145       }
 3146       break;
 3147     case Op_ExtractB:
 3148     case Op_ExtractL:
 3149     case Op_ExtractI:
 3150     case Op_RoundDoubleMode:
 3151       if (UseSSE < 4) {
 3152         return false;
 3153       }
 3154       break;
 3155     case Op_RoundDoubleModeV:
 3156       if (!VM_Version::supports_avx()) {
 3157         return false; // 128bit vroundpd is not available
 3158       }
 3159       break;
 3160     case Op_LoadVectorGather:
 3161     case Op_LoadVectorGatherMasked:
 3162       if (UseAVX < 2) {
 3163         return false;
 3164       }
 3165       break;
 3166     case Op_FmaF:
 3167     case Op_FmaD:
 3168     case Op_FmaVD:
 3169     case Op_FmaVF:
 3170       if (!UseFMA) {
 3171         return false;
 3172       }
 3173       break;
 3174     case Op_MacroLogicV:
 3175       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3176         return false;
 3177       }
 3178       break;
 3179 
 3180     case Op_VectorCmpMasked:
 3181     case Op_VectorMaskGen:
 3182       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3183         return false;
 3184       }
 3185       break;
 3186     case Op_VectorMaskFirstTrue:
 3187     case Op_VectorMaskLastTrue:
 3188     case Op_VectorMaskTrueCount:
 3189     case Op_VectorMaskToLong:
 3190       if (UseAVX < 1) {
 3191         return false;
 3192       }
 3193       break;
 3194     case Op_RoundF:
 3195     case Op_RoundD:
 3196       break;
 3197     case Op_CopySignD:
 3198     case Op_CopySignF:
 3199       if (UseAVX < 3) {
 3200         return false;
 3201       }
 3202       if (!VM_Version::supports_avx512vl()) {
 3203         return false;
 3204       }
 3205       break;
 3206     case Op_CompressBits:
 3207     case Op_ExpandBits:
 3208       if (!VM_Version::supports_bmi2()) {
 3209         return false;
 3210       }
 3211       break;
 3212     case Op_CompressM:
 3213       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3214         return false;
 3215       }
 3216       break;
 3217     case Op_ConvF2HF:
 3218     case Op_ConvHF2F:
 3219       if (!VM_Version::supports_float16()) {
 3220         return false;
 3221       }
 3222       break;
 3223     case Op_VectorCastF2HF:
 3224     case Op_VectorCastHF2F:
 3225       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3226         return false;
 3227       }
 3228       break;
 3229   }
 3230   return true;  // Match rules are supported by default.
 3231 }
 3232 
 3233 //------------------------------------------------------------------------
 3234 
 3235 static inline bool is_pop_count_instr_target(BasicType bt) {
 3236   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3237          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3238 }
 3239 
 3240 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3241   return match_rule_supported_vector(opcode, vlen, bt);
 3242 }
 3243 
 3244 // Identify extra cases that we might want to provide match rules for vector nodes and
 3245 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3246 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3247   if (!match_rule_supported(opcode)) {
 3248     return false;
 3249   }
 3250   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3251   //   * SSE2 supports 128bit vectors for all types;
 3252   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3253   //   * AVX2 supports 256bit vectors for all types;
 3254   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3255   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3256   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3257   // And MaxVectorSize is taken into account as well.
 3258   if (!vector_size_supported(bt, vlen)) {
 3259     return false;
 3260   }
 3261   // Special cases which require vector length follow:
 3262   //   * implementation limitations
 3263   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3264   //   * 128bit vroundpd instruction is present only in AVX1
 3265   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3266   switch (opcode) {
 3267     case Op_MaxVHF:
 3268     case Op_MinVHF:
 3269       if (!VM_Version::supports_avx512bw()) {
 3270         return false;
 3271       }
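            // fallthrough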
 3272     case Op_AddVHF:
 3273     case Op_DivVHF:
 3274     case Op_FmaVHF:
 3275     case Op_MulVHF:
 3276     case Op_SubVHF:
 3277     case Op_SqrtVHF:
 3278       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3279         return false;
 3280       }
 3281       if (!VM_Version::supports_avx512_fp16()) {
 3282         return false;
 3283       }
 3284       break;
 3285     case Op_AbsVF:
 3286     case Op_NegVF:
 3287       if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
 3288         return false; // 512bit vandps and vxorps are not available
 3289       }
 3290       break;
 3291     case Op_AbsVD:
 3292     case Op_NegVD:
 3293       if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
 3294         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3295       }
 3296       break;
 3297     case Op_RotateRightV:
 3298     case Op_RotateLeftV:
 3299       if (bt != T_INT && bt != T_LONG) {
 3300         return false;
 3301       } // fallthrough
 3302     case Op_MacroLogicV:
 3303       if (!VM_Version::supports_evex() ||
 3304           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3305         return false;
 3306       }
 3307       break;
 3308     case Op_ClearArray:
 3309     case Op_VectorMaskGen:
 3310     case Op_VectorCmpMasked:
 3311       if (!VM_Version::supports_avx512bw()) {
 3312         return false;
 3313       }
 3314       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3315         return false;
 3316       }
 3317       break;
 3318     case Op_LoadVectorMasked:
 3319     case Op_StoreVectorMasked:
 3320       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3321         return false;
 3322       }
 3323       break;
 3324     case Op_UMinV:
 3325     case Op_UMaxV:
 3326       if (UseAVX == 0) {
 3327         return false;
 3328       }
 3329       break;
 3330     case Op_UMinReductionV:
 3331     case Op_UMaxReductionV:
 3332       if (UseAVX == 0) {
 3333         return false;
 3334       }
 3335       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3336         return false;
 3337       }
 3338       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3339         return false;
 3340       }
 3341       break;
 3342     case Op_MaxV:
 3343     case Op_MinV:
 3344       if (UseSSE < 4 && is_integral_type(bt)) {
 3345         return false;
 3346       }
 3347       if (bt == T_FLOAT || bt == T_DOUBLE) {
 3348         // Float/Double intrinsics are enabled for AVX family currently.
 3349         if (UseAVX == 0) {
 3350           return false;
 3351         }
 3352         if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3353           return false;
 3354         }
 3355       }
 3356       break;
 3357     case Op_CallLeafVector:
 3358       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3359         return false;
 3360       }
 3361       break;
 3362     case Op_AddReductionVI:
 3363       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3364         return false;
 3365       }
 3366       // fallthrough
 3367     case Op_AndReductionV:
 3368     case Op_OrReductionV:
 3369     case Op_XorReductionV:
 3370       if (is_subword_type(bt) && (UseSSE < 4)) {
 3371         return false;
 3372       }
 3373       break;
 3374     case Op_MinReductionV:
 3375     case Op_MaxReductionV:
 3376       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3377         return false;
 3378       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3379         return false;
 3380       }
 3381       // Float/Double intrinsics enabled for AVX family.
 3382       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3383         return false;
 3384       }
 3385       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3386         return false;
 3387       }
 3388       break;
 3389     case Op_VectorBlend:
 3390       if (UseAVX == 0 && size_in_bits < 128) {
 3391         return false;
 3392       }
 3393       break;
 3394     case Op_VectorTest:
 3395       if (UseSSE < 4) {
 3396         return false; // Implementation limitation
 3397       } else if (size_in_bits < 32) {
 3398         return false; // Implementation limitation
 3399       }
 3400       break;
 3401     case Op_VectorLoadShuffle:
 3402     case Op_VectorRearrange:
 3403       if (vlen == 2) {
 3404         return false; // Implementation limitation due to how shuffle is loaded
 3405       } else if (size_in_bits == 256 && UseAVX < 2) {
 3406         return false; // Implementation limitation
 3407       }
 3408       break;
 3409     case Op_VectorLoadMask:
 3410     case Op_VectorMaskCast:
 3411       if (size_in_bits == 256 && UseAVX < 2) {
 3412         return false; // Implementation limitation
 3413       }
 3414       // fallthrough
 3415     case Op_VectorStoreMask:
 3416       if (vlen == 2) {
 3417         return false; // Implementation limitation
 3418       }
 3419       break;
 3420     case Op_PopulateIndex:
 3421       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3422         return false;
 3423       }
 3424       break;
 3425     case Op_VectorCastB2X:
 3426     case Op_VectorCastS2X:
 3427     case Op_VectorCastI2X:
 3428       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3429         return false;
 3430       }
 3431       break;
 3432     case Op_VectorCastL2X:
 3433       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3434         return false;
 3435       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3436         return false;
 3437       }
 3438       break;
 3439     case Op_VectorCastF2X: {
 3440         // As per JLS section 5.1.3, narrowing conversions to sub-word types
 3441         // happen after an intermediate conversion to integer, and the special
 3442         // handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
 3443         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3444         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3445           return false;
 3446         }
 3447       }
 3448       // fallthrough
 3449     case Op_VectorCastD2X:
 3450       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3451         return false;
 3452       }
 3453       break;
 3454     case Op_VectorCastF2HF:
 3455     case Op_VectorCastHF2F:
 3456       if (!VM_Version::supports_f16c() &&
 3457          ((!VM_Version::supports_evex() ||
 3458          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3459         return false;
 3460       }
 3461       break;
 3462     case Op_RoundVD:
 3463       if (!VM_Version::supports_avx512dq()) {
 3464         return false;
 3465       }
 3466       break;
 3467     case Op_MulReductionVI:
 3468       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3469         return false;
 3470       }
 3471       break;
 3472     case Op_LoadVectorGatherMasked:
 3473       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3474         return false;
 3475       }
 3476       if (is_subword_type(bt) &&
 3477          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3478           (size_in_bits < 64)                                      ||
 3479           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3480         return false;
 3481       }
 3482       break;
 3483     case Op_StoreVectorScatterMasked:
 3484     case Op_StoreVectorScatter:
 3485       if (is_subword_type(bt)) {
 3486         return false;
 3487       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3488         return false;
 3489       }
 3490       // fallthrough
 3491     case Op_LoadVectorGather:
 3492       if (!is_subword_type(bt) && size_in_bits == 64) {
 3493         return false;
 3494       }
 3495       if (is_subword_type(bt) && size_in_bits < 64) {
 3496         return false;
 3497       }
 3498       break;
 3499     case Op_SaturatingAddV:
 3500     case Op_SaturatingSubV:
 3501       if (UseAVX < 1) {
 3502         return false; // Implementation limitation
 3503       }
 3504       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3505         return false;
 3506       }
 3507       break;
 3508     case Op_SelectFromTwoVector:
 3509       if (size_in_bits < 128) {
 3510         return false;
 3511       }
 3512       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3513         return false;
 3514       }
 3515       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3516         return false;
 3517       }
 3518       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3519         return false;
 3520       }
 3521       if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3522         return false;
 3523       }
 3524       break;
 3525     case Op_MaskAll:
 3526       if (!VM_Version::supports_evex()) {
 3527         return false;
 3528       }
 3529       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3530         return false;
 3531       }
 3532       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3533         return false;
 3534       }
 3535       break;
 3536     case Op_VectorMaskCmp:
 3537       if (vlen < 2 || size_in_bits < 32) {
 3538         return false;
 3539       }
 3540       break;
 3541     case Op_CompressM:
 3542       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3543         return false;
 3544       }
 3545       break;
 3546     case Op_CompressV:
 3547     case Op_ExpandV:
 3548       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3549         return false;
 3550       }
 3551       if (size_in_bits < 128) {
 3552         return false;
 3553       }
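            // fallthrough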
 3554     case Op_VectorLongToMask:
 3555       if (UseAVX < 1) {
 3556         return false;
 3557       }
 3558       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3559         return false;
 3560       }
 3561       break;
 3562     case Op_SignumVD:
 3563     case Op_SignumVF:
 3564       if (UseAVX < 1) {
 3565         return false;
 3566       }
 3567       break;
 3568     case Op_PopCountVI:
 3569     case Op_PopCountVL: {
 3570         if (!is_pop_count_instr_target(bt) &&
 3571             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3572           return false;
 3573         }
 3574       }
 3575       break;
 3576     case Op_ReverseV:
 3577     case Op_ReverseBytesV:
 3578       if (UseAVX < 2) {
 3579         return false;
 3580       }
 3581       break;
 3582     case Op_CountTrailingZerosV:
 3583     case Op_CountLeadingZerosV:
 3584       if (UseAVX < 2) {
 3585         return false;
 3586       }
 3587       break;
 3588   }
 3589   return true;  // By default, match rules are supported.
 3590 }
 3591 
 3592 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 3593   // The ADLC-based match_rule_supported routine checks for the existence of a
 3594   // pattern based on the IR opcode. Most unary/binary/ternary masked operations
 3595   // share the IR nodes of their non-masked counterparts, with the mask edge
 3596   // being the differentiator. This routine does a strict check on the existence
 3597   // of masked operation patterns by returning false for all opcodes apart from
 3598   // the ones whose masked instruction patterns are defined in this file.
 3599   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3600     return false;
 3601   }
 3602 
 3603   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3604   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3605     return false;
 3606   }
 3607   switch (opcode) {
 3608     // Unary masked operations
 3609     case Op_AbsVB:
 3610     case Op_AbsVS:
 3611       if (!VM_Version::supports_avx512bw()) {
 3612         return false;  // Implementation limitation
 3613       }
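            // fallthrough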
 3614     case Op_AbsVI:
 3615     case Op_AbsVL:
 3616       return true;
 3617 
 3618     // Ternary masked operations
 3619     case Op_FmaVF:
 3620     case Op_FmaVD:
 3621       return true;
 3622 
 3623     case Op_MacroLogicV:
 3624       if (bt != T_INT && bt != T_LONG) {
 3625         return false;
 3626       }
 3627       return true;
 3628 
 3629     // Binary masked operations
 3630     case Op_AddVB:
 3631     case Op_AddVS:
 3632     case Op_SubVB:
 3633     case Op_SubVS:
 3634     case Op_MulVS:
 3635     case Op_LShiftVS:
 3636     case Op_RShiftVS:
 3637     case Op_URShiftVS:
 3638       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3639       if (!VM_Version::supports_avx512bw()) {
 3640         return false;  // Implementation limitation
 3641       }
 3642       return true;
 3643 
 3644     case Op_MulVL:
 3645       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3646       if (!VM_Version::supports_avx512dq()) {
 3647         return false;  // Implementation limitation
 3648       }
 3649       return true;
 3650 
 3651     case Op_AndV:
 3652     case Op_OrV:
 3653     case Op_XorV:
 3654     case Op_RotateRightV:
 3655     case Op_RotateLeftV:
 3656       if (bt != T_INT && bt != T_LONG) {
 3657         return false; // Implementation limitation
 3658       }
 3659       return true;
 3660 
 3661     case Op_VectorLoadMask:
 3662       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3663       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3664         return false;
 3665       }
 3666       return true;
 3667 
 3668     case Op_AddVI:
 3669     case Op_AddVL:
 3670     case Op_AddVF:
 3671     case Op_AddVD:
 3672     case Op_SubVI:
 3673     case Op_SubVL:
 3674     case Op_SubVF:
 3675     case Op_SubVD:
 3676     case Op_MulVI:
 3677     case Op_MulVF:
 3678     case Op_MulVD:
 3679     case Op_DivVF:
 3680     case Op_DivVD:
 3681     case Op_SqrtVF:
 3682     case Op_SqrtVD:
 3683     case Op_LShiftVI:
 3684     case Op_LShiftVL:
 3685     case Op_RShiftVI:
 3686     case Op_RShiftVL:
 3687     case Op_URShiftVI:
 3688     case Op_URShiftVL:
 3689     case Op_LoadVectorMasked:
 3690     case Op_StoreVectorMasked:
 3691     case Op_LoadVectorGatherMasked:
 3692     case Op_StoreVectorScatterMasked:
 3693       return true;
 3694 
 3695     case Op_UMinV:
 3696     case Op_UMaxV:
 3697       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3698         return false;
 3699       } // fallthrough
 3700     case Op_MaxV:
 3701     case Op_MinV:
 3702       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3703         return false; // Implementation limitation
 3704       }
 3705       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3706         return false; // Implementation limitation
 3707       }
 3708       return true;
 3709     case Op_SaturatingAddV:
 3710     case Op_SaturatingSubV:
 3711       if (!is_subword_type(bt)) {
 3712         return false;
 3713       }
 3714       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3715         return false; // Implementation limitation
 3716       }
 3717       return true;
 3718 
 3719     case Op_VectorMaskCmp:
 3720       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3721         return false; // Implementation limitation
 3722       }
 3723       return true;
 3724 
 3725     case Op_VectorRearrange:
 3726       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3727         return false; // Implementation limitation
 3728       }
 3729       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3730         return false; // Implementation limitation
 3731       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3732         return false; // Implementation limitation
 3733       }
 3734       return true;
 3735 
 3736     // Binary Logical operations
 3737     case Op_AndVMask:
 3738     case Op_OrVMask:
 3739     case Op_XorVMask:
 3740       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3741         return false; // Implementation limitation
 3742       }
 3743       return true;
 3744 
 3745     case Op_PopCountVI:
 3746     case Op_PopCountVL:
 3747       if (!is_pop_count_instr_target(bt)) {
 3748         return false;
 3749       }
 3750       return true;
 3751 
 3752     case Op_MaskAll:
 3753       return true;
 3754 
 3755     case Op_CountLeadingZerosV:
 3756       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3757         return true;
 3758       }
 3759     default:
 3760       return false;
 3761   }
 3762 }
 3763 
 3764 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3765   return false;
 3766 }
 3767 
 3768 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3769 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3770   switch (elem_bt) {
 3771     case T_BYTE:  return false;
 3772     case T_SHORT: return !VM_Version::supports_avx512bw();
 3773     case T_INT:   return !VM_Version::supports_avx();
 3774     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3775     default:
 3776       ShouldNotReachHere();
 3777       return false;
 3778   }
 3779 }
 3780 
 3781 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3782   // Prefer predicate if the mask type is "TypeVectMask".
 3783   return vt->isa_vectmask() != nullptr;
 3784 }
 3785 
 3786 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3787   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3788   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3789   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3790       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3791     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3792     return new legVecZOper();
 3793   }
 3794   if (legacy) {
 3795     switch (ideal_reg) {
 3796       case Op_VecS: return new legVecSOper();
 3797       case Op_VecD: return new legVecDOper();
 3798       case Op_VecX: return new legVecXOper();
 3799       case Op_VecY: return new legVecYOper();
 3800       case Op_VecZ: return new legVecZOper();
 3801     }
 3802   } else {
 3803     switch (ideal_reg) {
 3804       case Op_VecS: return new vecSOper();
 3805       case Op_VecD: return new vecDOper();
 3806       case Op_VecX: return new vecXOper();
 3807       case Op_VecY: return new vecYOper();
 3808       case Op_VecZ: return new vecZOper();
 3809     }
 3810   }
 3811   ShouldNotReachHere();
 3812   return nullptr;
 3813 }
 3814 
 3815 bool Matcher::is_reg2reg_move(MachNode* m) {
 3816   switch (m->rule()) {
 3817     case MoveVec2Leg_rule:
 3818     case MoveLeg2Vec_rule:
 3819     case MoveF2VL_rule:
 3820     case MoveF2LEG_rule:
 3821     case MoveVL2F_rule:
 3822     case MoveLEG2F_rule:
 3823     case MoveD2VL_rule:
 3824     case MoveD2LEG_rule:
 3825     case MoveVL2D_rule:
 3826     case MoveLEG2D_rule:
 3827       return true;
 3828     default:
 3829       return false;
 3830   }
 3831 }
 3832 
 3833 bool Matcher::is_generic_vector(MachOper* opnd) {
 3834   switch (opnd->opcode()) {
 3835     case VEC:
 3836     case LEGVEC:
 3837       return true;
 3838     default:
 3839       return false;
 3840   }
 3841 }
 3842 
 3843 //------------------------------------------------------------------------
 3844 
 3845 const RegMask* Matcher::predicate_reg_mask(void) {
 3846   return &_VECTMASK_REG_mask;
 3847 }
 3848 
 3849 // Max vector size in bytes. 0 if not supported.
 3850 int Matcher::vector_width_in_bytes(BasicType bt) {
 3851   assert(is_java_primitive(bt), "only primitive type vectors");
 3852   // SSE2 supports 128bit vectors for all types.
 3853   // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
 3855   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
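  // e.g. UseAVX == 2 -> (1 << 2) * 8 = 32 bytes (YMM),
  //      UseAVX == 3 -> (1 << 3) * 8 = 64 bytes (ZMM).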
 3856   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3857   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3858     size = (UseAVX > 2) ? 64 : 32;
 3859   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3860     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3861   // Use flag to limit vector size.
 3862   size = MIN2(size,(int)MaxVectorSize);
 3863   // Minimum 2 values in vector (or 4 for bytes).
 3864   switch (bt) {
 3865   case T_DOUBLE:
 3866   case T_LONG:
 3867     if (size < 16) return 0;
 3868     break;
 3869   case T_FLOAT:
 3870   case T_INT:
 3871     if (size < 8) return 0;
 3872     break;
  case T_BOOLEAN:
  case T_CHAR:
  case T_BYTE:
  case T_SHORT:
    if (size < 4) return 0;
    break;
 3885   default:
 3886     ShouldNotReachHere();
 3887   }
 3888   return size;
 3889 }
 3890 
 3891 // Limits on vector size (number of elements) loaded into vector.
 3892 int Matcher::max_vector_size(const BasicType bt) {
 3893   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3894 }
 3895 int Matcher::min_vector_size(const BasicType bt) {
 3896   int max_size = max_vector_size(bt);
  // Min number of elements which can be loaded into a vector: 4 for byte
  // elements, otherwise 2 (i.e. at least 4 bytes).
 3898   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
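  // e.g. T_BYTE -> 4 elements (4 bytes), T_INT -> 2 elements (8 bytes).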
  // Support calls into SVML with single-element (double64) vectors.
 3900   if (bt == T_DOUBLE) {
 3901     size = 1;
 3902   }
 3903   return MIN2(size,max_size);
 3904 }
 3905 
 3906 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3907   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3908   // by default on Cascade Lake
 3909   if (VM_Version::is_default_intel_cascade_lake()) {
 3910     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3911   }
 3912   return Matcher::max_vector_size(bt);
 3913 }
 3914 
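// x86 has no scalable (length-agnostic) vector registers, so this is unused.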
 3915 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3916   return -1;
 3917 }
 3918 
 3919 // Vector ideal reg corresponding to specified size in bytes
 3920 uint Matcher::vector_ideal_reg(int size) {
 3921   assert(MaxVectorSize >= size, "");
 3922   switch(size) {
 3923     case  4: return Op_VecS;
 3924     case  8: return Op_VecD;
 3925     case 16: return Op_VecX;
 3926     case 32: return Op_VecY;
 3927     case 64: return Op_VecZ;
 3928   }
 3929   ShouldNotReachHere();
 3930   return 0;
 3931 }
 3932 
 3933 // Check for shift by small constant as well
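// e.g. for an int-array access a[i] on LP64, the offset subtree
// (LShiftL (ConvI2L i) 2) can be folded into the addressing mode
// [base + i*4 + disp] instead of being computed into a register (illustrative).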
 3934 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3935   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3936       shift->in(2)->get_int() <= 3 &&
 3937       // Are there other uses besides address expressions?
 3938       !matcher->is_visited(shift)) {
 3939     address_visited.set(shift->_idx); // Flag as address_visited
 3940     mstack.push(shift->in(2), Matcher::Visit);
 3941     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3945     if (conv->Opcode() == Op_ConvI2L &&
 3946         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3947         // Are there other uses besides address expressions?
 3948         !matcher->is_visited(conv)) {
 3949       address_visited.set(conv->_idx); // Flag as address_visited
 3950       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3951     } else {
 3952       mstack.push(conv, Matcher::Pre_Visit);
 3953     }
 3954     return true;
 3955   }
 3956   return false;
 3957 }
 3958 
// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that the pair can be matched
// to BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
 3963 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3964 // refers to the same node.
 3965 //
 3966 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3967 // This is a temporary solution until we make DAGs expressible in ADL.
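// The shapes recognized by is_bmi_pattern() below map to BMI1 instructions
// as follows (illustrative):
//   blsi   r32, m32 : (AndI (SubI 0 load) load)    // x & -x,    isolate lowest set bit
//   blsr   r32, m32 : (AndI (AddI load -1) load)   // x & (x-1), clear lowest set bit
//   blsmsk r32, m32 : (XorI (AddI load -1) load)   // x ^ (x-1), mask up to lowest set bit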
 3968 template<typename ConType>
 3969 class FusedPatternMatcher {
 3970   Node* _op1_node;
 3971   Node* _mop_node;
 3972   int _con_op;
 3973 
 3974   static int match_next(Node* n, int next_op, int next_op_idx) {
 3975     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3976       return -1;
 3977     }
 3978 
 3979     if (next_op_idx == -1) { // n is commutative, try rotations
 3980       if (n->in(1)->Opcode() == next_op) {
 3981         return 1;
 3982       } else if (n->in(2)->Opcode() == next_op) {
 3983         return 2;
 3984       }
 3985     } else {
 3986       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3987       if (n->in(next_op_idx)->Opcode() == next_op) {
 3988         return next_op_idx;
 3989       }
 3990     }
 3991     return -1;
 3992   }
 3993 
 3994  public:
 3995   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 3996     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 3997 
 3998   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 3999              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4000              typename ConType::NativeType con_value) {
 4001     if (_op1_node->Opcode() != op1) {
 4002       return false;
 4003     }
 4004     if (_mop_node->outcnt() > 2) {
 4005       return false;
 4006     }
 4007     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4008     if (op1_op2_idx == -1) {
 4009       return false;
 4010     }
 4011     // Memory operation must be the other edge
 4012     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4013 
 4014     // Check that the mop node is really what we want
 4015     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4016       Node* op2_node = _op1_node->in(op1_op2_idx);
 4017       if (op2_node->outcnt() > 1) {
 4018         return false;
 4019       }
 4020       assert(op2_node->Opcode() == op2, "Should be");
 4021       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4022       if (op2_con_idx == -1) {
 4023         return false;
 4024       }
 4025       // Memory operation must be the other edge
 4026       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4027       // Check that the memory operation is the same node
 4028       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4029         // Now check the constant
 4030         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4031         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4032           return true;
 4033         }
 4034       }
 4035     }
 4036     return false;
 4037   }
 4038 };
 4039 
 4040 static bool is_bmi_pattern(Node* n, Node* m) {
 4041   assert(UseBMI1Instructions, "sanity");
 4042   if (n != nullptr && m != nullptr) {
 4043     if (m->Opcode() == Op_LoadI) {
 4044       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4045       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4046              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4047              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4048     } else if (m->Opcode() == Op_LoadL) {
 4049       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4050       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4051              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4052              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4053     }
 4054   }
 4055   return false;
 4056 }
 4057 
 4058 // Should the matcher clone input 'm' of node 'n'?
 4059 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4060   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4061   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4062     mstack.push(m, Visit);
 4063     return true;
 4064   }
 4065   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4066     mstack.push(m, Visit);           // m = ShiftCntV
 4067     return true;
 4068   }
 4069   if (is_encode_and_store_pattern(n, m)) {
 4070     mstack.push(m, Visit);
 4071     return true;
 4072   }
 4073   return false;
 4074 }
 4075 
 4076 // Should the Matcher clone shifts on addressing modes, expecting them
 4077 // to be subsumed into complex addressing expressions or compute them
 4078 // into registers?
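// e.g. (AddP (AddP base ptr (LShiftL idx 3)) con) can be subsumed into the
// single addressing expression [ptr + idx*8 + con] (illustrative).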
 4079 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4080   Node *off = m->in(AddPNode::Offset);
 4081   if (off->is_Con()) {
 4082     address_visited.test_set(m->_idx); // Flag as address_visited
 4083     Node *adr = m->in(AddPNode::Address);
 4084 
 4085     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4086     // AtomicAdd is not an addressing expression.
 4087     // Cheap to find it by looking for screwy base.
 4088     if (adr->is_AddP() &&
 4089         !adr->in(AddPNode::Base)->is_top() &&
 4090         !adr->in(AddPNode::Offset)->is_Con() &&
 4091         off->get_long() == (int) (off->get_long()) && // immL32
 4092         // Are there other uses besides address expressions?
 4093         !is_visited(adr)) {
 4094       address_visited.set(adr->_idx); // Flag as address_visited
 4095       Node *shift = adr->in(AddPNode::Offset);
 4096       if (!clone_shift(shift, this, mstack, address_visited)) {
 4097         mstack.push(shift, Pre_Visit);
 4098       }
 4099       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4100       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4101     } else {
 4102       mstack.push(adr, Pre_Visit);
 4103     }
 4104 
 4105     // Clone X+offset as it also folds into most addressing expressions
 4106     mstack.push(off, Visit);
 4107     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4108     return true;
 4109   } else if (clone_shift(off, this, mstack, address_visited)) {
 4110     address_visited.test_set(m->_idx); // Flag as address_visited
 4111     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4112     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4113     return true;
 4114   }
 4115   return false;
 4116 }
 4117 
 4118 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4119   switch (bt) {
 4120     case BoolTest::eq:
 4121       return Assembler::eq;
 4122     case BoolTest::ne:
 4123       return Assembler::neq;
 4124     case BoolTest::le:
 4125     case BoolTest::ule:
 4126       return Assembler::le;
 4127     case BoolTest::ge:
 4128     case BoolTest::uge:
 4129       return Assembler::nlt;
 4130     case BoolTest::lt:
 4131     case BoolTest::ult:
 4132       return Assembler::lt;
 4133     case BoolTest::gt:
 4134     case BoolTest::ugt:
 4135       return Assembler::nle;
 4136     default : ShouldNotReachHere(); return Assembler::_false;
 4137   }
 4138 }
 4139 
 4140 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4141   switch (bt) {
 4142   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4143   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4144   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4145   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4146   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4147   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4148   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4149   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4150   }
 4151 }
 4152 
 4153 // Helper methods for MachSpillCopyNode::implementation().
 4154 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4155                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4156   assert(ireg == Op_VecS || // 32bit vector
 4157          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4158           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4159          "no non-adjacent vector moves" );
 4160   if (masm) {
 4161     switch (ireg) {
 4162     case Op_VecS: // copy whole register
 4163     case Op_VecD:
 4164     case Op_VecX:
 4165       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4166         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4167       } else {
 4168         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4169      }
 4170       break;
 4171     case Op_VecY:
 4172       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4173         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4174       } else {
 4175         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4176      }
 4177       break;
 4178     case Op_VecZ:
 4179       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4180       break;
 4181     default:
 4182       ShouldNotReachHere();
 4183     }
 4184 #ifndef PRODUCT
 4185   } else {
 4186     switch (ireg) {
 4187     case Op_VecS:
 4188     case Op_VecD:
 4189     case Op_VecX:
 4190       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4191       break;
 4192     case Op_VecY:
 4193     case Op_VecZ:
 4194       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4195       break;
 4196     default:
 4197       ShouldNotReachHere();
 4198     }
 4199 #endif
 4200   }
 4201 }
 4202 
 4203 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4204                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4205   if (masm) {
 4206     if (is_load) {
 4207       switch (ireg) {
 4208       case Op_VecS:
 4209         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4210         break;
 4211       case Op_VecD:
 4212         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4213         break;
 4214       case Op_VecX:
 4215         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4216           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4217         } else {
 4218           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4219           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4220         }
 4221         break;
 4222       case Op_VecY:
 4223         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4224           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4225         } else {
 4226           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4227           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4228         }
 4229         break;
 4230       case Op_VecZ:
 4231         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4232         break;
 4233       default:
 4234         ShouldNotReachHere();
 4235       }
 4236     } else { // store
 4237       switch (ireg) {
 4238       case Op_VecS:
 4239         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4240         break;
 4241       case Op_VecD:
 4242         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4243         break;
 4244       case Op_VecX:
 4245         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4246           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4247         }
 4248         else {
 4249           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4250         }
 4251         break;
 4252       case Op_VecY:
 4253         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4254           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4255         }
 4256         else {
 4257           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4258         }
 4259         break;
 4260       case Op_VecZ:
 4261         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4262         break;
 4263       default:
 4264         ShouldNotReachHere();
 4265       }
 4266     }
 4267 #ifndef PRODUCT
 4268   } else {
 4269     if (is_load) {
 4270       switch (ireg) {
 4271       case Op_VecS:
 4272         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4273         break;
 4274       case Op_VecD:
 4275         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4276         break;
 4277        case Op_VecX:
 4278         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4279         break;
 4280       case Op_VecY:
 4281       case Op_VecZ:
 4282         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4283         break;
 4284       default:
 4285         ShouldNotReachHere();
 4286       }
 4287     } else { // store
 4288       switch (ireg) {
 4289       case Op_VecS:
 4290         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4291         break;
 4292       case Op_VecD:
 4293         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4294         break;
 4295        case Op_VecX:
 4296         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4297         break;
 4298       case Op_VecY:
 4299       case Op_VecZ:
 4300         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4301         break;
 4302       default:
 4303         ShouldNotReachHere();
 4304       }
 4305     }
 4306 #endif
 4307   }
 4308 }
 4309 
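// Replicate the constant 'con' of element type 'bt' across 'len' lanes into a
// little-endian byte array, e.g. (illustrative):
//   vreplicate_imm(T_SHORT, 0x1234, 2) -> { 0x34, 0x12, 0x34, 0x12 }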
 4310 template <class T>
 4311 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4312   int size = type2aelembytes(bt) * len;
 4313   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4314   for (int i = 0; i < len; i++) {
 4315     int offset = i * type2aelembytes(bt);
 4316     switch (bt) {
 4317       case T_BYTE: val->at(i) = con; break;
 4318       case T_SHORT: {
 4319         jshort c = con;
 4320         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4321         break;
 4322       }
 4323       case T_INT: {
 4324         jint c = con;
 4325         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4326         break;
 4327       }
 4328       case T_LONG: {
 4329         jlong c = con;
 4330         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4331         break;
 4332       }
 4333       case T_FLOAT: {
 4334         jfloat c = con;
 4335         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4336         break;
 4337       }
 4338       case T_DOUBLE: {
 4339         jdouble c = con;
 4340         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4341         break;
 4342       }
 4343       default: assert(false, "%s", type2name(bt));
 4344     }
 4345   }
 4346   return val;
 4347 }
 4348 
 4349 static inline jlong high_bit_set(BasicType bt) {
 4350   switch (bt) {
 4351     case T_BYTE:  return 0x8080808080808080;
 4352     case T_SHORT: return 0x8000800080008000;
 4353     case T_INT:   return 0x8000000080000000;
 4354     case T_LONG:  return 0x8000000000000000;
 4355     default:
 4356       ShouldNotReachHere();
 4357       return 0;
 4358   }
 4359 }
 4360 
 4361 #ifndef PRODUCT
 4362   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4363     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4364   }
 4365 #endif
 4366 
 4367   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4368     __ nop(_count);
 4369   }
 4370 
 4371   uint MachNopNode::size(PhaseRegAlloc*) const {
 4372     return _count;
 4373   }
 4374 
 4375 #ifndef PRODUCT
 4376   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4377     st->print("# breakpoint");
 4378   }
 4379 #endif
 4380 
 4381   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4382     __ int3();
 4383   }
 4384 
 4385   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4386     return MachNode::size(ra_);
 4387   }
 4388 
 4389 %}
 4390 
 4391 //----------ENCODING BLOCK-----------------------------------------------------
 4392 // This block specifies the encoding classes used by the compiler to
 4393 // output byte streams.  Encoding classes are parameterized macros
 4394 // used by Machine Instruction Nodes in order to generate the bit
 4395 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 4399 // which returns its register number when queried.  CONST_INTER causes
 4400 // an operand to generate a function which returns the value of the
 4401 // constant when queried.  MEMORY_INTER causes an operand to generate
 4402 // four functions which return the Base Register, the Index Register,
 4403 // the Scale Value, and the Offset Value of the operand when queried.
 4404 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e. the encoding bits for the instruction)
 4406 // associated with each basic boolean condition for a conditional
 4407 // instruction.
 4408 //
// Instructions specify two basic values for encoding.  A function is
// also available to check whether a constant displacement is an
// oop.  They use the ins_encode keyword to specify their encoding
 4412 // classes (which must be a sequence of enc_class names, and their
 4413 // parameters, specified in the encoding block), and they use the
 4414 // opcode keyword to specify, in order, their primary, secondary, and
 4415 // tertiary opcode.  Only the opcode sections which a particular
 4416 // instruction needs for encoding need to be specified.
 4417 encode %{
 4418   enc_class cdql_enc(no_rax_rdx_RegI div)
 4419   %{
 4420     // Full implementation of Java idiv and irem; checks for
 4421     // special case as described in JVM spec., p.243 & p.271.
 4422     //
 4423     //         normal case                           special case
 4424     //
 4425     // input : rax: dividend                         min_int
 4426     //         reg: divisor                          -1
 4427     //
 4428     // output: rax: quotient  (= rax idiv reg)       min_int
 4429     //         rdx: remainder (= rax irem reg)       0
 4430     //
    //  Code sequence:
 4432     //
 4433     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4434     //    5:   75 07/08                jne    e <normal>
 4435     //    7:   33 d2                   xor    %edx,%edx
 4436     //  [div >= 8 -> offset + 1]
 4437     //  [REX_B]
 4438     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4439     //    c:   74 03/04                je     11 <done>
 4440     // 000000000000000e <normal>:
 4441     //    e:   99                      cltd
 4442     //  [div >= 8 -> offset + 1]
 4443     //  [REX_B]
 4444     //    f:   f7 f9                   idiv   $div
 4445     // 0000000000000011 <done>:
 4446     Label normal;
 4447     Label done;
 4448 
 4449     // cmp    $0x80000000,%eax
 4450     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4451 
 4452     // jne    e <normal>
 4453     __ jccb(Assembler::notEqual, normal);
 4454 
 4455     // xor    %edx,%edx
 4456     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4457 
    // cmp    $0xffffffffffffffff,$div
 4459     __ cmpl($div$$Register, -1);
 4460 
 4461     // je     11 <done>
 4462     __ jccb(Assembler::equal, done);
 4463 
 4464     // <normal>
 4465     // cltd
 4466     __ bind(normal);
 4467     __ cdql();
 4468 
 4469     // idivl
 4470     // <done>
 4471     __ idivl($div$$Register);
 4472     __ bind(done);
 4473   %}
 4474 
 4475   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4476   %{
 4477     // Full implementation of Java ldiv and lrem; checks for
 4478     // special case as described in JVM spec., p.243 & p.271.
 4479     //
 4480     //         normal case                           special case
 4481     //
 4482     // input : rax: dividend                         min_long
 4483     //         reg: divisor                          -1
 4484     //
 4485     // output: rax: quotient  (= rax idiv reg)       min_long
 4486     //         rdx: remainder (= rax irem reg)       0
 4487     //
    //  Code sequence:
 4489     //
 4490     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4491     //    7:   00 00 80
 4492     //    a:   48 39 d0                cmp    %rdx,%rax
 4493     //    d:   75 08                   jne    17 <normal>
 4494     //    f:   33 d2                   xor    %edx,%edx
 4495     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4496     //   15:   74 05                   je     1c <done>
 4497     // 0000000000000017 <normal>:
 4498     //   17:   48 99                   cqto
 4499     //   19:   48 f7 f9                idiv   $div
 4500     // 000000000000001c <done>:
 4501     Label normal;
 4502     Label done;
 4503 
 4504     // mov    $0x8000000000000000,%rdx
 4505     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4506 
 4507     // cmp    %rdx,%rax
 4508     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4509 
 4510     // jne    17 <normal>
 4511     __ jccb(Assembler::notEqual, normal);
 4512 
 4513     // xor    %edx,%edx
 4514     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4515 
 4516     // cmp    $0xffffffffffffffff,$div
 4517     __ cmpq($div$$Register, -1);
 4518 
    // je     1c <done>
 4520     __ jccb(Assembler::equal, done);
 4521 
 4522     // <normal>
 4523     // cqto
 4524     __ bind(normal);
 4525     __ cdqq();
 4526 
    // idivq
 4528     // <done>
 4529     __ idivq($div$$Register);
 4530     __ bind(done);
 4531   %}
 4532 
 4533   enc_class clear_avx %{
 4534     DEBUG_ONLY(int off0 = __ offset());
 4535     if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when the current compiled code uses
      // wide vectors, to avoid the AVX <-> SSE transition penalty during calls.
 4539       __ vzeroupper();
 4540     }
 4541     DEBUG_ONLY(int off1 = __ offset());
 4542     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4543   %}
 4544 
 4545   enc_class Java_To_Runtime(method meth) %{
 4546     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4547     __ call(r10);
 4548     __ post_call_nop();
 4549   %}
 4550 
 4551   enc_class Java_Static_Call(method meth)
 4552   %{
 4553     // JAVA STATIC CALL
 4554     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4555     // determine who we intended to call.
 4556     if (!_method) {
 4557       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4558     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4559       // The NOP here is purely to ensure that eliding a call to
 4560       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4561       __ addr_nop_5();
 4562       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4563     } else {
 4564       int method_index = resolved_method_index(masm);
 4565       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4566                                                   : static_call_Relocation::spec(method_index);
 4567       address mark = __ pc();
 4568       int call_offset = __ offset();
 4569       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4570       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4571         // Calls of the same statically bound method can share
 4572         // a stub to the interpreter.
 4573         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4574       } else {
 4575         // Emit stubs for static call.
 4576         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4577         __ clear_inst_mark();
 4578         if (stub == nullptr) {
 4579           ciEnv::current()->record_failure("CodeCache is full");
 4580           return;
 4581         }
 4582       }
 4583     }
 4584     __ post_call_nop();
 4585   %}
 4586 
 4587   enc_class Java_Dynamic_Call(method meth) %{
 4588     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4589     __ post_call_nop();
 4590   %}
 4591 
 4592   enc_class call_epilog %{
 4593     if (VerifyStackAtCalls) {
 4594       // Check that stack depth is unchanged: find majik cookie on stack
 4595       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4596       Label L;
 4597       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4598       __ jccb(Assembler::equal, L);
 4599       // Die if stack mismatch
 4600       __ int3();
 4601       __ bind(L);
 4602     }
 4603   %}
 4604 
 4605 %}
 4606 
 4607 //----------FRAME--------------------------------------------------------------
 4608 // Definition of frame structure and management information.
 4609 //
 4610 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4611 //                             |   (to get allocators register number
 4612 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4613 //  r   CALLER     |        |
 4614 //  o     |        +--------+      pad to even-align allocators stack-slot
 4615 //  w     V        |  pad0  |        numbers; owned by CALLER
 4616 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4617 //  h     ^        |   in   |  5
 4618 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4619 //  |     |        |        |  3
 4620 //  |     |        +--------+
 4621 //  V     |        | old out|      Empty on Intel, window on Sparc
 4622 //        |    old |preserve|      Must be even aligned.
 4623 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4624 //        |        |   in   |  3   area for Intel ret address
 4625 //     Owned by    |preserve|      Empty on Sparc.
 4626 //       SELF      +--------+
 4627 //        |        |  pad2  |  2   pad to align old SP
 4628 //        |        +--------+  1
 4629 //        |        | locks  |  0
 4630 //        |        +--------+----> OptoReg::stack0(), even aligned
 4631 //        |        |  pad1  | 11   pad to align new SP
 4632 //        |        +--------+
 4633 //        |        |        | 10
 4634 //        |        | spills |  9   spills
 4635 //        V        |        |  8   (pad0 slot for callee)
 4636 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4637 //        ^        |  out   |  7
 4638 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4639 //     Owned by    +--------+
 4640 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4641 //        |    new |preserve|      Must be even-aligned.
 4642 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4643 //        |        |        |
 4644 //
 4645 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4646 //         known from SELF's arguments and the Java calling convention.
 4647 //         Region 6-7 is determined per call site.
 4648 // Note 2: If the calling convention leaves holes in the incoming argument
 4649 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4650 //         are owned by the CALLEE.  Holes should not be necessary in the
 4651 //         incoming area, as the Java calling convention is completely under
 4652 //         the control of the AD file.  Doubles can be sorted and packed to
 4653 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4654 //         varargs C calling conventions.
 4655 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4656 //         even aligned with pad0 as needed.
 4657 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4658 //         region 6-11 is even aligned; it may be padded out more so that
 4659 //         the region from SP to FP meets the minimum stack alignment.
 4660 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4661 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4662 //         SP meets the minimum alignment.
 4663 
 4664 frame
 4665 %{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
 4668   inline_cache_reg(RAX);                // Inline Cache Register
 4669 
 4670   // Optional: name the operand used by cisc-spilling to access
 4671   // [stack_pointer + offset]
 4672   cisc_spilling_operand_name(indOffset32);
 4673 
 4674   // Number of stack slots consumed by locking an object
 4675   sync_stack_slots(2);
 4676 
 4677   // Compiled code's Frame Pointer
 4678   frame_pointer(RSP);
 4679 
 4680   // Stack alignment requirement
 4681   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4682 
 4683   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4684   // for calls to C.  Supports the var-args backing area for register parms.
 4685   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4686 
 4687   // The after-PROLOG location of the return address.  Location of
 4688   // return address specifies a type (REG or STACK) and a number
 4689   // representing the register number (i.e. - use a register name) or
 4690   // stack slot.
 4691   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
 4693   return_addr(STACK - 2 +
 4694               align_up((Compile::current()->in_preserve_stack_slots() +
 4695                         Compile::current()->fixed_slots()),
 4696                        stack_alignment_in_slots()));
 4697 
 4698   // Location of compiled Java return values.  Same as C for now.
 4699   return_value
 4700   %{
 4701     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4702            "only return normal values");
 4703 
 4704     static const int lo[Op_RegL + 1] = {
 4705       0,
 4706       0,
 4707       RAX_num,  // Op_RegN
 4708       RAX_num,  // Op_RegI
 4709       RAX_num,  // Op_RegP
 4710       XMM0_num, // Op_RegF
 4711       XMM0_num, // Op_RegD
 4712       RAX_num   // Op_RegL
 4713     };
 4714     static const int hi[Op_RegL + 1] = {
 4715       0,
 4716       0,
 4717       OptoReg::Bad, // Op_RegN
 4718       OptoReg::Bad, // Op_RegI
 4719       RAX_H_num,    // Op_RegP
 4720       OptoReg::Bad, // Op_RegF
 4721       XMM0b_num,    // Op_RegD
 4722       RAX_H_num     // Op_RegL
 4723     };
 4724     // Excluded flags and vector registers.
 4725     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4726     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4727   %}
 4728 %}
 4729 
 4730 //----------ATTRIBUTES---------------------------------------------------------
 4731 //----------Operand Attributes-------------------------------------------------
 4732 op_attrib op_cost(0);        // Required cost attribute
 4733 
 4734 //----------Instruction Attributes---------------------------------------------
 4735 ins_attrib ins_cost(100);       // Required cost attribute
 4736 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4737 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4738                                 // a non-matching short branch variant
 4739                                 // of some long branch?
 4740 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4741                                 // be a power of 2) specifies the
 4742                                 // alignment that some part of the
 4743                                 // instruction (not necessarily the
 4744                                 // start) requires.  If > 1, a
 4745                                 // compute_padding() function must be
 4746                                 // provided for the instruction
 4747 
 4748 // Whether this node is expanded during code emission into a sequence of
 4749 // instructions and the first instruction can perform an implicit null check.
 4750 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4751 
 4752 //----------OPERANDS-----------------------------------------------------------
 4753 // Operand definitions must precede instruction definitions for correct parsing
 4754 // in the ADLC because operands constitute user defined types which are used in
 4755 // instruction definitions.
 4756 
 4757 //----------Simple Operands----------------------------------------------------
 4758 // Immediate Operands
 4759 // Integer Immediate
 4760 operand immI()
 4761 %{
 4762   match(ConI);
 4763 
 4764   op_cost(10);
 4765   format %{ %}
 4766   interface(CONST_INTER);
 4767 %}
 4768 
 4769 // Constant for test vs zero
 4770 operand immI_0()
 4771 %{
 4772   predicate(n->get_int() == 0);
 4773   match(ConI);
 4774 
 4775   op_cost(0);
 4776   format %{ %}
 4777   interface(CONST_INTER);
 4778 %}
 4779 
 4780 // Constant for increment
 4781 operand immI_1()
 4782 %{
 4783   predicate(n->get_int() == 1);
 4784   match(ConI);
 4785 
 4786   op_cost(0);
 4787   format %{ %}
 4788   interface(CONST_INTER);
 4789 %}
 4790 
 4791 // Constant for decrement
 4792 operand immI_M1()
 4793 %{
 4794   predicate(n->get_int() == -1);
 4795   match(ConI);
 4796 
 4797   op_cost(0);
 4798   format %{ %}
 4799   interface(CONST_INTER);
 4800 %}
 4801 
 4802 operand immI_2()
 4803 %{
 4804   predicate(n->get_int() == 2);
 4805   match(ConI);
 4806 
 4807   op_cost(0);
 4808   format %{ %}
 4809   interface(CONST_INTER);
 4810 %}
 4811 
 4812 operand immI_4()
 4813 %{
 4814   predicate(n->get_int() == 4);
 4815   match(ConI);
 4816 
 4817   op_cost(0);
 4818   format %{ %}
 4819   interface(CONST_INTER);
 4820 %}
 4821 
 4822 operand immI_8()
 4823 %{
 4824   predicate(n->get_int() == 8);
 4825   match(ConI);
 4826 
 4827   op_cost(0);
 4828   format %{ %}
 4829   interface(CONST_INTER);
 4830 %}
 4831 
 4832 // Valid scale values for addressing modes
 4833 operand immI2()
 4834 %{
 4835   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4836   match(ConI);
 4837 
 4838   format %{ %}
 4839   interface(CONST_INTER);
 4840 %}
 4841 
 4842 operand immU7()
 4843 %{
 4844   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4845   match(ConI);
 4846 
 4847   op_cost(5);
 4848   format %{ %}
 4849   interface(CONST_INTER);
 4850 %}
 4851 
 4852 operand immI8()
 4853 %{
 4854   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4855   match(ConI);
 4856 
 4857   op_cost(5);
 4858   format %{ %}
 4859   interface(CONST_INTER);
 4860 %}
 4861 
 4862 operand immU8()
 4863 %{
 4864   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4865   match(ConI);
 4866 
 4867   op_cost(5);
 4868   format %{ %}
 4869   interface(CONST_INTER);
 4870 %}
 4871 
 4872 operand immI16()
 4873 %{
 4874   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4875   match(ConI);
 4876 
 4877   op_cost(10);
 4878   format %{ %}
 4879   interface(CONST_INTER);
 4880 %}
 4881 
 4882 // Int Immediate non-negative
 4883 operand immU31()
 4884 %{
 4885   predicate(n->get_int() >= 0);
 4886   match(ConI);
 4887 
 4888   op_cost(0);
 4889   format %{ %}
 4890   interface(CONST_INTER);
 4891 %}
 4892 
 4893 // Pointer Immediate
 4894 operand immP()
 4895 %{
 4896   match(ConP);
 4897 
 4898   op_cost(10);
 4899   format %{ %}
 4900   interface(CONST_INTER);
 4901 %}
 4902 
 4903 // Null Pointer Immediate
 4904 operand immP0()
 4905 %{
 4906   predicate(n->get_ptr() == 0);
 4907   match(ConP);
 4908 
 4909   op_cost(5);
 4910   format %{ %}
 4911   interface(CONST_INTER);
 4912 %}
 4913 
 4914 // Pointer Immediate
 4915 operand immN() %{
 4916   match(ConN);
 4917 
 4918   op_cost(10);
 4919   format %{ %}
 4920   interface(CONST_INTER);
 4921 %}
 4922 
 4923 operand immNKlass() %{
 4924   match(ConNKlass);
 4925 
 4926   op_cost(10);
 4927   format %{ %}
 4928   interface(CONST_INTER);
 4929 %}
 4930 
 4931 // Null Pointer Immediate
 4932 operand immN0() %{
 4933   predicate(n->get_narrowcon() == 0);
 4934   match(ConN);
 4935 
 4936   op_cost(5);
 4937   format %{ %}
 4938   interface(CONST_INTER);
 4939 %}
 4940 
 4941 operand immP31()
 4942 %{
 4943   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4944             && (n->get_ptr() >> 31) == 0);
 4945   match(ConP);
 4946 
 4947   op_cost(5);
 4948   format %{ %}
 4949   interface(CONST_INTER);
 4950 %}
 4951 
 4952 
 4953 // Long Immediate
 4954 operand immL()
 4955 %{
 4956   match(ConL);
 4957 
 4958   op_cost(20);
 4959   format %{ %}
 4960   interface(CONST_INTER);
 4961 %}
 4962 
 4963 // Long Immediate 8-bit
 4964 operand immL8()
 4965 %{
 4966   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4967   match(ConL);
 4968 
 4969   op_cost(5);
 4970   format %{ %}
 4971   interface(CONST_INTER);
 4972 %}
 4973 
 4974 // Long Immediate 32-bit unsigned
 4975 operand immUL32()
 4976 %{
 4977   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4978   match(ConL);
 4979 
 4980   op_cost(10);
 4981   format %{ %}
 4982   interface(CONST_INTER);
 4983 %}
 4984 
 4985 // Long Immediate 32-bit signed
 4986 operand immL32()
 4987 %{
 4988   predicate(n->get_long() == (int) (n->get_long()));
 4989   match(ConL);
 4990 
 4991   op_cost(15);
 4992   format %{ %}
 4993   interface(CONST_INTER);
 4994 %}
 4995 
 4996 operand immL_Pow2()
 4997 %{
 4998   predicate(is_power_of_2((julong)n->get_long()));
 4999   match(ConL);
 5000 
 5001   op_cost(15);
 5002   format %{ %}
 5003   interface(CONST_INTER);
 5004 %}
 5005 
 5006 operand immL_NotPow2()
 5007 %{
 5008   predicate(is_power_of_2((julong)~n->get_long()));
 5009   match(ConL);
 5010 
 5011   op_cost(15);
 5012   format %{ %}
 5013   interface(CONST_INTER);
 5014 %}
 5015 
 5016 // Long Immediate zero
 5017 operand immL0()
 5018 %{
 5019   predicate(n->get_long() == 0L);
 5020   match(ConL);
 5021 
 5022   op_cost(10);
 5023   format %{ %}
 5024   interface(CONST_INTER);
 5025 %}
 5026 
 5027 // Constant for increment
 5028 operand immL1()
 5029 %{
 5030   predicate(n->get_long() == 1);
 5031   match(ConL);
 5032 
 5033   format %{ %}
 5034   interface(CONST_INTER);
 5035 %}
 5036 
 5037 // Constant for decrement
 5038 operand immL_M1()
 5039 %{
 5040   predicate(n->get_long() == -1);
 5041   match(ConL);
 5042 
 5043   format %{ %}
 5044   interface(CONST_INTER);
 5045 %}
 5046 
 5047 // Long Immediate: low 32-bit mask
 5048 operand immL_32bits()
 5049 %{
 5050   predicate(n->get_long() == 0xFFFFFFFFL);
 5051   match(ConL);
 5052   op_cost(20);
 5053 
 5054   format %{ %}
 5055   interface(CONST_INTER);
 5056 %}
 5057 
 5058 // Int Immediate: 2^n-1, positive
 5059 operand immI_Pow2M1()
 5060 %{
 5061   predicate((n->get_int() > 0)
 5062             && is_power_of_2((juint)n->get_int() + 1));
 5063   match(ConI);
 5064 
 5065   op_cost(20);
 5066   format %{ %}
 5067   interface(CONST_INTER);
 5068 %}
 5069 
 5070 // Float Immediate zero
 5071 operand immF0()
 5072 %{
 5073   predicate(jint_cast(n->getf()) == 0);
 5074   match(ConF);
 5075 
 5076   op_cost(5);
 5077   format %{ %}
 5078   interface(CONST_INTER);
 5079 %}
 5080 
 5081 // Float Immediate
 5082 operand immF()
 5083 %{
 5084   match(ConF);
 5085 
 5086   op_cost(15);
 5087   format %{ %}
 5088   interface(CONST_INTER);
 5089 %}
 5090 
 5091 // Half Float Immediate
 5092 operand immH()
 5093 %{
 5094   match(ConH);
 5095 
 5096   op_cost(15);
 5097   format %{ %}
 5098   interface(CONST_INTER);
 5099 %}
 5100 
 5101 // Double Immediate zero
 5102 operand immD0()
 5103 %{
 5104   predicate(jlong_cast(n->getd()) == 0);
 5105   match(ConD);
 5106 
 5107   op_cost(5);
 5108   format %{ %}
 5109   interface(CONST_INTER);
 5110 %}
 5111 
 5112 // Double Immediate
 5113 operand immD()
 5114 %{
 5115   match(ConD);
 5116 
 5117   op_cost(15);
 5118   format %{ %}
 5119   interface(CONST_INTER);
 5120 %}
 5121 
 5122 // Immediates for special shifts (sign extend)
 5123 
 5124 // Constants for increment
 5125 operand immI_16()
 5126 %{
 5127   predicate(n->get_int() == 16);
 5128   match(ConI);
 5129 
 5130   format %{ %}
 5131   interface(CONST_INTER);
 5132 %}
 5133 
 5134 operand immI_24()
 5135 %{
 5136   predicate(n->get_int() == 24);
 5137   match(ConI);
 5138 
 5139   format %{ %}
 5140   interface(CONST_INTER);
 5141 %}
 5142 
 5143 // Constant for byte-wide masking
 5144 operand immI_255()
 5145 %{
 5146   predicate(n->get_int() == 255);
 5147   match(ConI);
 5148 
 5149   format %{ %}
 5150   interface(CONST_INTER);
 5151 %}
 5152 
 5153 // Constant for short-wide masking
 5154 operand immI_65535()
 5155 %{
 5156   predicate(n->get_int() == 65535);
 5157   match(ConI);
 5158 
 5159   format %{ %}
 5160   interface(CONST_INTER);
 5161 %}
 5162 
 5163 // Constant for byte-wide masking
 5164 operand immL_255()
 5165 %{
 5166   predicate(n->get_long() == 255);
 5167   match(ConL);
 5168 
 5169   format %{ %}
 5170   interface(CONST_INTER);
 5171 %}
 5172 
 5173 // Constant for short-wide masking
 5174 operand immL_65535()
 5175 %{
 5176   predicate(n->get_long() == 65535);
 5177   match(ConL);
 5178 
 5179   format %{ %}
 5180   interface(CONST_INTER);
 5181 %}
 5182 
 5183 // AOT Runtime Constants Address
 5184 operand immAOTRuntimeConstantsAddress()
 5185 %{
 5186   // Check if the address is in the range of AOT Runtime Constants
 5187   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
 5188   match(ConP);
 5189 
 5190   op_cost(0);
 5191   format %{ %}
 5192   interface(CONST_INTER);
 5193 %}
 5194 
 5195 operand kReg()
 5196 %{
 5197   constraint(ALLOC_IN_RC(vectmask_reg));
 5198   match(RegVectMask);
  format %{ %}
 5200   interface(REG_INTER);
 5201 %}
 5202 
 5203 // Register Operands
 5204 // Integer Register
 5205 operand rRegI()
 5206 %{
 5207   constraint(ALLOC_IN_RC(int_reg));
 5208   match(RegI);
 5209 
 5210   match(rax_RegI);
 5211   match(rbx_RegI);
 5212   match(rcx_RegI);
 5213   match(rdx_RegI);
 5214   match(rdi_RegI);
 5215 
 5216   format %{ %}
 5217   interface(REG_INTER);
 5218 %}
 5219 
 5220 // Special Registers
 5221 operand rax_RegI()
 5222 %{
 5223   constraint(ALLOC_IN_RC(int_rax_reg));
 5224   match(RegI);
 5225   match(rRegI);
 5226 
 5227   format %{ "RAX" %}
 5228   interface(REG_INTER);
 5229 %}
 5230 
 5231 // Special Registers
 5232 operand rbx_RegI()
 5233 %{
 5234   constraint(ALLOC_IN_RC(int_rbx_reg));
 5235   match(RegI);
 5236   match(rRegI);
 5237 
 5238   format %{ "RBX" %}
 5239   interface(REG_INTER);
 5240 %}
 5241 
 5242 operand rcx_RegI()
 5243 %{
 5244   constraint(ALLOC_IN_RC(int_rcx_reg));
 5245   match(RegI);
 5246   match(rRegI);
 5247 
 5248   format %{ "RCX" %}
 5249   interface(REG_INTER);
 5250 %}
 5251 
 5252 operand rdx_RegI()
 5253 %{
 5254   constraint(ALLOC_IN_RC(int_rdx_reg));
 5255   match(RegI);
 5256   match(rRegI);
 5257 
 5258   format %{ "RDX" %}
 5259   interface(REG_INTER);
 5260 %}
 5261 
 5262 operand rdi_RegI()
 5263 %{
 5264   constraint(ALLOC_IN_RC(int_rdi_reg));
 5265   match(RegI);
 5266   match(rRegI);
 5267 
 5268   format %{ "RDI" %}
 5269   interface(REG_INTER);
 5270 %}
 5271 
 5272 operand no_rax_rdx_RegI()
 5273 %{
 5274   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5275   match(RegI);
 5276   match(rbx_RegI);
 5277   match(rcx_RegI);
 5278   match(rdi_RegI);
 5279 
 5280   format %{ %}
 5281   interface(REG_INTER);
 5282 %}
 5283 
 5284 operand no_rbp_r13_RegI()
 5285 %{
 5286   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5287   match(RegI);
 5288   match(rRegI);
 5289   match(rax_RegI);
 5290   match(rbx_RegI);
 5291   match(rcx_RegI);
 5292   match(rdx_RegI);
 5293   match(rdi_RegI);
 5294 
 5295   format %{ %}
 5296   interface(REG_INTER);
 5297 %}
 5298 
 5299 // Pointer Register
 5300 operand any_RegP()
 5301 %{
 5302   constraint(ALLOC_IN_RC(any_reg));
 5303   match(RegP);
 5304   match(rax_RegP);
 5305   match(rbx_RegP);
 5306   match(rdi_RegP);
 5307   match(rsi_RegP);
 5308   match(rbp_RegP);
 5309   match(r15_RegP);
 5310   match(rRegP);
 5311 
 5312   format %{ %}
 5313   interface(REG_INTER);
 5314 %}
 5315 
 5316 operand rRegP()
 5317 %{
 5318   constraint(ALLOC_IN_RC(ptr_reg));
 5319   match(RegP);
 5320   match(rax_RegP);
 5321   match(rbx_RegP);
 5322   match(rdi_RegP);
 5323   match(rsi_RegP);
 5324   match(rbp_RegP);  // See Q&A below about
 5325   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5326 
 5327   format %{ %}
 5328   interface(REG_INTER);
 5329 %}
 5330 
 5331 operand rRegN() %{
 5332   constraint(ALLOC_IN_RC(int_reg));
 5333   match(RegN);
 5334 
 5335   format %{ %}
 5336   interface(REG_INTER);
 5337 %}
 5338 
// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator for the output.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is not included in the output of the instruction either.
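// For example, an instruct that loads through a memory operand can use r15
// (via any_RegP) as the base of its address input, but the allocator will
// never hand out r15 for an rRegP result, since r15 is not in ptr_reg.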
 5349 
 5350 // This operand is not allowed to use RBP even if
 5351 // RBP is not used to hold the frame pointer.
 5352 operand no_rbp_RegP()
 5353 %{
 5354   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5355   match(RegP);
 5356   match(rbx_RegP);
 5357   match(rsi_RegP);
 5358   match(rdi_RegP);
 5359 
 5360   format %{ %}
 5361   interface(REG_INTER);
 5362 %}
 5363 
 5364 // Special Registers
 5365 // Return a pointer value
 5366 operand rax_RegP()
 5367 %{
 5368   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5369   match(RegP);
 5370   match(rRegP);
 5371 
 5372   format %{ %}
 5373   interface(REG_INTER);
 5374 %}
 5375 
 5376 // Special Registers
 5377 // Return a compressed pointer value
 5378 operand rax_RegN()
 5379 %{
 5380   constraint(ALLOC_IN_RC(int_rax_reg));
 5381   match(RegN);
 5382   match(rRegN);
 5383 
 5384   format %{ %}
 5385   interface(REG_INTER);
 5386 %}
 5387 
 5388 // Used in AtomicAdd
 5389 operand rbx_RegP()
 5390 %{
 5391   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5392   match(RegP);
 5393   match(rRegP);
 5394 
 5395   format %{ %}
 5396   interface(REG_INTER);
 5397 %}
 5398 
 5399 operand rsi_RegP()
 5400 %{
 5401   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5402   match(RegP);
 5403   match(rRegP);
 5404 
 5405   format %{ %}
 5406   interface(REG_INTER);
 5407 %}
 5408 
 5409 operand rbp_RegP()
 5410 %{
 5411   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5412   match(RegP);
 5413   match(rRegP);
 5414 
 5415   format %{ %}
 5416   interface(REG_INTER);
 5417 %}
 5418 
 5419 // Used in rep stosq
 5420 operand rdi_RegP()
 5421 %{
 5422   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5423   match(RegP);
 5424   match(rRegP);
 5425 
 5426   format %{ %}
 5427   interface(REG_INTER);
 5428 %}
 5429 
 5430 operand r15_RegP()
 5431 %{
 5432   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5433   match(RegP);
 5434   match(rRegP);
 5435 
 5436   format %{ %}
 5437   interface(REG_INTER);
 5438 %}
 5439 
 5440 operand rRegL()
 5441 %{
 5442   constraint(ALLOC_IN_RC(long_reg));
 5443   match(RegL);
 5444   match(rax_RegL);
 5445   match(rdx_RegL);
 5446 
 5447   format %{ %}
 5448   interface(REG_INTER);
 5449 %}
 5450 
 5451 // Special Registers
 5452 operand no_rax_rdx_RegL()
 5453 %{
 5454   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5455   match(RegL);
 5456   match(rRegL);
 5457 
 5458   format %{ %}
 5459   interface(REG_INTER);
 5460 %}
 5461 
 5462 operand rax_RegL()
 5463 %{
 5464   constraint(ALLOC_IN_RC(long_rax_reg));
 5465   match(RegL);
 5466   match(rRegL);
 5467 
 5468   format %{ "RAX" %}
 5469   interface(REG_INTER);
 5470 %}
 5471 
 5472 operand rcx_RegL()
 5473 %{
 5474   constraint(ALLOC_IN_RC(long_rcx_reg));
 5475   match(RegL);
 5476   match(rRegL);
 5477 
 5478   format %{ %}
 5479   interface(REG_INTER);
 5480 %}
 5481 
 5482 operand rdx_RegL()
 5483 %{
 5484   constraint(ALLOC_IN_RC(long_rdx_reg));
 5485   match(RegL);
 5486   match(rRegL);
 5487 
 5488   format %{ %}
 5489   interface(REG_INTER);
 5490 %}
 5491 
 5492 operand r11_RegL()
 5493 %{
 5494   constraint(ALLOC_IN_RC(long_r11_reg));
 5495   match(RegL);
 5496   match(rRegL);
 5497 
 5498   format %{ %}
 5499   interface(REG_INTER);
 5500 %}
 5501 
 5502 operand no_rbp_r13_RegL()
 5503 %{
 5504   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5505   match(RegL);
 5506   match(rRegL);
 5507   match(rax_RegL);
 5508   match(rcx_RegL);
 5509   match(rdx_RegL);
 5510 
 5511   format %{ %}
 5512   interface(REG_INTER);
 5513 %}
 5514 
 5515 // Flags register, used as output of compare instructions
 5516 operand rFlagsReg()
 5517 %{
 5518   constraint(ALLOC_IN_RC(int_flags));
 5519   match(RegFlags);
 5520 
 5521   format %{ "RFLAGS" %}
 5522   interface(REG_INTER);
 5523 %}
 5524 
 5525 // Flags register, used as output of FLOATING POINT compare instructions
 5526 operand rFlagsRegU()
 5527 %{
 5528   constraint(ALLOC_IN_RC(int_flags));
 5529   match(RegFlags);
 5530 
 5531   format %{ "RFLAGS_U" %}
 5532   interface(REG_INTER);
 5533 %}
 5534 
 5535 operand rFlagsRegUCF() %{
 5536   constraint(ALLOC_IN_RC(int_flags));
 5537   match(RegFlags);
 5538   predicate(!UseAPX || !VM_Version::supports_avx10_2());
 5539 
 5540   format %{ "RFLAGS_U_CF" %}
 5541   interface(REG_INTER);
 5542 %}
 5543 
 5544 operand rFlagsRegUCFE() %{
 5545   constraint(ALLOC_IN_RC(int_flags));
 5546   match(RegFlags);
 5547   predicate(UseAPX && VM_Version::supports_avx10_2());
 5548 
 5549   format %{ "RFLAGS_U_CFE" %}
 5550   interface(REG_INTER);
 5551 %}
 5552 
 5553 // Float register operands
 5554 operand regF() %{
 5555    constraint(ALLOC_IN_RC(float_reg));
 5556    match(RegF);
 5557 
 5558    format %{ %}
 5559    interface(REG_INTER);
 5560 %}
 5561 
 5562 // Float register operands
 5563 operand legRegF() %{
 5564    constraint(ALLOC_IN_RC(float_reg_legacy));
 5565    match(RegF);
 5566 
 5567    format %{ %}
 5568    interface(REG_INTER);
 5569 %}
 5570 
 5571 // Float register operands
 5572 operand vlRegF() %{
 5573    constraint(ALLOC_IN_RC(float_reg_vl));
 5574    match(RegF);
 5575 
 5576    format %{ %}
 5577    interface(REG_INTER);
 5578 %}
 5579 
 5580 // Double register operands
 5581 operand regD() %{
 5582    constraint(ALLOC_IN_RC(double_reg));
 5583    match(RegD);
 5584 
 5585    format %{ %}
 5586    interface(REG_INTER);
 5587 %}
 5588 
 5589 // Double register operands
 5590 operand legRegD() %{
 5591    constraint(ALLOC_IN_RC(double_reg_legacy));
 5592    match(RegD);
 5593 
 5594    format %{ %}
 5595    interface(REG_INTER);
 5596 %}
 5597 
 5598 // Double register operands
 5599 operand vlRegD() %{
 5600    constraint(ALLOC_IN_RC(double_reg_vl));
 5601    match(RegD);
 5602 
 5603    format %{ %}
 5604    interface(REG_INTER);
 5605 %}
 5606 
 5607 //----------Memory Operands----------------------------------------------------
 5608 // Direct Memory Operand
 5609 // operand direct(immP addr)
 5610 // %{
 5611 //   match(addr);
 5612 
 5613 //   format %{ "[$addr]" %}
 5614 //   interface(MEMORY_INTER) %{
 5615 //     base(0xFFFFFFFF);
 5616 //     index(0x4);
 5617 //     scale(0x0);
 5618 //     disp($addr);
 5619 //   %}
 5620 // %}
 5621 
 5622 // Indirect Memory Operand
 5623 operand indirect(any_RegP reg)
 5624 %{
 5625   constraint(ALLOC_IN_RC(ptr_reg));
 5626   match(reg);
 5627 
 5628   format %{ "[$reg]" %}
 5629   interface(MEMORY_INTER) %{
 5630     base($reg);
 5631     index(0x4);
 5632     scale(0x0);
 5633     disp(0x0);
 5634   %}
 5635 %}
 5636 
 5637 // Indirect Memory Plus Short Offset Operand
 5638 operand indOffset8(any_RegP reg, immL8 off)
 5639 %{
 5640   constraint(ALLOC_IN_RC(ptr_reg));
 5641   match(AddP reg off);
 5642 
 5643   format %{ "[$reg + $off (8-bit)]" %}
 5644   interface(MEMORY_INTER) %{
 5645     base($reg);
 5646     index(0x4);
 5647     scale(0x0);
 5648     disp($off);
 5649   %}
 5650 %}
 5651 
 5652 // Indirect Memory Plus Long Offset Operand
 5653 operand indOffset32(any_RegP reg, immL32 off)
 5654 %{
 5655   constraint(ALLOC_IN_RC(ptr_reg));
 5656   match(AddP reg off);
 5657 
 5658   format %{ "[$reg + $off (32-bit)]" %}
 5659   interface(MEMORY_INTER) %{
 5660     base($reg);
 5661     index(0x4);
 5662     scale(0x0);
 5663     disp($off);
 5664   %}
 5665 %}
 5666 
 5667 // Indirect Memory Plus Index Register Plus Offset Operand
 5668 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5669 %{
 5670   constraint(ALLOC_IN_RC(ptr_reg));
 5671   match(AddP (AddP reg lreg) off);
 5672 
 5673   op_cost(10);
 5674   format %{"[$reg + $off + $lreg]" %}
 5675   interface(MEMORY_INTER) %{
 5676     base($reg);
 5677     index($lreg);
 5678     scale(0x0);
 5679     disp($off);
 5680   %}
 5681 %}
 5682 
// Indirect Memory Plus Index Register Operand
 5684 operand indIndex(any_RegP reg, rRegL lreg)
 5685 %{
 5686   constraint(ALLOC_IN_RC(ptr_reg));
 5687   match(AddP reg lreg);
 5688 
 5689   op_cost(10);
 5690   format %{"[$reg + $lreg]" %}
 5691   interface(MEMORY_INTER) %{
 5692     base($reg);
 5693     index($lreg);
 5694     scale(0x0);
 5695     disp(0x0);
 5696   %}
 5697 %}
 5698 
 5699 // Indirect Memory Times Scale Plus Index Register
 5700 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5701 %{
 5702   constraint(ALLOC_IN_RC(ptr_reg));
 5703   match(AddP reg (LShiftL lreg scale));
 5704 
 5705   op_cost(10);
 5706   format %{"[$reg + $lreg << $scale]" %}
 5707   interface(MEMORY_INTER) %{
 5708     base($reg);
 5709     index($lreg);
 5710     scale($scale);
 5711     disp(0x0);
 5712   %}
 5713 %}
 5714 
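// The predicate below walks the matched ideal subtree: n is the AddP node,
// n->in(3) is its Offset input (the LShiftL), and ->in(1) of that is the
// ConvI2L of idx. Requiring a provably non-negative index makes sign- and
// zero-extension to 64 bits agree, so the 32-bit index register can be used
// directly in the address.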
 5715 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5716 %{
 5717   constraint(ALLOC_IN_RC(ptr_reg));
 5718   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5719   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5720 
 5721   op_cost(10);
 5722   format %{"[$reg + pos $idx << $scale]" %}
 5723   interface(MEMORY_INTER) %{
 5724     base($reg);
 5725     index($idx);
 5726     scale($scale);
 5727     disp(0x0);
 5728   %}
 5729 %}
 5730 
 5731 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5732 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5733 %{
 5734   constraint(ALLOC_IN_RC(ptr_reg));
 5735   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5736 
 5737   op_cost(10);
 5738   format %{"[$reg + $off + $lreg << $scale]" %}
 5739   interface(MEMORY_INTER) %{
 5740     base($reg);
 5741     index($lreg);
 5742     scale($scale);
 5743     disp($off);
 5744   %}
 5745 %}
 5746 
 5747 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5748 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5749 %{
 5750   constraint(ALLOC_IN_RC(ptr_reg));
 5751   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5752   match(AddP (AddP reg (ConvI2L idx)) off);
 5753 
 5754   op_cost(10);
 5755   format %{"[$reg + $off + $idx]" %}
 5756   interface(MEMORY_INTER) %{
 5757     base($reg);
 5758     index($idx);
 5759     scale(0x0);
 5760     disp($off);
 5761   %}
 5762 %}
 5763 
 5764 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5765 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5766 %{
 5767   constraint(ALLOC_IN_RC(ptr_reg));
 5768   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5769   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5770 
 5771   op_cost(10);
 5772   format %{"[$reg + $off + $idx << $scale]" %}
 5773   interface(MEMORY_INTER) %{
 5774     base($reg);
 5775     index($idx);
 5776     scale($scale);
 5777     disp($off);
 5778   %}
 5779 %}
 5780 
 5781 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without a base,
// so we can't free up R12 even when CompressedOops::base() == nullptr.
 5784 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5785   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5786   constraint(ALLOC_IN_RC(ptr_reg));
 5787   match(AddP (DecodeN reg) off);
 5788 
 5789   op_cost(10);
 5790   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5791   interface(MEMORY_INTER) %{
 5792     base(0xc); // R12
 5793     index($reg);
 5794     scale(0x3);
 5795     disp($off);
 5796   %}
 5797 %}
 5798 
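// The *Narrow operands below fold a DecodeN straight into the address when
// CompressedOops::shift() == 0, i.e. (assuming unscaled compressed-oop mode
// with a null heap base) the 32-bit narrow oop is already the full address
// and can serve directly as the base register.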
 5799 // Indirect Memory Operand
 5800 operand indirectNarrow(rRegN reg)
 5801 %{
 5802   predicate(CompressedOops::shift() == 0);
 5803   constraint(ALLOC_IN_RC(ptr_reg));
 5804   match(DecodeN reg);
 5805 
 5806   format %{ "[$reg]" %}
 5807   interface(MEMORY_INTER) %{
 5808     base($reg);
 5809     index(0x4);
 5810     scale(0x0);
 5811     disp(0x0);
 5812   %}
 5813 %}
 5814 
 5815 // Indirect Memory Plus Short Offset Operand
 5816 operand indOffset8Narrow(rRegN reg, immL8 off)
 5817 %{
 5818   predicate(CompressedOops::shift() == 0);
 5819   constraint(ALLOC_IN_RC(ptr_reg));
 5820   match(AddP (DecodeN reg) off);
 5821 
 5822   format %{ "[$reg + $off (8-bit)]" %}
 5823   interface(MEMORY_INTER) %{
 5824     base($reg);
 5825     index(0x4);
 5826     scale(0x0);
 5827     disp($off);
 5828   %}
 5829 %}
 5830 
 5831 // Indirect Memory Plus Long Offset Operand
 5832 operand indOffset32Narrow(rRegN reg, immL32 off)
 5833 %{
 5834   predicate(CompressedOops::shift() == 0);
 5835   constraint(ALLOC_IN_RC(ptr_reg));
 5836   match(AddP (DecodeN reg) off);
 5837 
 5838   format %{ "[$reg + $off (32-bit)]" %}
 5839   interface(MEMORY_INTER) %{
 5840     base($reg);
 5841     index(0x4);
 5842     scale(0x0);
 5843     disp($off);
 5844   %}
 5845 %}
 5846 
 5847 // Indirect Memory Plus Index Register Plus Offset Operand
 5848 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5849 %{
 5850   predicate(CompressedOops::shift() == 0);
 5851   constraint(ALLOC_IN_RC(ptr_reg));
 5852   match(AddP (AddP (DecodeN reg) lreg) off);
 5853 
 5854   op_cost(10);
 5855   format %{"[$reg + $off + $lreg]" %}
 5856   interface(MEMORY_INTER) %{
 5857     base($reg);
 5858     index($lreg);
 5859     scale(0x0);
 5860     disp($off);
 5861   %}
 5862 %}
 5863 
// Indirect Memory Plus Index Register Operand
 5865 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5866 %{
 5867   predicate(CompressedOops::shift() == 0);
 5868   constraint(ALLOC_IN_RC(ptr_reg));
 5869   match(AddP (DecodeN reg) lreg);
 5870 
 5871   op_cost(10);
 5872   format %{"[$reg + $lreg]" %}
 5873   interface(MEMORY_INTER) %{
 5874     base($reg);
 5875     index($lreg);
 5876     scale(0x0);
 5877     disp(0x0);
 5878   %}
 5879 %}
 5880 
 5881 // Indirect Memory Times Scale Plus Index Register
 5882 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5883 %{
 5884   predicate(CompressedOops::shift() == 0);
 5885   constraint(ALLOC_IN_RC(ptr_reg));
 5886   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5887 
 5888   op_cost(10);
 5889   format %{"[$reg + $lreg << $scale]" %}
 5890   interface(MEMORY_INTER) %{
 5891     base($reg);
 5892     index($lreg);
 5893     scale($scale);
 5894     disp(0x0);
 5895   %}
 5896 %}
 5897 
 5898 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5899 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5900 %{
 5901   predicate(CompressedOops::shift() == 0);
 5902   constraint(ALLOC_IN_RC(ptr_reg));
 5903   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5904 
 5905   op_cost(10);
 5906   format %{"[$reg + $off + $lreg << $scale]" %}
 5907   interface(MEMORY_INTER) %{
 5908     base($reg);
 5909     index($lreg);
 5910     scale($scale);
 5911     disp($off);
 5912   %}
 5913 %}
 5914 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5916 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5917 %{
 5918   constraint(ALLOC_IN_RC(ptr_reg));
 5919   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5920   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5921 
 5922   op_cost(10);
 5923   format %{"[$reg + $off + $idx]" %}
 5924   interface(MEMORY_INTER) %{
 5925     base($reg);
 5926     index($idx);
 5927     scale(0x0);
 5928     disp($off);
 5929   %}
 5930 %}
 5931 
 5932 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5933 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5934 %{
 5935   constraint(ALLOC_IN_RC(ptr_reg));
 5936   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5937   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5938 
 5939   op_cost(10);
 5940   format %{"[$reg + $off + $idx << $scale]" %}
 5941   interface(MEMORY_INTER) %{
 5942     base($reg);
 5943     index($idx);
 5944     scale($scale);
 5945     disp($off);
 5946   %}
 5947 %}
 5948 
 5949 //----------Special Memory Operands--------------------------------------------
 5950 // Stack Slot Operand - This operand is used for loading and storing temporary
 5951 //                      values on the stack where a match requires a value to
 5952 //                      flow through memory.
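// For example, a temporary spilled at stack offset 8 is addressed as
// [RSP + 8]: the base is hard-wired to RSP (0x4), there is no index, and
// the operand's assigned stack slot supplies the displacement.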
 5953 operand stackSlotP(sRegP reg)
 5954 %{
 5955   constraint(ALLOC_IN_RC(stack_slots));
 5956   // No match rule because this operand is only generated in matching
 5957 
 5958   format %{ "[$reg]" %}
 5959   interface(MEMORY_INTER) %{
 5960     base(0x4);   // RSP
 5961     index(0x4);  // No Index
 5962     scale(0x0);  // No Scale
 5963     disp($reg);  // Stack Offset
 5964   %}
 5965 %}
 5966 
 5967 operand stackSlotI(sRegI reg)
 5968 %{
 5969   constraint(ALLOC_IN_RC(stack_slots));
 5970   // No match rule because this operand is only generated in matching
 5971 
 5972   format %{ "[$reg]" %}
 5973   interface(MEMORY_INTER) %{
 5974     base(0x4);   // RSP
 5975     index(0x4);  // No Index
 5976     scale(0x0);  // No Scale
 5977     disp($reg);  // Stack Offset
 5978   %}
 5979 %}
 5980 
 5981 operand stackSlotF(sRegF reg)
 5982 %{
 5983   constraint(ALLOC_IN_RC(stack_slots));
 5984   // No match rule because this operand is only generated in matching
 5985 
 5986   format %{ "[$reg]" %}
 5987   interface(MEMORY_INTER) %{
 5988     base(0x4);   // RSP
 5989     index(0x4);  // No Index
 5990     scale(0x0);  // No Scale
 5991     disp($reg);  // Stack Offset
 5992   %}
 5993 %}
 5994 
 5995 operand stackSlotD(sRegD reg)
 5996 %{
 5997   constraint(ALLOC_IN_RC(stack_slots));
 5998   // No match rule because this operand is only generated in matching
 5999 
 6000   format %{ "[$reg]" %}
 6001   interface(MEMORY_INTER) %{
 6002     base(0x4);   // RSP
 6003     index(0x4);  // No Index
 6004     scale(0x0);  // No Scale
 6005     disp($reg);  // Stack Offset
 6006   %}
 6007 %}
 6008 operand stackSlotL(sRegL reg)
 6009 %{
 6010   constraint(ALLOC_IN_RC(stack_slots));
 6011   // No match rule because this operand is only generated in matching
 6012 
 6013   format %{ "[$reg]" %}
 6014   interface(MEMORY_INTER) %{
 6015     base(0x4);   // RSP
 6016     index(0x4);  // No Index
 6017     scale(0x0);  // No Scale
 6018     disp($reg);  // Stack Offset
 6019   %}
 6020 %}
 6021 
 6022 //----------Conditional Branch Operands----------------------------------------
 6023 // Comparison Op  - This is the operation of the comparison, and is limited to
 6024 //                  the following set of codes:
 6025 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6026 //
 6027 // Other attributes of the comparison, such as unsignedness, are specified
 6028 // by the comparison instruction that sets a condition code flags register.
 6029 // That result is represented by a flags operand whose subtype is appropriate
 6030 // to the unsignedness (etc.) of the comparison.
 6031 //
 6032 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6033 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6034 // by matching a specific subtype of Bool operand below, such as cmpOpU.
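// For example, a signed int compare-and-branch is matched in two pieces:
//   (If (Bool (CmpI op1 op2) lt))
// A compare instruct matches the CmpI and defines rFlagsReg; the branch
// instruct matches the Bool as a cmpOp operand and encodes its 'lt' test
// with the "l" condition (0xc) from the table below.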
 6035 
 6036 // Comparison Code
 6037 operand cmpOp()
 6038 %{
 6039   match(Bool);
 6040 
 6041   format %{ "" %}
 6042   interface(COND_INTER) %{
 6043     equal(0x4, "e");
 6044     not_equal(0x5, "ne");
 6045     less(0xc, "l");
 6046     greater_equal(0xd, "ge");
 6047     less_equal(0xe, "le");
 6048     greater(0xf, "g");
 6049     overflow(0x0, "o");
 6050     no_overflow(0x1, "no");
 6051   %}
 6052 %}
 6053 
 6054 // Comparison Code, unsigned compare.  Used by FP also, with
 6055 // C2 (unordered) turned into GT or LT already.  The other bits
 6056 // C0 and C3 are turned into Carry & Zero flags.
 6057 operand cmpOpU()
 6058 %{
 6059   match(Bool);
 6060 
 6061   format %{ "" %}
 6062   interface(COND_INTER) %{
 6063     equal(0x4, "e");
 6064     not_equal(0x5, "ne");
 6065     less(0x2, "b");
 6066     greater_equal(0x3, "ae");
 6067     less_equal(0x6, "be");
 6068     greater(0x7, "a");
 6069     overflow(0x0, "o");
 6070     no_overflow(0x1, "no");
 6071   %}
 6072 %}
 6073 
 6074 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
 6078 operand cmpOpUCF() %{
 6079   match(Bool);
 6080   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6081             (n->as_Bool()->_test._test == BoolTest::lt ||
 6082              n->as_Bool()->_test._test == BoolTest::ge ||
 6083              n->as_Bool()->_test._test == BoolTest::le ||
 6084              n->as_Bool()->_test._test == BoolTest::gt ||
 6085              n->in(1)->in(1) == n->in(1)->in(2)));
 6086   format %{ "" %}
 6087   interface(COND_INTER) %{
 6088     equal(0xb, "np");
 6089     not_equal(0xa, "p");
 6090     less(0x2, "b");
 6091     greater_equal(0x3, "ae");
 6092     less_equal(0x6, "be");
 6093     greater(0x7, "a");
 6094     overflow(0x0, "o");
 6095     no_overflow(0x1, "no");
 6096   %}
 6097 %}
 6098 
 6099 
 6100 // Floating comparisons that can be fixed up with extra conditional jumps
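// A sketch of the fixup emitted by the branch instructs that consume this
// operand: for ne, parity (unordered) alone already implies not-equal, while
// for eq the parity case must branch around the ZF test:
//   ne:  jp,target ; jne,target
//   eq:  jp,skip ; je,target ; skip: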
 6101 operand cmpOpUCF2() %{
 6102   match(Bool);
 6103   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6104             (n->as_Bool()->_test._test == BoolTest::ne ||
 6105              n->as_Bool()->_test._test == BoolTest::eq) &&
 6106             n->in(1)->in(1) != n->in(1)->in(2));
 6107   format %{ "" %}
 6108   interface(COND_INTER) %{
 6109     equal(0x4, "e");
 6110     not_equal(0x5, "ne");
 6111     less(0x2, "b");
 6112     greater_equal(0x3, "ae");
 6113     less_equal(0x6, "be");
 6114     greater(0x7, "a");
 6115     overflow(0x0, "o");
 6116     no_overflow(0x1, "no");
 6117   %}
 6118 %}
 6119 
 6120 
// Floating point comparisons that set condition flags to test more directly.
// Unsigned tests are used for G (>) and GE (>=) conditions, while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert the
// latter conditions to ones that use unsigned tests before passing them into
// an instruction, because the preceding comparison might be based on a three-way
// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
 6127 operand cmpOpUCFE()
 6128 %{
 6129   match(Bool);
 6130   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6131             (n->as_Bool()->_test._test == BoolTest::ne ||
 6132              n->as_Bool()->_test._test == BoolTest::eq ||
 6133              n->as_Bool()->_test._test == BoolTest::lt ||
 6134              n->as_Bool()->_test._test == BoolTest::ge ||
 6135              n->as_Bool()->_test._test == BoolTest::le ||
 6136              n->as_Bool()->_test._test == BoolTest::gt));
 6137 
 6138   format %{ "" %}
 6139   interface(COND_INTER) %{
 6140     equal(0x4, "e");
 6141     not_equal(0x5, "ne");
 6142     less(0x2, "b");
 6143     greater_equal(0x3, "ae");
 6144     less_equal(0x6, "be");
 6145     greater(0x7, "a");
 6146     overflow(0x0, "o");
 6147     no_overflow(0x1, "no");
 6148   %}
 6149 %}
 6150 
// Operands for bound floating-point register arguments
 6152 operand rxmm0() %{
 6153   constraint(ALLOC_IN_RC(xmm0_reg));
 6154   match(VecX);
  format %{ %}
 6156   interface(REG_INTER);
 6157 %}
 6158 
 6159 // Vectors
 6160 
 6161 // Dummy generic vector class. Should be used for all vector operands.
 6162 // Replaced with vec[SDXYZ] during post-selection pass.
 6163 operand vec() %{
 6164   constraint(ALLOC_IN_RC(dynamic));
 6165   match(VecX);
 6166   match(VecY);
 6167   match(VecZ);
 6168   match(VecS);
 6169   match(VecD);
 6170 
 6171   format %{ %}
 6172   interface(REG_INTER);
 6173 %}
 6174 
 6175 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6176 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6177 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6178 // runtime code generation via reg_class_dynamic.
 6179 operand legVec() %{
 6180   constraint(ALLOC_IN_RC(dynamic));
 6181   match(VecX);
 6182   match(VecY);
 6183   match(VecZ);
 6184   match(VecS);
 6185   match(VecD);
 6186 
 6187   format %{ %}
 6188   interface(REG_INTER);
 6189 %}
 6190 
 6191 // Replaces vec during post-selection cleanup. See above.
 6192 operand vecS() %{
 6193   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6194   match(VecS);
 6195 
 6196   format %{ %}
 6197   interface(REG_INTER);
 6198 %}
 6199 
 6200 // Replaces legVec during post-selection cleanup. See above.
 6201 operand legVecS() %{
 6202   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6203   match(VecS);
 6204 
 6205   format %{ %}
 6206   interface(REG_INTER);
 6207 %}
 6208 
 6209 // Replaces vec during post-selection cleanup. See above.
 6210 operand vecD() %{
 6211   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6212   match(VecD);
 6213 
 6214   format %{ %}
 6215   interface(REG_INTER);
 6216 %}
 6217 
 6218 // Replaces legVec during post-selection cleanup. See above.
 6219 operand legVecD() %{
 6220   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6221   match(VecD);
 6222 
 6223   format %{ %}
 6224   interface(REG_INTER);
 6225 %}
 6226 
 6227 // Replaces vec during post-selection cleanup. See above.
 6228 operand vecX() %{
 6229   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6230   match(VecX);
 6231 
 6232   format %{ %}
 6233   interface(REG_INTER);
 6234 %}
 6235 
 6236 // Replaces legVec during post-selection cleanup. See above.
 6237 operand legVecX() %{
 6238   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6239   match(VecX);
 6240 
 6241   format %{ %}
 6242   interface(REG_INTER);
 6243 %}
 6244 
 6245 // Replaces vec during post-selection cleanup. See above.
 6246 operand vecY() %{
 6247   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6248   match(VecY);
 6249 
 6250   format %{ %}
 6251   interface(REG_INTER);
 6252 %}
 6253 
 6254 // Replaces legVec during post-selection cleanup. See above.
 6255 operand legVecY() %{
 6256   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6257   match(VecY);
 6258 
 6259   format %{ %}
 6260   interface(REG_INTER);
 6261 %}
 6262 
 6263 // Replaces vec during post-selection cleanup. See above.
 6264 operand vecZ() %{
 6265   constraint(ALLOC_IN_RC(vectorz_reg));
 6266   match(VecZ);
 6267 
 6268   format %{ %}
 6269   interface(REG_INTER);
 6270 %}
 6271 
 6272 // Replaces legVec during post-selection cleanup. See above.
 6273 operand legVecZ() %{
 6274   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6275   match(VecZ);
 6276 
 6277   format %{ %}
 6278   interface(REG_INTER);
 6279 %}
 6280 
 6281 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 6283 // instruction definitions by not requiring the AD writer to specify separate
 6284 // instructions for every form of operand when the instruction accepts
 6285 // multiple operand types with the same basic encoding and format.  The classic
 6286 // case of this is memory operands.
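// For example, the loadI instruct defined later in this file is declared
// once against this class:
//   instruct loadI(rRegI dst, memory mem) %{ match(Set dst (LoadI mem)); ... %}
// and thereby matches every addressing form listed in the opclass, instead
// of needing one instruct per form.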
 6287 
 6288 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6289                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6290                indCompressedOopOffset,
 6291                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6292                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6293                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6294 
 6295 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 6297 pipeline %{
 6298 
 6299 //----------ATTRIBUTES---------------------------------------------------------
 6300 attributes %{
  variable_size_instructions;        // Variable-sized instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 6304   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6305   instruction_fetch_units = 1;       // of 16 bytes
 6306 %}
 6307 
 6308 //----------RESOURCES----------------------------------------------------------
 6309 // Resources are the functional units available to the machine
 6310 
 6311 // Generic P2/P3 pipeline
 6312 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6313 // 3 instructions decoded per cycle.
 6314 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops; only ALU0 handles mul instructions.
 6316 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6317            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6318            BR, FPU,
 6319            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6320 
 6321 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6322 // Pipeline Description specifies the stages in the machine's pipeline
 6323 
 6324 // Generic P2/P3 pipeline
 6325 pipe_desc(S0, S1, S2, S3, S4, S5);
 6326 
 6327 //----------PIPELINE CLASSES---------------------------------------------------
 6328 // Pipeline Classes describe the stages in which input and output are
 6329 // referenced by the hardware pipeline.
 6330 
 6331 // Naming convention: ialu or fpu
 6332 // Then: _reg
 6333 // Then: _reg if there is a 2nd register
 6334 // Then: _long if it's a pair of instructions implementing a long
 6335 // Then: _fat if it requires the big decoder
 6336 //   Or: _mem if it requires the big decoder and a memory unit.
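// For example, ialu_reg_mem below is an integer ALU operation with a register
// destination and a memory source: it needs the big decoder (D0) plus a MEM
// unit, and its result is not available until stage S5.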
 6337 
 6338 // Integer ALU reg operation
 6339 pipe_class ialu_reg(rRegI dst)
 6340 %{
 6341     single_instruction;
 6342     dst    : S4(write);
 6343     dst    : S3(read);
 6344     DECODE : S0;        // any decoder
 6345     ALU    : S3;        // any alu
 6346 %}
 6347 
 6348 // Long ALU reg operation
 6349 pipe_class ialu_reg_long(rRegL dst)
 6350 %{
 6351     instruction_count(2);
 6352     dst    : S4(write);
 6353     dst    : S3(read);
 6354     DECODE : S0(2);     // any 2 decoders
 6355     ALU    : S3(2);     // both alus
 6356 %}
 6357 
 6358 // Integer ALU reg operation using big decoder
 6359 pipe_class ialu_reg_fat(rRegI dst)
 6360 %{
 6361     single_instruction;
 6362     dst    : S4(write);
 6363     dst    : S3(read);
 6364     D0     : S0;        // big decoder only
 6365     ALU    : S3;        // any alu
 6366 %}
 6367 
 6368 // Integer ALU reg-reg operation
 6369 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6370 %{
 6371     single_instruction;
 6372     dst    : S4(write);
 6373     src    : S3(read);
 6374     DECODE : S0;        // any decoder
 6375     ALU    : S3;        // any alu
 6376 %}
 6377 
 6378 // Integer ALU reg-reg operation
 6379 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6380 %{
 6381     single_instruction;
 6382     dst    : S4(write);
 6383     src    : S3(read);
 6384     D0     : S0;        // big decoder only
 6385     ALU    : S3;        // any alu
 6386 %}
 6387 
 6388 // Integer ALU reg-mem operation
 6389 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6390 %{
 6391     single_instruction;
 6392     dst    : S5(write);
 6393     mem    : S3(read);
 6394     D0     : S0;        // big decoder only
 6395     ALU    : S4;        // any alu
 6396     MEM    : S3;        // any mem
 6397 %}
 6398 
 6399 // Integer mem operation (prefetch)
 6400 pipe_class ialu_mem(memory mem)
 6401 %{
 6402     single_instruction;
 6403     mem    : S3(read);
 6404     D0     : S0;        // big decoder only
 6405     MEM    : S3;        // any mem
 6406 %}
 6407 
 6408 // Integer Store to Memory
 6409 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6410 %{
 6411     single_instruction;
 6412     mem    : S3(read);
 6413     src    : S5(read);
 6414     D0     : S0;        // big decoder only
 6415     ALU    : S4;        // any alu
 6416     MEM    : S3;
 6417 %}
 6418 
 6419 // // Long Store to Memory
 6420 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6421 // %{
 6422 //     instruction_count(2);
 6423 //     mem    : S3(read);
 6424 //     src    : S5(read);
//     D0     : S0(2);     // big decoder only; twice
//     ALU    : S4(2);     // any 2 alus
//     MEM    : S3(2);     // both mems
 6428 // %}
 6429 
 6430 // Integer Store to Memory
 6431 pipe_class ialu_mem_imm(memory mem)
 6432 %{
 6433     single_instruction;
 6434     mem    : S3(read);
 6435     D0     : S0;        // big decoder only
 6436     ALU    : S4;        // any alu
 6437     MEM    : S3;
 6438 %}
 6439 
 6440 // Integer ALU0 reg-reg operation
 6441 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6442 %{
 6443     single_instruction;
 6444     dst    : S4(write);
 6445     src    : S3(read);
 6446     D0     : S0;        // Big decoder only
 6447     ALU0   : S3;        // only alu0
 6448 %}
 6449 
 6450 // Integer ALU0 reg-mem operation
 6451 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6452 %{
 6453     single_instruction;
 6454     dst    : S5(write);
 6455     mem    : S3(read);
 6456     D0     : S0;        // big decoder only
 6457     ALU0   : S4;        // ALU0 only
 6458     MEM    : S3;        // any mem
 6459 %}
 6460 
 6461 // Integer ALU reg-reg operation
 6462 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6463 %{
 6464     single_instruction;
 6465     cr     : S4(write);
 6466     src1   : S3(read);
 6467     src2   : S3(read);
 6468     DECODE : S0;        // any decoder
 6469     ALU    : S3;        // any alu
 6470 %}
 6471 
 6472 // Integer ALU reg-imm operation
 6473 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6474 %{
 6475     single_instruction;
 6476     cr     : S4(write);
 6477     src1   : S3(read);
 6478     DECODE : S0;        // any decoder
 6479     ALU    : S3;        // any alu
 6480 %}
 6481 
 6482 // Integer ALU reg-mem operation
 6483 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6484 %{
 6485     single_instruction;
 6486     cr     : S4(write);
 6487     src1   : S3(read);
 6488     src2   : S3(read);
 6489     D0     : S0;        // big decoder only
 6490     ALU    : S4;        // any alu
 6491     MEM    : S3;
 6492 %}
 6493 
 6494 // Conditional move reg-reg
 6495 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6496 %{
 6497     instruction_count(4);
 6498     y      : S4(read);
 6499     q      : S3(read);
 6500     p      : S3(read);
 6501     DECODE : S0(4);     // any decoder
 6502 %}
 6503 
 6504 // Conditional move reg-reg
 6505 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6506 %{
 6507     single_instruction;
 6508     dst    : S4(write);
 6509     src    : S3(read);
 6510     cr     : S3(read);
 6511     DECODE : S0;        // any decoder
 6512 %}
 6513 
 6514 // Conditional move reg-mem
 6515 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6516 %{
 6517     single_instruction;
 6518     dst    : S4(write);
 6519     src    : S3(read);
 6520     cr     : S3(read);
 6521     DECODE : S0;        // any decoder
 6522     MEM    : S3;
 6523 %}
 6524 
 6525 // Conditional move reg-reg long
 6526 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6527 %{
 6528     single_instruction;
 6529     dst    : S4(write);
 6530     src    : S3(read);
 6531     cr     : S3(read);
 6532     DECODE : S0(2);     // any 2 decoders
 6533 %}
 6534 
 6535 // Float reg-reg operation
 6536 pipe_class fpu_reg(regD dst)
 6537 %{
 6538     instruction_count(2);
 6539     dst    : S3(read);
 6540     DECODE : S0(2);     // any 2 decoders
 6541     FPU    : S3;
 6542 %}
 6543 
 6544 // Float reg-reg operation
 6545 pipe_class fpu_reg_reg(regD dst, regD src)
 6546 %{
 6547     instruction_count(2);
 6548     dst    : S4(write);
 6549     src    : S3(read);
 6550     DECODE : S0(2);     // any 2 decoders
 6551     FPU    : S3;
 6552 %}
 6553 
 6554 // Float reg-reg operation
 6555 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6556 %{
 6557     instruction_count(3);
 6558     dst    : S4(write);
 6559     src1   : S3(read);
 6560     src2   : S3(read);
 6561     DECODE : S0(3);     // any 3 decoders
 6562     FPU    : S3(2);
 6563 %}
 6564 
 6565 // Float reg-reg operation
 6566 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6567 %{
 6568     instruction_count(4);
 6569     dst    : S4(write);
 6570     src1   : S3(read);
 6571     src2   : S3(read);
 6572     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6574     FPU    : S3(2);
 6575 %}
 6576 
 6577 // Float reg-reg operation
 6578 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6579 %{
 6580     instruction_count(4);
 6581     dst    : S4(write);
 6582     src1   : S3(read);
 6583     src2   : S3(read);
 6584     src3   : S3(read);
 6585     DECODE : S1(3);     // any 3 decoders
 6586     D0     : S0;        // Big decoder only
 6587     FPU    : S3(2);
 6588     MEM    : S3;
 6589 %}
 6590 
 6591 // Float reg-mem operation
 6592 pipe_class fpu_reg_mem(regD dst, memory mem)
 6593 %{
 6594     instruction_count(2);
 6595     dst    : S5(write);
 6596     mem    : S3(read);
 6597     D0     : S0;        // big decoder only
 6598     DECODE : S1;        // any decoder for FPU POP
 6599     FPU    : S4;
 6600     MEM    : S3;        // any mem
 6601 %}
 6602 
 6603 // Float reg-mem operation
 6604 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6605 %{
 6606     instruction_count(3);
 6607     dst    : S5(write);
 6608     src1   : S3(read);
 6609     mem    : S3(read);
 6610     D0     : S0;        // big decoder only
 6611     DECODE : S1(2);     // any decoder for FPU POP
 6612     FPU    : S4;
 6613     MEM    : S3;        // any mem
 6614 %}
 6615 
 6616 // Float mem-reg operation
 6617 pipe_class fpu_mem_reg(memory mem, regD src)
 6618 %{
 6619     instruction_count(2);
 6620     src    : S5(read);
 6621     mem    : S3(read);
 6622     DECODE : S0;        // any decoder for FPU PUSH
 6623     D0     : S1;        // big decoder only
 6624     FPU    : S4;
 6625     MEM    : S3;        // any mem
 6626 %}
 6627 
 6628 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6629 %{
 6630     instruction_count(3);
 6631     src1   : S3(read);
 6632     src2   : S3(read);
 6633     mem    : S3(read);
 6634     DECODE : S0(2);     // any decoder for FPU PUSH
 6635     D0     : S1;        // big decoder only
 6636     FPU    : S4;
 6637     MEM    : S3;        // any mem
 6638 %}
 6639 
 6640 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6641 %{
 6642     instruction_count(3);
 6643     src1   : S3(read);
 6644     src2   : S3(read);
 6645     mem    : S4(read);
 6646     DECODE : S0;        // any decoder for FPU PUSH
 6647     D0     : S0(2);     // big decoder only
 6648     FPU    : S4;
 6649     MEM    : S3(2);     // any mem
 6650 %}
 6651 
 6652 pipe_class fpu_mem_mem(memory dst, memory src1)
 6653 %{
 6654     instruction_count(2);
 6655     src1   : S3(read);
 6656     dst    : S4(read);
 6657     D0     : S0(2);     // big decoder only
 6658     MEM    : S3(2);     // any mem
 6659 %}
 6660 
 6661 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6662 %{
 6663     instruction_count(3);
 6664     src1   : S3(read);
 6665     src2   : S3(read);
 6666     dst    : S4(read);
 6667     D0     : S0(3);     // big decoder only
 6668     FPU    : S4;
 6669     MEM    : S3(3);     // any mem
 6670 %}
 6671 
 6672 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6673 %{
 6674     instruction_count(3);
 6675     src1   : S4(read);
 6676     mem    : S4(read);
 6677     DECODE : S0;        // any decoder for FPU PUSH
 6678     D0     : S0(2);     // big decoder only
 6679     FPU    : S4;
 6680     MEM    : S3(2);     // any mem
 6681 %}
 6682 
 6683 // Float load constant
 6684 pipe_class fpu_reg_con(regD dst)
 6685 %{
 6686     instruction_count(2);
 6687     dst    : S5(write);
 6688     D0     : S0;        // big decoder only for the load
 6689     DECODE : S1;        // any decoder for FPU POP
 6690     FPU    : S4;
 6691     MEM    : S3;        // any mem
 6692 %}
 6693 
 6694 // Float load constant
 6695 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6696 %{
 6697     instruction_count(3);
 6698     dst    : S5(write);
 6699     src    : S3(read);
 6700     D0     : S0;        // big decoder only for the load
 6701     DECODE : S1(2);     // any decoder for FPU POP
 6702     FPU    : S4;
 6703     MEM    : S3;        // any mem
 6704 %}
 6705 
// Unconditional branch
 6707 pipe_class pipe_jmp(label labl)
 6708 %{
 6709     single_instruction;
 6710     BR   : S3;
 6711 %}
 6712 
 6713 // Conditional branch
 6714 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6715 %{
 6716     single_instruction;
 6717     cr    : S1(read);
 6718     BR    : S3;
 6719 %}
 6720 
 6721 // Allocation idiom
 6722 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6723 %{
 6724     instruction_count(1); force_serialization;
 6725     fixed_latency(6);
 6726     heap_ptr : S3(read);
 6727     DECODE   : S0(3);
 6728     D0       : S2;
 6729     MEM      : S3;
 6730     ALU      : S3(2);
 6731     dst      : S5(write);
 6732     BR       : S5;
 6733 %}
 6734 
 6735 // Generic big/slow expanded idiom
 6736 pipe_class pipe_slow()
 6737 %{
 6738     instruction_count(10); multiple_bundles; force_serialization;
 6739     fixed_latency(100);
 6740     D0  : S0(2);
 6741     MEM : S3(2);
 6742 %}
 6743 
 6744 // The real do-nothing guy
 6745 pipe_class empty()
 6746 %{
 6747     instruction_count(0);
 6748 %}
 6749 
 6750 // Define the class for the Nop node
 6751 define
 6752 %{
 6753    MachNop = empty;
 6754 %}
 6755 
 6756 %}
 6757 
 6758 //----------INSTRUCTIONS-------------------------------------------------------
 6759 //
 6760 // match      -- States which machine-independent subtree may be replaced
 6761 //               by this instruction.
 6762 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6763 //               selection to identify a minimum cost tree of machine
 6764 //               instructions that matches a tree of machine-independent
 6765 //               instructions.
 6766 // format     -- A string providing the disassembly for this instruction.
 6767 //               The value of an instruction's operand may be inserted
 6768 //               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
 6772 //               indicate the type of machine instruction, while secondary
 6773 //               and tertiary are often used for prefix options or addressing
 6774 //               modes.
 6775 // ins_encode -- A list of encode classes with parameters. The encode class
 6776 //               name must have been defined in an 'enc_class' specification
 6777 //               in the encode section of the architecture description.
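// As an illustrative sketch of how these attributes fit together (the real
// definitions appear throughout the rest of this file):
//
//   instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
//     match(Set dst (AddI dst src));  // replaces this ideal subtree
//     effect(KILL cr);                // addl clobbers RFLAGS
//     format %{ "addl    $dst, $src" %}
//     ins_encode %{ __ addl($dst$$Register, $src$$Register); %}
//     ins_pipe(ialu_reg_reg);
//   %}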
 6778 
 6779 // ============================================================================
 6780 
 6781 instruct ShouldNotReachHere() %{
 6782   match(Halt);
 6783   format %{ "stop\t# ShouldNotReachHere" %}
 6784   ins_encode %{
 6785     if (is_reachable()) {
 6786       const char* str = __ code_string(_halt_reason);
 6787       __ stop(str);
 6788     }
 6789   %}
 6790   ins_pipe(pipe_slow);
 6791 %}
 6792 
 6793 // ============================================================================
 6794 
 6795 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6796 // Load Float
 6797 instruct MoveF2VL(vlRegF dst, regF src) %{
 6798   match(Set dst src);
 6799   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6800   ins_encode %{
 6801     ShouldNotReachHere();
 6802   %}
 6803   ins_pipe( fpu_reg_reg );
 6804 %}
 6805 
 6806 // Load Float
 6807 instruct MoveF2LEG(legRegF dst, regF src) %{
 6808   match(Set dst src);
 6809   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6810   ins_encode %{
 6811     ShouldNotReachHere();
 6812   %}
 6813   ins_pipe( fpu_reg_reg );
 6814 %}
 6815 
 6816 // Load Float
 6817 instruct MoveVL2F(regF dst, vlRegF src) %{
 6818   match(Set dst src);
 6819   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6820   ins_encode %{
 6821     ShouldNotReachHere();
 6822   %}
 6823   ins_pipe( fpu_reg_reg );
 6824 %}
 6825 
 6826 // Load Float
 6827 instruct MoveLEG2F(regF dst, legRegF src) %{
 6828   match(Set dst src);
 6829   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6830   ins_encode %{
 6831     ShouldNotReachHere();
 6832   %}
 6833   ins_pipe( fpu_reg_reg );
 6834 %}
 6835 
 6836 // Load Double
 6837 instruct MoveD2VL(vlRegD dst, regD src) %{
 6838   match(Set dst src);
 6839   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6840   ins_encode %{
 6841     ShouldNotReachHere();
 6842   %}
 6843   ins_pipe( fpu_reg_reg );
 6844 %}
 6845 
 6846 // Load Double
 6847 instruct MoveD2LEG(legRegD dst, regD src) %{
 6848   match(Set dst src);
 6849   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6850   ins_encode %{
 6851     ShouldNotReachHere();
 6852   %}
 6853   ins_pipe( fpu_reg_reg );
 6854 %}
 6855 
 6856 // Load Double
 6857 instruct MoveVL2D(regD dst, vlRegD src) %{
 6858   match(Set dst src);
 6859   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6860   ins_encode %{
 6861     ShouldNotReachHere();
 6862   %}
 6863   ins_pipe( fpu_reg_reg );
 6864 %}
 6865 
 6866 // Load Double
 6867 instruct MoveLEG2D(regD dst, legRegD src) %{
 6868   match(Set dst src);
 6869   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6870   ins_encode %{
 6871     ShouldNotReachHere();
 6872   %}
 6873   ins_pipe( fpu_reg_reg );
 6874 %}
 6875 
 6876 //----------Load/Store/Move Instructions---------------------------------------
 6877 //----------Load Instructions--------------------------------------------------
 6878 
 6879 // Load Byte (8 bit signed)
 6880 instruct loadB(rRegI dst, memory mem)
 6881 %{
 6882   match(Set dst (LoadB mem));
 6883 
 6884   ins_cost(125);
 6885   format %{ "movsbl  $dst, $mem\t# byte" %}
 6886 
 6887   ins_encode %{
 6888     __ movsbl($dst$$Register, $mem$$Address);
 6889   %}
 6890 
 6891   ins_pipe(ialu_reg_mem);
 6892 %}
 6893 
 6894 // Load Byte (8 bit signed) into Long Register
 6895 instruct loadB2L(rRegL dst, memory mem)
 6896 %{
 6897   match(Set dst (ConvI2L (LoadB mem)));
 6898 
 6899   ins_cost(125);
 6900   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6901 
 6902   ins_encode %{
 6903     __ movsbq($dst$$Register, $mem$$Address);
 6904   %}
 6905 
 6906   ins_pipe(ialu_reg_mem);
 6907 %}
 6908 
 6909 // Load Unsigned Byte (8 bit UNsigned)
 6910 instruct loadUB(rRegI dst, memory mem)
 6911 %{
 6912   match(Set dst (LoadUB mem));
 6913 
 6914   ins_cost(125);
 6915   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6916 
 6917   ins_encode %{
 6918     __ movzbl($dst$$Register, $mem$$Address);
 6919   %}
 6920 
 6921   ins_pipe(ialu_reg_mem);
 6922 %}
 6923 
 6924 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6925 instruct loadUB2L(rRegL dst, memory mem)
 6926 %{
 6927   match(Set dst (ConvI2L (LoadUB mem)));
 6928 
 6929   ins_cost(125);
 6930   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6931 
 6932   ins_encode %{
 6933     __ movzbq($dst$$Register, $mem$$Address);
 6934   %}
 6935 
 6936   ins_pipe(ialu_reg_mem);
 6937 %}
 6938 
 6939 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6940 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6941   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6942   effect(KILL cr);
 6943 
 6944   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6945             "andl    $dst, right_n_bits($mask, 8)" %}
 6946   ins_encode %{
 6947     Register Rdst = $dst$$Register;
 6948     __ movzbq(Rdst, $mem$$Address);
 6949     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6950   %}
 6951   ins_pipe(ialu_reg_mem);
 6952 %}
 6953 
 6954 // Load Short (16 bit signed)
 6955 instruct loadS(rRegI dst, memory mem)
 6956 %{
 6957   match(Set dst (LoadS mem));
 6958 
 6959   ins_cost(125);
 6960   format %{ "movswl $dst, $mem\t# short" %}
 6961 
 6962   ins_encode %{
 6963     __ movswl($dst$$Register, $mem$$Address);
 6964   %}
 6965 
 6966   ins_pipe(ialu_reg_mem);
 6967 %}
 6968 
 6969 // Load Short (16 bit signed) to Byte (8 bit signed)
 6970 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6971   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6972 
 6973   ins_cost(125);
 6974   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6975   ins_encode %{
 6976     __ movsbl($dst$$Register, $mem$$Address);
 6977   %}
 6978   ins_pipe(ialu_reg_mem);
 6979 %}
 6980 
 6981 // Load Short (16 bit signed) into Long Register
 6982 instruct loadS2L(rRegL dst, memory mem)
 6983 %{
 6984   match(Set dst (ConvI2L (LoadS mem)));
 6985 
 6986   ins_cost(125);
 6987   format %{ "movswq $dst, $mem\t# short -> long" %}
 6988 
 6989   ins_encode %{
 6990     __ movswq($dst$$Register, $mem$$Address);
 6991   %}
 6992 
 6993   ins_pipe(ialu_reg_mem);
 6994 %}
 6995 
 6996 // Load Unsigned Short/Char (16 bit UNsigned)
 6997 instruct loadUS(rRegI dst, memory mem)
 6998 %{
 6999   match(Set dst (LoadUS mem));
 7000 
 7001   ins_cost(125);
 7002   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7003 
 7004   ins_encode %{
 7005     __ movzwl($dst$$Register, $mem$$Address);
 7006   %}
 7007 
 7008   ins_pipe(ialu_reg_mem);
 7009 %}
 7010 
 7011 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 7012 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7013   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7014 
 7015   ins_cost(125);
 7016   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7017   ins_encode %{
 7018     __ movsbl($dst$$Register, $mem$$Address);
 7019   %}
 7020   ins_pipe(ialu_reg_mem);
 7021 %}
 7022 
 7023 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 7024 instruct loadUS2L(rRegL dst, memory mem)
 7025 %{
 7026   match(Set dst (ConvI2L (LoadUS mem)));
 7027 
 7028   ins_cost(125);
 7029   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7030 
 7031   ins_encode %{
 7032     __ movzwq($dst$$Register, $mem$$Address);
 7033   %}
 7034 
 7035   ins_pipe(ialu_reg_mem);
 7036 %}
 7037 
 7038 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 7039 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7040   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7041 
 7042   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7043   ins_encode %{
 7044     __ movzbq($dst$$Register, $mem$$Address);
 7045   %}
 7046   ins_pipe(ialu_reg_mem);
 7047 %}
 7048 
 7049 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7050 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7051   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7052   effect(KILL cr);
 7053 
 7054   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7055             "andl    $dst, right_n_bits($mask, 16)" %}
 7056   ins_encode %{
 7057     Register Rdst = $dst$$Register;
 7058     __ movzwq(Rdst, $mem$$Address);
 7059     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7060   %}
 7061   ins_pipe(ialu_reg_mem);
 7062 %}
 7063 
 7064 // Load Integer
 7065 instruct loadI(rRegI dst, memory mem)
 7066 %{
 7067   match(Set dst (LoadI mem));
 7068 
 7069   ins_cost(125);
 7070   format %{ "movl    $dst, $mem\t# int" %}
 7071 
 7072   ins_encode %{
 7073     __ movl($dst$$Register, $mem$$Address);
 7074   %}
 7075 
 7076   ins_pipe(ialu_reg_mem);
 7077 %}
 7078 
 7079 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7080 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7081   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7082 
 7083   ins_cost(125);
 7084   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7085   ins_encode %{
 7086     __ movsbl($dst$$Register, $mem$$Address);
 7087   %}
 7088   ins_pipe(ialu_reg_mem);
 7089 %}
 7090 
 7091 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7092 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7093   match(Set dst (AndI (LoadI mem) mask));
 7094 
 7095   ins_cost(125);
 7096   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7097   ins_encode %{
 7098     __ movzbl($dst$$Register, $mem$$Address);
 7099   %}
 7100   ins_pipe(ialu_reg_mem);
 7101 %}
 7102 
 7103 // Load Integer (32 bit signed) to Short (16 bit signed)
 7104 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7105   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7106 
 7107   ins_cost(125);
 7108   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7109   ins_encode %{
 7110     __ movswl($dst$$Register, $mem$$Address);
 7111   %}
 7112   ins_pipe(ialu_reg_mem);
 7113 %}
 7114 
 7115 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7116 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7117   match(Set dst (AndI (LoadI mem) mask));
 7118 
 7119   ins_cost(125);
 7120   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7121   ins_encode %{
 7122     __ movzwl($dst$$Register, $mem$$Address);
 7123   %}
 7124   ins_pipe(ialu_reg_mem);
 7125 %}
 7126 
 7127 // Load Integer into Long Register
 7128 instruct loadI2L(rRegL dst, memory mem)
 7129 %{
 7130   match(Set dst (ConvI2L (LoadI mem)));
 7131 
 7132   ins_cost(125);
 7133   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7134 
 7135   ins_encode %{
 7136     __ movslq($dst$$Register, $mem$$Address);
 7137   %}
 7138 
 7139   ins_pipe(ialu_reg_mem);
 7140 %}
 7141 
 7142 // Load Integer with mask 0xFF into Long Register
 7143 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7144   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7145 
 7146   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7147   ins_encode %{
 7148     __ movzbq($dst$$Register, $mem$$Address);
 7149   %}
 7150   ins_pipe(ialu_reg_mem);
 7151 %}
 7152 
 7153 // Load Integer with mask 0xFFFF into Long Register
 7154 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7155   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7156 
 7157   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7158   ins_encode %{
 7159     __ movzwq($dst$$Register, $mem$$Address);
 7160   %}
 7161   ins_pipe(ialu_reg_mem);
 7162 %}
 7163 
// Load Integer with a 31-bit mask into Long Register.
// A positive mask keeps the loaded value non-negative, so the implicit
// zero-extension of andl agrees with the sign-extension of ConvI2L.
 7165 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7166   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7167   effect(KILL cr);
 7168 
 7169   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7170             "andl    $dst, $mask" %}
 7171   ins_encode %{
 7172     Register Rdst = $dst$$Register;
 7173     __ movl(Rdst, $mem$$Address);
 7174     __ andl(Rdst, $mask$$constant);
 7175   %}
 7176   ins_pipe(ialu_reg_mem);
 7177 %}
 7178 
 7179 // Load Unsigned Integer into Long Register
 7180 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7181 %{
 7182   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7183 
 7184   ins_cost(125);
 7185   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7186 
 7187   ins_encode %{
 7188     __ movl($dst$$Register, $mem$$Address);
 7189   %}
 7190 
 7191   ins_pipe(ialu_reg_mem);
 7192 %}
 7193 
 7194 // Load Long
 7195 instruct loadL(rRegL dst, memory mem)
 7196 %{
 7197   match(Set dst (LoadL mem));
 7198 
 7199   ins_cost(125);
 7200   format %{ "movq    $dst, $mem\t# long" %}
 7201 
 7202   ins_encode %{
 7203     __ movq($dst$$Register, $mem$$Address);
 7204   %}
 7205 
 7206   ins_pipe(ialu_reg_mem); // XXX
 7207 %}
 7208 
 7209 // Load Range
 7210 instruct loadRange(rRegI dst, memory mem)
 7211 %{
 7212   match(Set dst (LoadRange mem));
 7213 
 7214   ins_cost(125); // XXX
 7215   format %{ "movl    $dst, $mem\t# range" %}
 7216   ins_encode %{
 7217     __ movl($dst$$Register, $mem$$Address);
 7218   %}
 7219   ins_pipe(ialu_reg_mem);
 7220 %}
 7221 
 7222 // Load Pointer
 7223 instruct loadP(rRegP dst, memory mem)
 7224 %{
 7225   match(Set dst (LoadP mem));
 7226   predicate(n->as_Load()->barrier_data() == 0);
 7227 
 7228   ins_cost(125); // XXX
 7229   format %{ "movq    $dst, $mem\t# ptr" %}
 7230   ins_encode %{
 7231     __ movq($dst$$Register, $mem$$Address);
 7232   %}
 7233   ins_pipe(ialu_reg_mem); // XXX
 7234 %}
 7235 
 7236 // Load Compressed Pointer
 7237 instruct loadN(rRegN dst, memory mem)
 7238 %{
 7239    predicate(n->as_Load()->barrier_data() == 0);
 7240    match(Set dst (LoadN mem));
 7241 
 7242    ins_cost(125); // XXX
 7243    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7244    ins_encode %{
 7245      __ movl($dst$$Register, $mem$$Address);
 7246    %}
 7247    ins_pipe(ialu_reg_mem); // XXX
 7248 %}
 7249 
 7250 
 7251 // Load Klass Pointer
 7252 instruct loadKlass(rRegP dst, memory mem)
 7253 %{
 7254   match(Set dst (LoadKlass mem));
 7255 
 7256   ins_cost(125); // XXX
 7257   format %{ "movq    $dst, $mem\t# class" %}
 7258   ins_encode %{
 7259     __ movq($dst$$Register, $mem$$Address);
 7260   %}
 7261   ins_pipe(ialu_reg_mem); // XXX
 7262 %}
 7263 
 7264 // Load narrow Klass Pointer
 7265 instruct loadNKlass(rRegN dst, memory mem)
 7266 %{
 7267   predicate(!UseCompactObjectHeaders);
 7268   match(Set dst (LoadNKlass mem));
 7269 
 7270   ins_cost(125); // XXX
 7271   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7272   ins_encode %{
 7273     __ movl($dst$$Register, $mem$$Address);
 7274   %}
 7275   ins_pipe(ialu_reg_mem); // XXX
 7276 %}
 7277 
 7278 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7279 %{
 7280   predicate(UseCompactObjectHeaders);
 7281   match(Set dst (LoadNKlass mem));
 7282   effect(KILL cr);
 7283   ins_cost(125);
 7284   format %{
 7285     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7286     "shrl    $dst, markWord::klass_shift_at_offset"
 7287   %}
 7288   ins_encode %{
 7289     if (UseAPX) {
 7290       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
 7291     }
 7292     else {
 7293       __ movl($dst$$Register, $mem$$Address);
 7294       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7295     }
 7296   %}
 7297   ins_pipe(ialu_reg_mem);
 7298 %}
 7299 
 7300 // Load Float
 7301 instruct loadF(regF dst, memory mem)
 7302 %{
 7303   match(Set dst (LoadF mem));
 7304 
 7305   ins_cost(145); // XXX
 7306   format %{ "movss   $dst, $mem\t# float" %}
 7307   ins_encode %{
 7308     __ movflt($dst$$XMMRegister, $mem$$Address);
 7309   %}
 7310   ins_pipe(pipe_slow); // XXX
 7311 %}
 7312 
 7313 // Load Double
 7314 instruct loadD_partial(regD dst, memory mem)
 7315 %{
 7316   predicate(!UseXmmLoadAndClearUpper);
 7317   match(Set dst (LoadD mem));
 7318 
 7319   ins_cost(145); // XXX
 7320   format %{ "movlpd  $dst, $mem\t# double" %}
 7321   ins_encode %{
 7322     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7323   %}
 7324   ins_pipe(pipe_slow); // XXX
 7325 %}
 7326 
 7327 instruct loadD(regD dst, memory mem)
 7328 %{
 7329   predicate(UseXmmLoadAndClearUpper);
 7330   match(Set dst (LoadD mem));
 7331 
 7332   ins_cost(145); // XXX
 7333   format %{ "movsd   $dst, $mem\t# double" %}
 7334   ins_encode %{
 7335     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7336   %}
 7337   ins_pipe(pipe_slow); // XXX
 7338 %}
 7339 
 7340 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7341 %{
 7342   match(Set dst con);
 7343 
 7344   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7345 
 7346   ins_encode %{
 7347     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7348   %}
 7349 
 7350   ins_pipe(ialu_reg_fat);
 7351 %}
 7352 
 7353 // max = java.lang.Math.max(float a, float b)
 7354 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
 7355   predicate(VM_Version::supports_avx10_2());
 7356   match(Set dst (MaxF a b));
 7357   format %{ "maxF $dst, $a, $b" %}
 7358   ins_encode %{
 7359     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7360   %}
 7361   ins_pipe( pipe_slow );
 7362 %}
 7363 
 7364 // max = java.lang.Math.max(float a, float b)
 7365 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7366   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7367   match(Set dst (MaxF a b));
 7368   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7369   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7370   ins_encode %{
 7371     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7372   %}
 7373   ins_pipe( pipe_slow );
 7374 %}
 7375 
 7376 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7377   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7378   match(Set dst (MaxF a b));
 7379   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7380 
 7381   format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7382   ins_encode %{
 7383     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7384                     false /*min*/, true /*single*/);
 7385   %}
 7386   ins_pipe( pipe_slow );
 7387 %}
 7388 
 7389 // max = java.lang.Math.max(double a, double b)
 7390 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
 7391   predicate(VM_Version::supports_avx10_2());
 7392   match(Set dst (MaxD a b));
 7393   format %{ "maxD $dst, $a, $b" %}
 7394   ins_encode %{
 7395     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7396   %}
 7397   ins_pipe( pipe_slow );
 7398 %}
 7399 
 7400 // max = java.lang.Math.max(double a, double b)
 7401 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7402   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7403   match(Set dst (MaxD a b));
 7404   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7405   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7406   ins_encode %{
 7407     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7408   %}
 7409   ins_pipe( pipe_slow );
 7410 %}
 7411 
 7412 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7413   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7414   match(Set dst (MaxD a b));
 7415   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7416 
 7417   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7418   ins_encode %{
 7419     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7420                     false /*min*/, false /*single*/);
 7421   %}
 7422   ins_pipe( pipe_slow );
 7423 %}
 7424 
// min = java.lang.Math.min(float a, float b)
 7426 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
 7427   predicate(VM_Version::supports_avx10_2());
 7428   match(Set dst (MinF a b));
 7429   format %{ "minF $dst, $a, $b" %}
 7430   ins_encode %{
 7431     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7432   %}
 7433   ins_pipe( pipe_slow );
 7434 %}
 7435 
 7436 // min = java.lang.Math.min(float a, float b)
 7437 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7438   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7439   match(Set dst (MinF a b));
 7440   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7441   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7442   ins_encode %{
 7443     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7444   %}
 7445   ins_pipe( pipe_slow );
 7446 %}
 7447 
 7448 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7449   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7450   match(Set dst (MinF a b));
 7451   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7452 
 7453   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7454   ins_encode %{
 7455     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7456                     true /*min*/, true /*single*/);
 7457   %}
 7458   ins_pipe( pipe_slow );
 7459 %}
 7460 
// min = java.lang.Math.min(double a, double b)
 7462 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
 7463   predicate(VM_Version::supports_avx10_2());
 7464   match(Set dst (MinD a b));
 7465   format %{ "minD $dst, $a, $b" %}
 7466   ins_encode %{
 7467     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7468   %}
 7469   ins_pipe( pipe_slow );
 7470 %}
 7471 
 7472 // min = java.lang.Math.min(double a, double b)
 7473 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7474   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7475   match(Set dst (MinD a b));
 7476   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7477     format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7478   ins_encode %{
 7479     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7480   %}
 7481   ins_pipe( pipe_slow );
 7482 %}
 7483 
 7484 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7485   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7486   match(Set dst (MinD a b));
 7487   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7488 
 7489   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7490   ins_encode %{
 7491     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7492                     true /*min*/, false /*single*/);
 7493   %}
 7494   ins_pipe( pipe_slow );
 7495 %}
 7496 
 7497 // Load Effective Address
 7498 instruct leaP8(rRegP dst, indOffset8 mem)
 7499 %{
 7500   match(Set dst mem);
 7501 
 7502   ins_cost(110); // XXX
 7503   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7504   ins_encode %{
 7505     __ leaq($dst$$Register, $mem$$Address);
 7506   %}
 7507   ins_pipe(ialu_reg_reg_fat);
 7508 %}
 7509 
 7510 instruct leaP32(rRegP dst, indOffset32 mem)
 7511 %{
 7512   match(Set dst mem);
 7513 
 7514   ins_cost(110);
 7515   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7516   ins_encode %{
 7517     __ leaq($dst$$Register, $mem$$Address);
 7518   %}
 7519   ins_pipe(ialu_reg_reg_fat);
 7520 %}
 7521 
 7522 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7523 %{
 7524   match(Set dst mem);
 7525 
 7526   ins_cost(110);
 7527   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7528   ins_encode %{
 7529     __ leaq($dst$$Register, $mem$$Address);
 7530   %}
 7531   ins_pipe(ialu_reg_reg_fat);
 7532 %}
 7533 
 7534 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7535 %{
 7536   match(Set dst mem);
 7537 
 7538   ins_cost(110);
 7539   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7540   ins_encode %{
 7541     __ leaq($dst$$Register, $mem$$Address);
 7542   %}
 7543   ins_pipe(ialu_reg_reg_fat);
 7544 %}
 7545 
 7546 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7547 %{
 7548   match(Set dst mem);
 7549 
 7550   ins_cost(110);
 7551   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7552   ins_encode %{
 7553     __ leaq($dst$$Register, $mem$$Address);
 7554   %}
 7555   ins_pipe(ialu_reg_reg_fat);
 7556 %}
 7557 
 7558 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7559 %{
 7560   match(Set dst mem);
 7561 
 7562   ins_cost(110);
 7563   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7564   ins_encode %{
 7565     __ leaq($dst$$Register, $mem$$Address);
 7566   %}
 7567   ins_pipe(ialu_reg_reg_fat);
 7568 %}
 7569 
 7570 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7571 %{
 7572   match(Set dst mem);
 7573 
 7574   ins_cost(110);
 7575   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7576   ins_encode %{
 7577     __ leaq($dst$$Register, $mem$$Address);
 7578   %}
 7579   ins_pipe(ialu_reg_reg_fat);
 7580 %}
 7581 
 7582 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7583 %{
 7584   match(Set dst mem);
 7585 
 7586   ins_cost(110);
 7587   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7588   ins_encode %{
 7589     __ leaq($dst$$Register, $mem$$Address);
 7590   %}
 7591   ins_pipe(ialu_reg_reg_fat);
 7592 %}
 7593 
// Load Effective Address which uses a narrow (32-bit) oop
 7595 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7596 %{
 7597   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7598   match(Set dst mem);
 7599 
 7600   ins_cost(110);
 7601   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7602   ins_encode %{
 7603     __ leaq($dst$$Register, $mem$$Address);
 7604   %}
 7605   ins_pipe(ialu_reg_reg_fat);
 7606 %}
 7607 
 7608 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7609 %{
 7610   predicate(CompressedOops::shift() == 0);
 7611   match(Set dst mem);
 7612 
 7613   ins_cost(110); // XXX
 7614   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7615   ins_encode %{
 7616     __ leaq($dst$$Register, $mem$$Address);
 7617   %}
 7618   ins_pipe(ialu_reg_reg_fat);
 7619 %}
 7620 
 7621 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7622 %{
 7623   predicate(CompressedOops::shift() == 0);
 7624   match(Set dst mem);
 7625 
 7626   ins_cost(110);
 7627   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7628   ins_encode %{
 7629     __ leaq($dst$$Register, $mem$$Address);
 7630   %}
 7631   ins_pipe(ialu_reg_reg_fat);
 7632 %}
 7633 
 7634 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7635 %{
 7636   predicate(CompressedOops::shift() == 0);
 7637   match(Set dst mem);
 7638 
 7639   ins_cost(110);
 7640   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7641   ins_encode %{
 7642     __ leaq($dst$$Register, $mem$$Address);
 7643   %}
 7644   ins_pipe(ialu_reg_reg_fat);
 7645 %}
 7646 
 7647 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7648 %{
 7649   predicate(CompressedOops::shift() == 0);
 7650   match(Set dst mem);
 7651 
 7652   ins_cost(110);
 7653   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7654   ins_encode %{
 7655     __ leaq($dst$$Register, $mem$$Address);
 7656   %}
 7657   ins_pipe(ialu_reg_reg_fat);
 7658 %}
 7659 
 7660 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7661 %{
 7662   predicate(CompressedOops::shift() == 0);
 7663   match(Set dst mem);
 7664 
 7665   ins_cost(110);
 7666   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7667   ins_encode %{
 7668     __ leaq($dst$$Register, $mem$$Address);
 7669   %}
 7670   ins_pipe(ialu_reg_reg_fat);
 7671 %}
 7672 
 7673 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7674 %{
 7675   predicate(CompressedOops::shift() == 0);
 7676   match(Set dst mem);
 7677 
 7678   ins_cost(110);
 7679   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7680   ins_encode %{
 7681     __ leaq($dst$$Register, $mem$$Address);
 7682   %}
 7683   ins_pipe(ialu_reg_reg_fat);
 7684 %}
 7685 
 7686 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7687 %{
 7688   predicate(CompressedOops::shift() == 0);
 7689   match(Set dst mem);
 7690 
 7691   ins_cost(110);
 7692   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7693   ins_encode %{
 7694     __ leaq($dst$$Register, $mem$$Address);
 7695   %}
 7696   ins_pipe(ialu_reg_reg_fat);
 7697 %}
 7698 
 7699 instruct loadConI(rRegI dst, immI src)
 7700 %{
 7701   match(Set dst src);
 7702 
 7703   format %{ "movl    $dst, $src\t# int" %}
 7704   ins_encode %{
 7705     __ movl($dst$$Register, $src$$constant);
 7706   %}
 7707   ins_pipe(ialu_reg_fat); // XXX
 7708 %}
 7709 
 7710 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7711 %{
 7712   match(Set dst src);
 7713   effect(KILL cr);
 7714 
 7715   ins_cost(50);
 7716   format %{ "xorl    $dst, $dst\t# int" %}
 7717   ins_encode %{
 7718     __ xorl($dst$$Register, $dst$$Register);
 7719   %}
 7720   ins_pipe(ialu_reg);
 7721 %}
 7722 
 7723 instruct loadConL(rRegL dst, immL src)
 7724 %{
 7725   match(Set dst src);
 7726 
 7727   ins_cost(150);
 7728   format %{ "movq    $dst, $src\t# long" %}
 7729   ins_encode %{
 7730     __ mov64($dst$$Register, $src$$constant);
 7731   %}
 7732   ins_pipe(ialu_reg);
 7733 %}
 7734 
 7735 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7736 %{
 7737   match(Set dst src);
 7738   effect(KILL cr);
 7739 
 7740   ins_cost(50);
 7741   format %{ "xorl    $dst, $dst\t# long" %}
 7742   ins_encode %{
 7743     __ xorl($dst$$Register, $dst$$Register);
 7744   %}
 7745   ins_pipe(ialu_reg); // XXX
 7746 %}
 7747 
 7748 instruct loadConUL32(rRegL dst, immUL32 src)
 7749 %{
 7750   match(Set dst src);
 7751 
 7752   ins_cost(60);
 7753   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7754   ins_encode %{
 7755     __ movl($dst$$Register, $src$$constant);
 7756   %}
 7757   ins_pipe(ialu_reg);
 7758 %}
 7759 
 7760 instruct loadConL32(rRegL dst, immL32 src)
 7761 %{
 7762   match(Set dst src);
 7763 
 7764   ins_cost(70);
 7765   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7766   ins_encode %{
 7767     __ movq($dst$$Register, $src$$constant);
 7768   %}
 7769   ins_pipe(ialu_reg);
 7770 %}
 7771 
 7772 instruct loadConP(rRegP dst, immP con) %{
 7773   match(Set dst con);
 7774 
 7775   format %{ "movq    $dst, $con\t# ptr" %}
 7776   ins_encode %{
 7777     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7778   %}
 7779   ins_pipe(ialu_reg_fat); // XXX
 7780 %}
 7781 
 7782 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7783 %{
 7784   match(Set dst src);
 7785   effect(KILL cr);
 7786 
 7787   ins_cost(50);
 7788   format %{ "xorl    $dst, $dst\t# ptr" %}
 7789   ins_encode %{
 7790     __ xorl($dst$$Register, $dst$$Register);
 7791   %}
 7792   ins_pipe(ialu_reg);
 7793 %}
 7794 
 7795 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7796 %{
 7797   match(Set dst src);
 7798   effect(KILL cr);
 7799 
 7800   ins_cost(60);
 7801   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7802   ins_encode %{
 7803     __ movl($dst$$Register, $src$$constant);
 7804   %}
 7805   ins_pipe(ialu_reg);
 7806 %}
 7807 
 7808 instruct loadConF(regF dst, immF con) %{
 7809   match(Set dst con);
 7810   ins_cost(125);
 7811   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7812   ins_encode %{
 7813     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7814   %}
 7815   ins_pipe(pipe_slow);
 7816 %}
 7817 
 7818 instruct loadConH(regF dst, immH con) %{
 7819   match(Set dst con);
 7820   ins_cost(125);
 7821   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7822   ins_encode %{
 7823     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7824   %}
 7825   ins_pipe(pipe_slow);
 7826 %}
 7827 
 7828 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7829   match(Set dst src);
 7830   effect(KILL cr);
 7831   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7832   ins_encode %{
 7833     __ xorq($dst$$Register, $dst$$Register);
 7834   %}
 7835   ins_pipe(ialu_reg);
 7836 %}
 7837 
 7838 instruct loadConN(rRegN dst, immN src) %{
 7839   match(Set dst src);
 7840 
 7841   ins_cost(125);
 7842   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7843   ins_encode %{
 7844     address con = (address)$src$$constant;
 7845     if (con == nullptr) {
 7846       ShouldNotReachHere();
 7847     } else {
 7848       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7849     }
 7850   %}
 7851   ins_pipe(ialu_reg_fat); // XXX
 7852 %}
 7853 
 7854 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7855   match(Set dst src);
 7856 
 7857   ins_cost(125);
 7858   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7859   ins_encode %{
 7860     address con = (address)$src$$constant;
 7861     if (con == nullptr) {
 7862       ShouldNotReachHere();
 7863     } else {
 7864       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7865     }
 7866   %}
 7867   ins_pipe(ialu_reg_fat); // XXX
 7868 %}
 7869 
 7870 instruct loadConF0(regF dst, immF0 src)
 7871 %{
 7872   match(Set dst src);
 7873   ins_cost(100);
 7874 
 7875   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7876   ins_encode %{
 7877     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7878   %}
 7879   ins_pipe(pipe_slow);
 7880 %}
 7881 
// Use the same format since predicate() cannot be used here.
 7883 instruct loadConD(regD dst, immD con) %{
 7884   match(Set dst con);
 7885   ins_cost(125);
 7886   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7887   ins_encode %{
 7888     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7889   %}
 7890   ins_pipe(pipe_slow);
 7891 %}
 7892 
 7893 instruct loadConD0(regD dst, immD0 src)
 7894 %{
 7895   match(Set dst src);
 7896   ins_cost(100);
 7897 
 7898   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7899   ins_encode %{
 7900     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7901   %}
 7902   ins_pipe(pipe_slow);
 7903 %}
 7904 
 7905 instruct loadSSI(rRegI dst, stackSlotI src)
 7906 %{
 7907   match(Set dst src);
 7908 
 7909   ins_cost(125);
 7910   format %{ "movl    $dst, $src\t# int stk" %}
 7911   ins_encode %{
 7912     __ movl($dst$$Register, $src$$Address);
 7913   %}
 7914   ins_pipe(ialu_reg_mem);
 7915 %}
 7916 
 7917 instruct loadSSL(rRegL dst, stackSlotL src)
 7918 %{
 7919   match(Set dst src);
 7920 
 7921   ins_cost(125);
 7922   format %{ "movq    $dst, $src\t# long stk" %}
 7923   ins_encode %{
 7924     __ movq($dst$$Register, $src$$Address);
 7925   %}
 7926   ins_pipe(ialu_reg_mem);
 7927 %}
 7928 
 7929 instruct loadSSP(rRegP dst, stackSlotP src)
 7930 %{
 7931   match(Set dst src);
 7932 
 7933   ins_cost(125);
 7934   format %{ "movq    $dst, $src\t# ptr stk" %}
 7935   ins_encode %{
 7936     __ movq($dst$$Register, $src$$Address);
 7937   %}
 7938   ins_pipe(ialu_reg_mem);
 7939 %}
 7940 
 7941 instruct loadSSF(regF dst, stackSlotF src)
 7942 %{
 7943   match(Set dst src);
 7944 
 7945   ins_cost(125);
 7946   format %{ "movss   $dst, $src\t# float stk" %}
 7947   ins_encode %{
 7948     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7949   %}
 7950   ins_pipe(pipe_slow); // XXX
 7951 %}
 7952 
// Use the same format since predicate() cannot be used here.
 7954 instruct loadSSD(regD dst, stackSlotD src)
 7955 %{
 7956   match(Set dst src);
 7957 
 7958   ins_cost(125);
 7959   format %{ "movsd   $dst, $src\t# double stk" %}
 7960   ins_encode  %{
 7961     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7962   %}
 7963   ins_pipe(pipe_slow); // XXX
 7964 %}
 7965 
 7966 // Prefetch instructions for allocation.
 7967 // Must be safe to execute with invalid address (cannot fault).
 7968 
 7969 instruct prefetchAlloc( memory mem ) %{
 7970   predicate(AllocatePrefetchInstr==3);
 7971   match(PrefetchAllocation mem);
 7972   ins_cost(125);
 7973 
 7974   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7975   ins_encode %{
 7976     __ prefetchw($mem$$Address);
 7977   %}
 7978   ins_pipe(ialu_mem);
 7979 %}
 7980 
 7981 instruct prefetchAllocNTA( memory mem ) %{
 7982   predicate(AllocatePrefetchInstr==0);
 7983   match(PrefetchAllocation mem);
 7984   ins_cost(125);
 7985 
 7986   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7987   ins_encode %{
 7988     __ prefetchnta($mem$$Address);
 7989   %}
 7990   ins_pipe(ialu_mem);
 7991 %}
 7992 
 7993 instruct prefetchAllocT0( memory mem ) %{
 7994   predicate(AllocatePrefetchInstr==1);
 7995   match(PrefetchAllocation mem);
 7996   ins_cost(125);
 7997 
 7998   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7999   ins_encode %{
 8000     __ prefetcht0($mem$$Address);
 8001   %}
 8002   ins_pipe(ialu_mem);
 8003 %}
 8004 
 8005 instruct prefetchAllocT2( memory mem ) %{
 8006   predicate(AllocatePrefetchInstr==2);
 8007   match(PrefetchAllocation mem);
 8008   ins_cost(125);
 8009 
 8010   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8011   ins_encode %{
 8012     __ prefetcht2($mem$$Address);
 8013   %}
 8014   ins_pipe(ialu_mem);
 8015 %}
 8016 
 8017 //----------Store Instructions-------------------------------------------------
 8018 
 8019 // Store Byte
 8020 instruct storeB(memory mem, rRegI src)
 8021 %{
 8022   match(Set mem (StoreB mem src));
 8023 
 8024   ins_cost(125); // XXX
 8025   format %{ "movb    $mem, $src\t# byte" %}
 8026   ins_encode %{
 8027     __ movb($mem$$Address, $src$$Register);
 8028   %}
 8029   ins_pipe(ialu_mem_reg);
 8030 %}
 8031 
 8032 // Store Char/Short
 8033 instruct storeC(memory mem, rRegI src)
 8034 %{
 8035   match(Set mem (StoreC mem src));
 8036 
 8037   ins_cost(125); // XXX
 8038   format %{ "movw    $mem, $src\t# char/short" %}
 8039   ins_encode %{
 8040     __ movw($mem$$Address, $src$$Register);
 8041   %}
 8042   ins_pipe(ialu_mem_reg);
 8043 %}
 8044 
 8045 // Store Integer
 8046 instruct storeI(memory mem, rRegI src)
 8047 %{
 8048   match(Set mem (StoreI mem src));
 8049 
 8050   ins_cost(125); // XXX
 8051   format %{ "movl    $mem, $src\t# int" %}
 8052   ins_encode %{
 8053     __ movl($mem$$Address, $src$$Register);
 8054   %}
 8055   ins_pipe(ialu_mem_reg);
 8056 %}
 8057 
 8058 // Store Long
 8059 instruct storeL(memory mem, rRegL src)
 8060 %{
 8061   match(Set mem (StoreL mem src));
 8062 
 8063   ins_cost(125); // XXX
 8064   format %{ "movq    $mem, $src\t# long" %}
 8065   ins_encode %{
 8066     __ movq($mem$$Address, $src$$Register);
 8067   %}
 8068   ins_pipe(ialu_mem_reg); // XXX
 8069 %}
 8070 
 8071 // Store Pointer
 8072 instruct storeP(memory mem, any_RegP src)
 8073 %{
 8074   predicate(n->as_Store()->barrier_data() == 0);
 8075   match(Set mem (StoreP mem src));
 8076 
 8077   ins_cost(125); // XXX
 8078   format %{ "movq    $mem, $src\t# ptr" %}
 8079   ins_encode %{
 8080     __ movq($mem$$Address, $src$$Register);
 8081   %}
 8082   ins_pipe(ialu_mem_reg);
 8083 %}
 8084 
 8085 instruct storeImmP0(memory mem, immP0 zero)
 8086 %{
 8087   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8088   match(Set mem (StoreP mem zero));
 8089 
 8090   ins_cost(125); // XXX
 8091   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8092   ins_encode %{
 8093     __ movq($mem$$Address, r12);
 8094   %}
 8095   ins_pipe(ialu_mem_reg);
 8096 %}
 8097 
 8098 // Store Null Pointer, mark word, or other simple pointer constant.
 8099 instruct storeImmP(memory mem, immP31 src)
 8100 %{
 8101   predicate(n->as_Store()->barrier_data() == 0);
 8102   match(Set mem (StoreP mem src));
 8103 
 8104   ins_cost(150); // XXX
 8105   format %{ "movq    $mem, $src\t# ptr" %}
 8106   ins_encode %{
 8107     __ movq($mem$$Address, $src$$constant);
 8108   %}
 8109   ins_pipe(ialu_mem_imm);
 8110 %}
 8111 
 8112 // Store Compressed Pointer
 8113 instruct storeN(memory mem, rRegN src)
 8114 %{
 8115   predicate(n->as_Store()->barrier_data() == 0);
 8116   match(Set mem (StoreN mem src));
 8117 
 8118   ins_cost(125); // XXX
 8119   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8120   ins_encode %{
 8121     __ movl($mem$$Address, $src$$Register);
 8122   %}
 8123   ins_pipe(ialu_mem_reg);
 8124 %}
 8125 
 8126 instruct storeNKlass(memory mem, rRegN src)
 8127 %{
 8128   match(Set mem (StoreNKlass mem src));
 8129 
 8130   ins_cost(125); // XXX
 8131   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8132   ins_encode %{
 8133     __ movl($mem$$Address, $src$$Register);
 8134   %}
 8135   ins_pipe(ialu_mem_reg);
 8136 %}
 8137 
 8138 instruct storeImmN0(memory mem, immN0 zero)
 8139 %{
 8140   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8141   match(Set mem (StoreN mem zero));
 8142 
 8143   ins_cost(125); // XXX
 8144   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8145   ins_encode %{
 8146     __ movl($mem$$Address, r12);
 8147   %}
 8148   ins_pipe(ialu_mem_reg);
 8149 %}
 8150 
 8151 instruct storeImmN(memory mem, immN src)
 8152 %{
 8153   predicate(n->as_Store()->barrier_data() == 0);
 8154   match(Set mem (StoreN mem src));
 8155 
 8156   ins_cost(150); // XXX
 8157   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8158   ins_encode %{
 8159     address con = (address)$src$$constant;
 8160     if (con == nullptr) {
 8161       __ movl($mem$$Address, 0);
 8162     } else {
 8163       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8164     }
 8165   %}
 8166   ins_pipe(ialu_mem_imm);
 8167 %}
 8168 
 8169 instruct storeImmNKlass(memory mem, immNKlass src)
 8170 %{
 8171   match(Set mem (StoreNKlass mem src));
 8172 
 8173   ins_cost(150); // XXX
 8174   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8175   ins_encode %{
 8176     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8177   %}
 8178   ins_pipe(ialu_mem_imm);
 8179 %}
 8180 
 8181 // Store Integer Immediate
 8182 instruct storeImmI0(memory mem, immI_0 zero)
 8183 %{
 8184   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8185   match(Set mem (StoreI mem zero));
 8186 
 8187   ins_cost(125); // XXX
 8188   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8189   ins_encode %{
 8190     __ movl($mem$$Address, r12);
 8191   %}
 8192   ins_pipe(ialu_mem_reg);
 8193 %}
 8194 
 8195 instruct storeImmI(memory mem, immI src)
 8196 %{
 8197   match(Set mem (StoreI mem src));
 8198 
 8199   ins_cost(150);
 8200   format %{ "movl    $mem, $src\t# int" %}
 8201   ins_encode %{
 8202     __ movl($mem$$Address, $src$$constant);
 8203   %}
 8204   ins_pipe(ialu_mem_imm);
 8205 %}
 8206 
 8207 // Store Long Immediate
 8208 instruct storeImmL0(memory mem, immL0 zero)
 8209 %{
 8210   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8211   match(Set mem (StoreL mem zero));
 8212 
 8213   ins_cost(125); // XXX
 8214   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8215   ins_encode %{
 8216     __ movq($mem$$Address, r12);
 8217   %}
 8218   ins_pipe(ialu_mem_reg);
 8219 %}
 8220 
 8221 instruct storeImmL(memory mem, immL32 src)
 8222 %{
 8223   match(Set mem (StoreL mem src));
 8224 
 8225   ins_cost(150);
 8226   format %{ "movq    $mem, $src\t# long" %}
 8227   ins_encode %{
 8228     __ movq($mem$$Address, $src$$constant);
 8229   %}
 8230   ins_pipe(ialu_mem_imm);
 8231 %}
 8232 
 8233 // Store Short/Char Immediate
 8234 instruct storeImmC0(memory mem, immI_0 zero)
 8235 %{
 8236   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8237   match(Set mem (StoreC mem zero));
 8238 
 8239   ins_cost(125); // XXX
 8240   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8241   ins_encode %{
 8242     __ movw($mem$$Address, r12);
 8243   %}
 8244   ins_pipe(ialu_mem_reg);
 8245 %}
 8246 
 8247 instruct storeImmI16(memory mem, immI16 src)
 8248 %{
 8249   predicate(UseStoreImmI16);
 8250   match(Set mem (StoreC mem src));
 8251 
 8252   ins_cost(150);
 8253   format %{ "movw    $mem, $src\t# short/char" %}
 8254   ins_encode %{
 8255     __ movw($mem$$Address, $src$$constant);
 8256   %}
 8257   ins_pipe(ialu_mem_imm);
 8258 %}
 8259 
 8260 // Store Byte Immediate
 8261 instruct storeImmB0(memory mem, immI_0 zero)
 8262 %{
 8263   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8264   match(Set mem (StoreB mem zero));
 8265 
 8266   ins_cost(125); // XXX
 8267   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8268   ins_encode %{
 8269     __ movb($mem$$Address, r12);
 8270   %}
 8271   ins_pipe(ialu_mem_reg);
 8272 %}
 8273 
 8274 instruct storeImmB(memory mem, immI8 src)
 8275 %{
 8276   match(Set mem (StoreB mem src));
 8277 
 8278   ins_cost(150); // XXX
 8279   format %{ "movb    $mem, $src\t# byte" %}
 8280   ins_encode %{
 8281     __ movb($mem$$Address, $src$$constant);
 8282   %}
 8283   ins_pipe(ialu_mem_imm);
 8284 %}
 8285 
 8286 // Store Float
 8287 instruct storeF(memory mem, regF src)
 8288 %{
 8289   match(Set mem (StoreF mem src));
 8290 
 8291   ins_cost(95); // XXX
 8292   format %{ "movss   $mem, $src\t# float" %}
 8293   ins_encode %{
 8294     __ movflt($mem$$Address, $src$$XMMRegister);
 8295   %}
 8296   ins_pipe(pipe_slow); // XXX
 8297 %}
 8298 
 8299 // Store immediate Float value (it is faster than store from XMM register)
 8300 instruct storeF0(memory mem, immF0 zero)
 8301 %{
 8302   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8303   match(Set mem (StoreF mem zero));
 8304 
 8305   ins_cost(25); // XXX
 8306   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8307   ins_encode %{
 8308     __ movl($mem$$Address, r12);
 8309   %}
 8310   ins_pipe(ialu_mem_reg);
 8311 %}
 8312 
 8313 instruct storeF_imm(memory mem, immF src)
 8314 %{
 8315   match(Set mem (StoreF mem src));
 8316 
 8317   ins_cost(50);
 8318   format %{ "movl    $mem, $src\t# float" %}
 8319   ins_encode %{
 8320     __ movl($mem$$Address, jint_cast($src$$constant));
 8321   %}
 8322   ins_pipe(ialu_mem_imm);
 8323 %}
 8324 
 8325 // Store Double
 8326 instruct storeD(memory mem, regD src)
 8327 %{
 8328   match(Set mem (StoreD mem src));
 8329 
 8330   ins_cost(95); // XXX
 8331   format %{ "movsd   $mem, $src\t# double" %}
 8332   ins_encode %{
 8333     __ movdbl($mem$$Address, $src$$XMMRegister);
 8334   %}
 8335   ins_pipe(pipe_slow); // XXX
 8336 %}
 8337 
 8338 // Store immediate double 0.0 (it is faster than store from XMM register)
 8339 instruct storeD0_imm(memory mem, immD0 src)
 8340 %{
 8341   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8342   match(Set mem (StoreD mem src));
 8343 
 8344   ins_cost(50);
 8345   format %{ "movq    $mem, $src\t# double 0." %}
 8346   ins_encode %{
 8347     __ movq($mem$$Address, $src$$constant);
 8348   %}
 8349   ins_pipe(ialu_mem_imm);
 8350 %}
 8351 
 8352 instruct storeD0(memory mem, immD0 zero)
 8353 %{
 8354   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8355   match(Set mem (StoreD mem zero));
 8356 
 8357   ins_cost(25); // XXX
 8358   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8359   ins_encode %{
 8360     __ movq($mem$$Address, r12);
 8361   %}
 8362   ins_pipe(ialu_mem_reg);
 8363 %}
 8364 
 8365 instruct storeSSI(stackSlotI dst, rRegI src)
 8366 %{
 8367   match(Set dst src);
 8368 
 8369   ins_cost(100);
 8370   format %{ "movl    $dst, $src\t# int stk" %}
 8371   ins_encode %{
 8372     __ movl($dst$$Address, $src$$Register);
 8373   %}
 8374   ins_pipe( ialu_mem_reg );
 8375 %}
 8376 
 8377 instruct storeSSL(stackSlotL dst, rRegL src)
 8378 %{
 8379   match(Set dst src);
 8380 
 8381   ins_cost(100);
 8382   format %{ "movq    $dst, $src\t# long stk" %}
 8383   ins_encode %{
 8384     __ movq($dst$$Address, $src$$Register);
 8385   %}
 8386   ins_pipe(ialu_mem_reg);
 8387 %}
 8388 
 8389 instruct storeSSP(stackSlotP dst, rRegP src)
 8390 %{
 8391   match(Set dst src);
 8392 
 8393   ins_cost(100);
 8394   format %{ "movq    $dst, $src\t# ptr stk" %}
 8395   ins_encode %{
 8396     __ movq($dst$$Address, $src$$Register);
 8397   %}
 8398   ins_pipe(ialu_mem_reg);
 8399 %}
 8400 
 8401 instruct storeSSF(stackSlotF dst, regF src)
 8402 %{
 8403   match(Set dst src);
 8404 
 8405   ins_cost(95); // XXX
 8406   format %{ "movss   $dst, $src\t# float stk" %}
 8407   ins_encode %{
 8408     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8409   %}
 8410   ins_pipe(pipe_slow); // XXX
 8411 %}
 8412 
 8413 instruct storeSSD(stackSlotD dst, regD src)
 8414 %{
 8415   match(Set dst src);
 8416 
 8417   ins_cost(95); // XXX
 8418   format %{ "movsd   $dst, $src\t# double stk" %}
 8419   ins_encode %{
 8420     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8421   %}
 8422   ins_pipe(pipe_slow); // XXX
 8423 %}
 8424 
 8425 instruct cacheWB(indirect addr)
 8426 %{
 8427   predicate(VM_Version::supports_data_cache_line_flush());
 8428   match(CacheWB addr);
 8429 
 8430   ins_cost(100);
 8431   format %{"cache wb $addr" %}
 8432   ins_encode %{
 8433     assert($addr->index_position() < 0, "should be");
 8434     assert($addr$$disp == 0, "should be");
 8435     __ cache_wb(Address($addr$$base$$Register, 0));
 8436   %}
 8437   ins_pipe(pipe_slow); // XXX
 8438 %}
 8439 
 8440 instruct cacheWBPreSync()
 8441 %{
 8442   predicate(VM_Version::supports_data_cache_line_flush());
 8443   match(CacheWBPreSync);
 8444 
 8445   ins_cost(100);
 8446   format %{"cache wb presync" %}
 8447   ins_encode %{
 8448     __ cache_wbsync(true);
 8449   %}
 8450   ins_pipe(pipe_slow); // XXX
 8451 %}
 8452 
 8453 instruct cacheWBPostSync()
 8454 %{
 8455   predicate(VM_Version::supports_data_cache_line_flush());
 8456   match(CacheWBPostSync);
 8457 
 8458   ins_cost(100);
 8459   format %{"cache wb postsync" %}
 8460   ins_encode %{
 8461     __ cache_wbsync(false);
 8462   %}
 8463   ins_pipe(pipe_slow); // XXX
 8464 %}
 8465 
 8466 //----------BSWAP Instructions-------------------------------------------------
 8467 instruct bytes_reverse_int(rRegI dst) %{
 8468   match(Set dst (ReverseBytesI dst));
 8469 
 8470   format %{ "bswapl  $dst" %}
 8471   ins_encode %{
 8472     __ bswapl($dst$$Register);
 8473   %}
 8474   ins_pipe( ialu_reg );
 8475 %}
 8476 
 8477 instruct bytes_reverse_long(rRegL dst) %{
 8478   match(Set dst (ReverseBytesL dst));
 8479 
 8480   format %{ "bswapq  $dst" %}
 8481   ins_encode %{
 8482     __ bswapq($dst$$Register);
 8483   %}
 8484   ins_pipe( ialu_reg);
 8485 %}
 8486 
 8487 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8488   match(Set dst (ReverseBytesUS dst));
 8489   effect(KILL cr);
 8490 
 8491   format %{ "bswapl  $dst\n\t"
 8492             "shrl    $dst,16\n\t" %}
 8493   ins_encode %{
 8494     __ bswapl($dst$$Register);
 8495     __ shrl($dst$$Register, 16);
 8496   %}
 8497   ins_pipe( ialu_reg );
 8498 %}
 8499 
 8500 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8501   match(Set dst (ReverseBytesS dst));
 8502   effect(KILL cr);
 8503 
 8504   format %{ "bswapl  $dst\n\t"
 8505             "sar     $dst,16\n\t" %}
 8506   ins_encode %{
 8507     __ bswapl($dst$$Register);
 8508     __ sarl($dst$$Register, 16);
 8509   %}
 8510   ins_pipe( ialu_reg );
 8511 %}
 8512 
 8513 //---------- Zeros Count Instructions ------------------------------------------
 8514 
 8515 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8516   predicate(UseCountLeadingZerosInstruction);
 8517   match(Set dst (CountLeadingZerosI src));
 8518   effect(KILL cr);
 8519 
 8520   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8521   ins_encode %{
 8522     __ lzcntl($dst$$Register, $src$$Register);
 8523   %}
 8524   ins_pipe(ialu_reg);
 8525 %}
 8526 
 8527 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8528   predicate(UseCountLeadingZerosInstruction);
 8529   match(Set dst (CountLeadingZerosI (LoadI src)));
 8530   effect(KILL cr);
 8531   ins_cost(175);
 8532   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8533   ins_encode %{
 8534     __ lzcntl($dst$$Register, $src$$Address);
 8535   %}
 8536   ins_pipe(ialu_reg_mem);
 8537 %}
 8538 
 8539 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8540   predicate(!UseCountLeadingZerosInstruction);
 8541   match(Set dst (CountLeadingZerosI src));
 8542   effect(KILL cr);
 8543 
 8544   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8545             "jnz     skip\n\t"
 8546             "movl    $dst, -1\n"
 8547       "skip:\n\t"
 8548             "negl    $dst\n\t"
 8549             "addl    $dst, 31" %}
 8550   ins_encode %{
 8551     Register Rdst = $dst$$Register;
 8552     Register Rsrc = $src$$Register;
 8553     Label skip;
 8554     __ bsrl(Rdst, Rsrc);
 8555     __ jccb(Assembler::notZero, skip);
 8556     __ movl(Rdst, -1);
 8557     __ bind(skip);
 8558     __ negl(Rdst);
 8559     __ addl(Rdst, BitsPerInt - 1);
 8560   %}
 8561   ins_pipe(ialu_reg);
 8562 %}
 8563 
 8564 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8565   predicate(UseCountLeadingZerosInstruction);
 8566   match(Set dst (CountLeadingZerosL src));
 8567   effect(KILL cr);
 8568 
 8569   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8570   ins_encode %{
 8571     __ lzcntq($dst$$Register, $src$$Register);
 8572   %}
 8573   ins_pipe(ialu_reg);
 8574 %}
 8575 
 8576 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8577   predicate(UseCountLeadingZerosInstruction);
 8578   match(Set dst (CountLeadingZerosL (LoadL src)));
 8579   effect(KILL cr);
 8580   ins_cost(175);
 8581   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8582   ins_encode %{
 8583     __ lzcntq($dst$$Register, $src$$Address);
 8584   %}
 8585   ins_pipe(ialu_reg_mem);
 8586 %}
 8587 
 8588 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8589   predicate(!UseCountLeadingZerosInstruction);
 8590   match(Set dst (CountLeadingZerosL src));
 8591   effect(KILL cr);
 8592 
 8593   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8594             "jnz     skip\n\t"
 8595             "movl    $dst, -1\n"
 8596       "skip:\n\t"
 8597             "negl    $dst\n\t"
 8598             "addl    $dst, 63" %}
 8599   ins_encode %{
 8600     Register Rdst = $dst$$Register;
 8601     Register Rsrc = $src$$Register;
 8602     Label skip;
 8603     __ bsrq(Rdst, Rsrc);
 8604     __ jccb(Assembler::notZero, skip);
 8605     __ movl(Rdst, -1);
 8606     __ bind(skip);
 8607     __ negl(Rdst);
 8608     __ addl(Rdst, BitsPerLong - 1);
 8609   %}
 8610   ins_pipe(ialu_reg);
 8611 %}
 8612 
 8613 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8614   predicate(UseCountTrailingZerosInstruction);
 8615   match(Set dst (CountTrailingZerosI src));
 8616   effect(KILL cr);
 8617 
 8618   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8619   ins_encode %{
 8620     __ tzcntl($dst$$Register, $src$$Register);
 8621   %}
 8622   ins_pipe(ialu_reg);
 8623 %}
 8624 
 8625 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8626   predicate(UseCountTrailingZerosInstruction);
 8627   match(Set dst (CountTrailingZerosI (LoadI src)));
 8628   effect(KILL cr);
 8629   ins_cost(175);
 8630   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8631   ins_encode %{
 8632     __ tzcntl($dst$$Register, $src$$Address);
 8633   %}
 8634   ins_pipe(ialu_reg_mem);
 8635 %}
 8636 
 8637 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8638   predicate(!UseCountTrailingZerosInstruction);
 8639   match(Set dst (CountTrailingZerosI src));
 8640   effect(KILL cr);
 8641 
 8642   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8643             "jnz     done\n\t"
 8644             "movl    $dst, 32\n"
 8645       "done:" %}
 8646   ins_encode %{
 8647     Register Rdst = $dst$$Register;
 8648     Label done;
 8649     __ bsfl(Rdst, $src$$Register);
 8650     __ jccb(Assembler::notZero, done);
 8651     __ movl(Rdst, BitsPerInt);
 8652     __ bind(done);
 8653   %}
 8654   ins_pipe(ialu_reg);
 8655 %}
 8656 
 8657 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8658   predicate(UseCountTrailingZerosInstruction);
 8659   match(Set dst (CountTrailingZerosL src));
 8660   effect(KILL cr);
 8661 
 8662   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8663   ins_encode %{
 8664     __ tzcntq($dst$$Register, $src$$Register);
 8665   %}
 8666   ins_pipe(ialu_reg);
 8667 %}
 8668 
 8669 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8670   predicate(UseCountTrailingZerosInstruction);
 8671   match(Set dst (CountTrailingZerosL (LoadL src)));
 8672   effect(KILL cr);
 8673   ins_cost(175);
 8674   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8675   ins_encode %{
 8676     __ tzcntq($dst$$Register, $src$$Address);
 8677   %}
 8678   ins_pipe(ialu_reg_mem);
 8679 %}
 8680 
 8681 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8682   predicate(!UseCountTrailingZerosInstruction);
 8683   match(Set dst (CountTrailingZerosL src));
 8684   effect(KILL cr);
 8685 
 8686   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8687             "jnz     done\n\t"
 8688             "movl    $dst, 64\n"
 8689       "done:" %}
 8690   ins_encode %{
 8691     Register Rdst = $dst$$Register;
 8692     Label done;
 8693     __ bsfq(Rdst, $src$$Register);
 8694     __ jccb(Assembler::notZero, done);
 8695     __ movl(Rdst, BitsPerLong);
 8696     __ bind(done);
 8697   %}
 8698   ins_pipe(ialu_reg);
 8699 %}
 8700 
 8701 //--------------- Reverse Operation Instructions ----------------
 8702 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8703   predicate(!VM_Version::supports_gfni());
 8704   match(Set dst (ReverseI src));
 8705   effect(TEMP dst, TEMP rtmp, KILL cr);
 8706   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8707   ins_encode %{
 8708     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8709   %}
 8710   ins_pipe( ialu_reg );
 8711 %}
 8712 
 8713 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8714   predicate(VM_Version::supports_gfni());
 8715   match(Set dst (ReverseI src));
 8716   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8717   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8718   ins_encode %{
 8719     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8720   %}
 8721   ins_pipe( ialu_reg );
 8722 %}
 8723 
 8724 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8725   predicate(!VM_Version::supports_gfni());
 8726   match(Set dst (ReverseL src));
 8727   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8728   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8729   ins_encode %{
 8730     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8731   %}
 8732   ins_pipe( ialu_reg );
 8733 %}
 8734 
 8735 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8736   predicate(VM_Version::supports_gfni());
 8737   match(Set dst (ReverseL src));
 8738   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8739   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8740   ins_encode %{
 8741     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8742   %}
 8743   ins_pipe( ialu_reg );
 8744 %}
 8745 
 8746 //---------- Population Count Instructions -------------------------------------
 8747 
 8748 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8749   predicate(UsePopCountInstruction);
 8750   match(Set dst (PopCountI src));
 8751   effect(KILL cr);
 8752 
 8753   format %{ "popcnt  $dst, $src" %}
 8754   ins_encode %{
 8755     __ popcntl($dst$$Register, $src$$Register);
 8756   %}
 8757   ins_pipe(ialu_reg);
 8758 %}
 8759 
 8760 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8761   predicate(UsePopCountInstruction);
 8762   match(Set dst (PopCountI (LoadI mem)));
 8763   effect(KILL cr);
 8764 
 8765   format %{ "popcnt  $dst, $mem" %}
 8766   ins_encode %{
 8767     __ popcntl($dst$$Register, $mem$$Address);
 8768   %}
 8769   ins_pipe(ialu_reg);
 8770 %}
 8771 
 8772 // Note: Long.bitCount(long) returns an int.
 8773 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8774   predicate(UsePopCountInstruction);
 8775   match(Set dst (PopCountL src));
 8776   effect(KILL cr);
 8777 
 8778   format %{ "popcnt  $dst, $src" %}
 8779   ins_encode %{
 8780     __ popcntq($dst$$Register, $src$$Register);
 8781   %}
 8782   ins_pipe(ialu_reg);
 8783 %}
 8784 
 8785 // Note: Long.bitCount(long) returns an int.
 8786 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8787   predicate(UsePopCountInstruction);
 8788   match(Set dst (PopCountL (LoadL mem)));
 8789   effect(KILL cr);
 8790 
 8791   format %{ "popcnt  $dst, $mem" %}
 8792   ins_encode %{
 8793     __ popcntq($dst$$Register, $mem$$Address);
 8794   %}
 8795   ins_pipe(ialu_reg);
 8796 %}
 8797 
 8798 
 8799 //----------MemBar Instructions-----------------------------------------------
 8800 // Memory barrier flavors
 8801 
 8802 instruct membar_acquire()
 8803 %{
 8804   match(MemBarAcquire);
 8805   match(LoadFence);
 8806   ins_cost(0);
 8807 
 8808   size(0);
 8809   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8810   ins_encode();
 8811   ins_pipe(empty);
 8812 %}
 8813 
 8814 instruct membar_acquire_lock()
 8815 %{
 8816   match(MemBarAcquireLock);
 8817   ins_cost(0);
 8818 
 8819   size(0);
 8820   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8821   ins_encode();
 8822   ins_pipe(empty);
 8823 %}
 8824 
 8825 instruct membar_release()
 8826 %{
 8827   match(MemBarRelease);
 8828   match(StoreFence);
 8829   ins_cost(0);
 8830 
 8831   size(0);
 8832   format %{ "MEMBAR-release ! (empty encoding)" %}
 8833   ins_encode();
 8834   ins_pipe(empty);
 8835 %}
 8836 
 8837 instruct membar_release_lock()
 8838 %{
 8839   match(MemBarReleaseLock);
 8840   ins_cost(0);
 8841 
 8842   size(0);
 8843   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8844   ins_encode();
 8845   ins_pipe(empty);
 8846 %}
 8847 
 8848 instruct membar_storeload(rFlagsReg cr) %{
 8849   match(MemBarStoreLoad);
 8850   effect(KILL cr);
 8851   ins_cost(400);
 8852 
 8853   format %{
 8854     $$template
 8855     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8856   %}
 8857   ins_encode %{
 8858     __ membar(Assembler::StoreLoad);
 8859   %}
 8860   ins_pipe(pipe_slow);
 8861 %}
 8862 
 8863 instruct membar_volatile(rFlagsReg cr) %{
 8864   match(MemBarVolatile);
 8865   effect(KILL cr);
 8866   ins_cost(400);
 8867 
 8868   format %{
 8869     $$template
 8870     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8871   %}
 8872   ins_encode %{
 8873     __ membar(Assembler::StoreLoad);
 8874   %}
 8875   ins_pipe(pipe_slow);
 8876 %}
 8877 
 8878 instruct unnecessary_membar_volatile()
 8879 %{
 8880   match(MemBarVolatile);
 8881   predicate(Matcher::post_store_load_barrier(n));
 8882   ins_cost(0);
 8883 
 8884   size(0);
 8885   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8886   ins_encode();
 8887   ins_pipe(empty);
 8888 %}
 8889 
 8890 instruct membar_full(rFlagsReg cr) %{
 8891   match(MemBarFull);
 8892   effect(KILL cr);
 8893   ins_cost(400);
 8894 
 8895   format %{
 8896     $$template
 8897     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8898   %}
 8899   ins_encode %{
 8900     __ membar(Assembler::StoreLoad);
 8901   %}
 8902   ins_pipe(pipe_slow);
 8903 %}
 8904 
 8905 instruct membar_storestore() %{
 8906   match(MemBarStoreStore);
 8907   match(StoreStoreFence);
 8908   ins_cost(0);
 8909 
 8910   size(0);
 8911   format %{ "MEMBAR-storestore (empty encoding)" %}
 8912   ins_encode( );
 8913   ins_pipe(empty);
 8914 %}
 8915 
 8916 //----------Move Instructions--------------------------------------------------
 8917 
 8918 instruct castX2P(rRegP dst, rRegL src)
 8919 %{
 8920   match(Set dst (CastX2P src));
 8921 
 8922   format %{ "movq    $dst, $src\t# long->ptr" %}
 8923   ins_encode %{
 8924     if ($dst$$reg != $src$$reg) {
 8925       __ movptr($dst$$Register, $src$$Register);
 8926     }
 8927   %}
 8928   ins_pipe(ialu_reg_reg); // XXX
 8929 %}
 8930 
 8931 instruct castP2X(rRegL dst, rRegP src)
 8932 %{
 8933   match(Set dst (CastP2X src));
 8934 
 8935   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8936   ins_encode %{
 8937     if ($dst$$reg != $src$$reg) {
 8938       __ movptr($dst$$Register, $src$$Register);
 8939     }
 8940   %}
 8941   ins_pipe(ialu_reg_reg); // XXX
 8942 %}
 8943 
// Convert oop into int for vector alignment masking
 8945 instruct convP2I(rRegI dst, rRegP src)
 8946 %{
 8947   match(Set dst (ConvL2I (CastP2X src)));
 8948 
 8949   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8950   ins_encode %{
 8951     __ movl($dst$$Register, $src$$Register);
 8952   %}
 8953   ins_pipe(ialu_reg_reg); // XXX
 8954 %}
 8955 
// Convert compressed oop into int for vector alignment masking
// in the case of 32-bit oops (heap < 4GB).
 8958 instruct convN2I(rRegI dst, rRegN src)
 8959 %{
 8960   predicate(CompressedOops::shift() == 0);
 8961   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8962 
 8963   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8964   ins_encode %{
 8965     __ movl($dst$$Register, $src$$Register);
 8966   %}
 8967   ins_pipe(ialu_reg_reg); // XXX
 8968 %}
 8969 
 8970 // Convert oop pointer into compressed form
 8971 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8972   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8973   match(Set dst (EncodeP src));
 8974   effect(KILL cr);
 8975   format %{ "encode_heap_oop $dst,$src" %}
 8976   ins_encode %{
 8977     Register s = $src$$Register;
 8978     Register d = $dst$$Register;
 8979     if (s != d) {
 8980       __ movq(d, s);
 8981     }
 8982     __ encode_heap_oop(d);
 8983   %}
 8984   ins_pipe(ialu_reg_long);
 8985 %}
 8986 
 8987 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8988   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8989   match(Set dst (EncodeP src));
 8990   effect(KILL cr);
 8991   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8992   ins_encode %{
 8993     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8994   %}
 8995   ins_pipe(ialu_reg_long);
 8996 %}
 8997 
 8998 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8999   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9000             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9001   match(Set dst (DecodeN src));
 9002   effect(KILL cr);
 9003   format %{ "decode_heap_oop $dst,$src" %}
 9004   ins_encode %{
 9005     Register s = $src$$Register;
 9006     Register d = $dst$$Register;
 9007     if (s != d) {
 9008       __ movq(d, s);
 9009     }
 9010     __ decode_heap_oop(d);
 9011   %}
 9012   ins_pipe(ialu_reg_long);
 9013 %}
 9014 
 9015 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9016   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9017             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9018   match(Set dst (DecodeN src));
 9019   effect(KILL cr);
 9020   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9021   ins_encode %{
 9022     Register s = $src$$Register;
 9023     Register d = $dst$$Register;
 9024     if (s != d) {
 9025       __ decode_heap_oop_not_null(d, s);
 9026     } else {
 9027       __ decode_heap_oop_not_null(d);
 9028     }
 9029   %}
 9030   ins_pipe(ialu_reg_long);
 9031 %}
 9032 
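// Compressed class pointers use their own base/shift, independent of the heap
// base used for compressed oops.  Class pointers are never null, which is why
// only the _not_null forms appear here.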
 9033 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 9034   match(Set dst (EncodePKlass src));
 9035   effect(TEMP dst, KILL cr);
 9036   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9037   ins_encode %{
 9038     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9039   %}
 9040   ins_pipe(ialu_reg_long);
 9041 %}
 9042 
 9043 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 9044   match(Set dst (DecodeNKlass src));
 9045   effect(TEMP dst, KILL cr);
 9046   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9047   ins_encode %{
 9048     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9049   %}
 9050   ins_pipe(ialu_reg_long);
 9051 %}
 9052 
 9053 //----------Conditional Move---------------------------------------------------
 9054 // Jump
 9055 // dummy instruction for generating temp registers
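// Jump-table dispatch loads the table base from the constant area with a lea
// and then jumps through [table + switch_val << shift (+ offset)], roughly:
//   leaq r11, [constant table]
//   jmp  [r11 + rax*8]
// (register names and scale above are purely illustrative).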
 9056 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 9057   match(Jump (LShiftL switch_val shift));
 9058   ins_cost(350);
 9059   predicate(false);
 9060   effect(TEMP dest);
 9061 
 9062   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9063             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9064   ins_encode %{
 9065     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9066     // to do that and the compiler is using that register as one it can allocate.
 9067     // So we build it all by hand.
 9068     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9069     // ArrayAddress dispatch(table, index);
 9070     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9071     __ lea($dest$$Register, $constantaddress);
 9072     __ jmp(dispatch);
 9073   %}
 9074   ins_pipe(pipe_jmp);
 9075 %}
 9076 
 9077 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 9078   match(Jump (AddL (LShiftL switch_val shift) offset));
 9079   ins_cost(350);
 9080   effect(TEMP dest);
 9081 
 9082   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9083             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9084   ins_encode %{
 9085     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9086     // to do that and the compiler is using that register as one it can allocate.
 9087     // So we build it all by hand.
 9088     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9089     // ArrayAddress dispatch(table, index);
 9090     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9091     __ lea($dest$$Register, $constantaddress);
 9092     __ jmp(dispatch);
 9093   %}
 9094   ins_pipe(pipe_jmp);
 9095 %}
 9096 
 9097 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9098   match(Jump switch_val);
 9099   ins_cost(350);
 9100   effect(TEMP dest);
 9101 
 9102   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9103             "jmp     [$dest + $switch_val]\n\t" %}
 9104   ins_encode %{
 9105     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9106     // to do that and the compiler is using that register as one it can allocate.
 9107     // So we build it all by hand.
 9108     // Address index(noreg, switch_reg, Address::times_1);
 9109     // ArrayAddress dispatch(table, index);
 9110     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9111     __ lea($dest$$Register, $constantaddress);
 9112     __ jmp(dispatch);
 9113   %}
 9114   ins_pipe(pipe_jmp);
 9115 %}
 9116 
 9117 // Conditional move
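// When the two CMove inputs are the constants 1 and 0, the result is just the
// condition (or its negation) materialized as 0/1, so a single setcc replaces
// the cmov; the negated condition is used because of the order in which the
// constants are matched.  Hence the lower cost of the imm_01 forms.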
 9118 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9119 %{
 9120   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9121   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9122 
 9123   ins_cost(100); // XXX
 9124   format %{ "setbn$cop $dst\t# signed, int" %}
 9125   ins_encode %{
 9126     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9127     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9128   %}
 9129   ins_pipe(ialu_reg);
 9130 %}
 9131 
 9132 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9133 %{
 9134   predicate(!UseAPX);
 9135   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9136 
 9137   ins_cost(200); // XXX
 9138   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9139   ins_encode %{
 9140     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9141   %}
 9142   ins_pipe(pipe_cmov_reg);
 9143 %}
 9144 
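// With APX (UseAPX) the EVEX-encoded "new data destination" (NDD) forms take a
// separate destination: ecmovl writes the selected value into $dst without
// first having to copy one input there, removing the extra move the
// two-operand cmov forms sometimes require.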
 9145 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9146 %{
 9147   predicate(UseAPX);
 9148   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9149 
 9150   ins_cost(200);
 9151   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9152   ins_encode %{
 9153     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9154   %}
 9155   ins_pipe(pipe_cmov_reg);
 9156 %}
 9157 
 9158 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9159 %{
 9160   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9161   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9162 
 9163   ins_cost(100); // XXX
 9164   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9165   ins_encode %{
 9166     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9167     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9168   %}
 9169   ins_pipe(ialu_reg);
 9170 %}
 9171 
 9172 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9173   predicate(!UseAPX);
 9174   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9175 
 9176   ins_cost(200); // XXX
 9177   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9178   ins_encode %{
 9179     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9180   %}
 9181   ins_pipe(pipe_cmov_reg);
 9182 %}
 9183 
 9184 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9185   predicate(UseAPX);
 9186   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9187 
 9188   ins_cost(200);
 9189   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9190   ins_encode %{
 9191     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9192   %}
 9193   ins_pipe(pipe_cmov_reg);
 9194 %}
 9195 
 9196 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9197 %{
 9198   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9199   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9200 
 9201   ins_cost(100); // XXX
 9202   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9203   ins_encode %{
 9204     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9205     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9206   %}
 9207   ins_pipe(ialu_reg);
 9208 %}
 9209 
 9210 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9211 %{
 9212   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9213   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9214 
 9215   ins_cost(100); // XXX
 9216   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9217   ins_encode %{
 9218     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9219     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9220   %}
 9221   ins_pipe(ialu_reg);
 9222 %}
 9223 
 9224 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9225   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9226 
 9227   ins_cost(200);
 9228   expand %{
 9229     cmovI_regU(cop, cr, dst, src);
 9230   %}
 9231 %}
 9232 
 9233 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
 9234   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9235 
 9236   ins_cost(200);
 9237   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9238   ins_encode %{
 9239     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9240   %}
 9241   ins_pipe(pipe_cmov_reg);
 9242 %}
 9243 
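// Unordered FP compares (NaN operands) set the parity flag.  For a "ne" test
// NaN must compare as not-equal, so the move has to be taken on parity as
// well, hence the cmovp + cmovne pair below.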
 9244 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9245   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9246   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9247 
 9248   ins_cost(200); // XXX
 9249   format %{ "cmovpl  $dst, $src\n\t"
 9250             "cmovnel $dst, $src" %}
 9251   ins_encode %{
 9252     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9253     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9254   %}
 9255   ins_pipe(pipe_cmov_reg);
 9256 %}
 9257 
 9258 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9259 // inputs of the CMove
 9260 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9261   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9262   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9263   effect(TEMP dst);
 9264 
 9265   ins_cost(200); // XXX
 9266   format %{ "cmovpl  $dst, $src\n\t"
 9267             "cmovnel $dst, $src" %}
 9268   ins_encode %{
 9269     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9270     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9271   %}
 9272   ins_pipe(pipe_cmov_reg);
 9273 %}
 9274 
 9275 // Conditional move
 9276 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9277   predicate(!UseAPX);
 9278   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9279 
 9280   ins_cost(250); // XXX
 9281   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9282   ins_encode %{
 9283     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9284   %}
 9285   ins_pipe(pipe_cmov_mem);
 9286 %}
 9287 
 9288 // Conditional move
 9289 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9290 %{
 9291   predicate(UseAPX);
 9292   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9293 
 9294   ins_cost(250);
 9295   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9296   ins_encode %{
 9297     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9298   %}
 9299   ins_pipe(pipe_cmov_mem);
 9300 %}
 9301 
 9302 // Conditional move
 9303 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9304 %{
 9305   predicate(!UseAPX);
 9306   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9307 
 9308   ins_cost(250); // XXX
 9309   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9310   ins_encode %{
 9311     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9312   %}
 9313   ins_pipe(pipe_cmov_mem);
 9314 %}
 9315 
 9316 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9317   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9318 
 9319   ins_cost(250);
 9320   expand %{
 9321     cmovI_memU(cop, cr, dst, src);
 9322   %}
 9323 %}
 9324 
 9325 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9326 %{
 9327   predicate(UseAPX);
 9328   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9329 
 9330   ins_cost(250);
 9331   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9332   ins_encode %{
 9333     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9334   %}
 9335   ins_pipe(pipe_cmov_mem);
 9336 %}
 9337 
 9338 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9339 %{
 9340   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9341 
 9342   ins_cost(250);
 9343   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9344   ins_encode %{
 9345     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9346   %}
 9347   ins_pipe(pipe_cmov_mem);
 9348 %}
 9349 
 9350 // Conditional move
 9351 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9352 %{
 9353   predicate(!UseAPX);
 9354   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9355 
 9356   ins_cost(200); // XXX
 9357   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9358   ins_encode %{
 9359     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9360   %}
 9361   ins_pipe(pipe_cmov_reg);
 9362 %}
 9363 
 9364 // Conditional move ndd
 9365 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9366 %{
 9367   predicate(UseAPX);
 9368   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9369 
 9370   ins_cost(200);
 9371   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9372   ins_encode %{
 9373     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9374   %}
 9375   ins_pipe(pipe_cmov_reg);
 9376 %}
 9377 
 9378 // Conditional move
 9379 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9380 %{
 9381   predicate(!UseAPX);
 9382   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9383 
 9384   ins_cost(200); // XXX
 9385   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9386   ins_encode %{
 9387     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9388   %}
 9389   ins_pipe(pipe_cmov_reg);
 9390 %}
 9391 
 9392 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9393   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9394 
 9395   ins_cost(200);
 9396   expand %{
 9397     cmovN_regU(cop, cr, dst, src);
 9398   %}
 9399 %}
 9400 
 9401 // Conditional move ndd
 9402 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9403 %{
 9404   predicate(UseAPX);
 9405   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9406 
 9407   ins_cost(200);
 9408   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9409   ins_encode %{
 9410     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9411   %}
 9412   ins_pipe(pipe_cmov_reg);
 9413 %}
 9414 
 9415 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
 9416   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9417 
 9418   ins_cost(200);
 9419   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9420   ins_encode %{
 9421     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9422   %}
 9423   ins_pipe(pipe_cmov_reg);
 9424 %}
 9425 
 9426 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9427   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9428   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9429 
 9430   ins_cost(200); // XXX
 9431   format %{ "cmovpl  $dst, $src\n\t"
 9432             "cmovnel $dst, $src" %}
 9433   ins_encode %{
 9434     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9435     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9436   %}
 9437   ins_pipe(pipe_cmov_reg);
 9438 %}
 9439 
 9440 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9441 // inputs of the CMove
 9442 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9443   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9444   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9445 
 9446   ins_cost(200); // XXX
 9447   format %{ "cmovpl  $dst, $src\n\t"
 9448             "cmovnel $dst, $src" %}
 9449   ins_encode %{
 9450     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9451     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9452   %}
 9453   ins_pipe(pipe_cmov_reg);
 9454 %}
 9455 
 9456 // Conditional move
 9457 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9458 %{
 9459   predicate(!UseAPX);
 9460   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9461 
 9462   ins_cost(200); // XXX
 9463   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9464   ins_encode %{
 9465     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9466   %}
 9467   ins_pipe(pipe_cmov_reg);  // XXX
 9468 %}
 9469 
 9470 // Conditional move ndd
 9471 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9472 %{
 9473   predicate(UseAPX);
 9474   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9475 
 9476   ins_cost(200);
 9477   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9478   ins_encode %{
 9479     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9480   %}
 9481   ins_pipe(pipe_cmov_reg);
 9482 %}
 9483 
 9484 // Conditional move
 9485 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9486 %{
 9487   predicate(!UseAPX);
 9488   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9489 
 9490   ins_cost(200); // XXX
 9491   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9492   ins_encode %{
 9493     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9494   %}
 9495   ins_pipe(pipe_cmov_reg); // XXX
 9496 %}
 9497 
 9498 // Conditional move ndd
 9499 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9500 %{
 9501   predicate(UseAPX);
 9502   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9503 
 9504   ins_cost(200);
 9505   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9506   ins_encode %{
 9507     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9508   %}
 9509   ins_pipe(pipe_cmov_reg);
 9510 %}
 9511 
 9512 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9513   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9514 
 9515   ins_cost(200);
 9516   expand %{
 9517     cmovP_regU(cop, cr, dst, src);
 9518   %}
 9519 %}
 9520 
 9521 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
 9522   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9523 
 9524   ins_cost(200);
 9525   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9526   ins_encode %{
 9527     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9528   %}
 9529   ins_pipe(pipe_cmov_reg);
 9530 %}
 9531 
 9532 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9533   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9534   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9535 
 9536   ins_cost(200); // XXX
 9537   format %{ "cmovpq  $dst, $src\n\t"
 9538             "cmovneq $dst, $src" %}
 9539   ins_encode %{
 9540     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9541     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9542   %}
 9543   ins_pipe(pipe_cmov_reg);
 9544 %}
 9545 
 9546 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9547 // inputs of the CMove
 9548 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9549   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9550   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9551 
 9552   ins_cost(200); // XXX
 9553   format %{ "cmovpq  $dst, $src\n\t"
 9554             "cmovneq $dst, $src" %}
 9555   ins_encode %{
 9556     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9557     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9558   %}
 9559   ins_pipe(pipe_cmov_reg);
 9560 %}
 9561 
 9562 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9563 %{
 9564   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9565   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9566 
 9567   ins_cost(100); // XXX
 9568   format %{ "setbn$cop $dst\t# signed, long" %}
 9569   ins_encode %{
 9570     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9571     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9572   %}
 9573   ins_pipe(ialu_reg);
 9574 %}
 9575 
 9576 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9577 %{
 9578   predicate(!UseAPX);
 9579   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9580 
 9581   ins_cost(200); // XXX
 9582   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9583   ins_encode %{
 9584     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9585   %}
 9586   ins_pipe(pipe_cmov_reg);  // XXX
 9587 %}
 9588 
 9589 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9590 %{
 9591   predicate(UseAPX);
 9592   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9593 
 9594   ins_cost(200);
 9595   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9596   ins_encode %{
 9597     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9598   %}
 9599   ins_pipe(pipe_cmov_reg);
 9600 %}
 9601 
 9602 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9603 %{
 9604   predicate(!UseAPX);
 9605   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9606 
 9607   ins_cost(200); // XXX
 9608   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9609   ins_encode %{
 9610     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9611   %}
 9612   ins_pipe(pipe_cmov_mem);  // XXX
 9613 %}
 9614 
 9615 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9616 %{
 9617   predicate(UseAPX);
 9618   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9619 
 9620   ins_cost(200);
 9621   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9622   ins_encode %{
 9623     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9624   %}
 9625   ins_pipe(pipe_cmov_mem);
 9626 %}
 9627 
 9628 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9629 %{
 9630   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9631   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9632 
 9633   ins_cost(100); // XXX
 9634   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9635   ins_encode %{
 9636     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9637     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9638   %}
 9639   ins_pipe(ialu_reg);
 9640 %}
 9641 
 9642 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9643 %{
 9644   predicate(!UseAPX);
 9645   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9646 
 9647   ins_cost(200); // XXX
 9648   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9649   ins_encode %{
 9650     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9651   %}
 9652   ins_pipe(pipe_cmov_reg); // XXX
 9653 %}
 9654 
 9655 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9656 %{
 9657   predicate(UseAPX);
 9658   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9659 
 9660   ins_cost(200);
 9661   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9662   ins_encode %{
 9663     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9664   %}
 9665   ins_pipe(pipe_cmov_reg);
 9666 %}
 9667 
 9668 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9669 %{
 9670   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9671   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9672 
 9673   ins_cost(100); // XXX
 9674   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9675   ins_encode %{
 9676     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9677     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9678   %}
 9679   ins_pipe(ialu_reg);
 9680 %}
 9681 
 9682 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9683 %{
 9684   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9685   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9686 
 9687   ins_cost(100); // XXX
 9688   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9689   ins_encode %{
 9690     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9691     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9692   %}
 9693   ins_pipe(ialu_reg);
 9694 %}
 9695 
 9696 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9697   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9698 
 9699   ins_cost(200);
 9700   expand %{
 9701     cmovL_regU(cop, cr, dst, src);
 9702   %}
 9703 %}
 9704 
 9705 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9706 %{
 9707   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9708 
 9709   ins_cost(200);
 9710   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9711   ins_encode %{
 9712     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9713   %}
 9714   ins_pipe(pipe_cmov_reg);
 9715 %}
 9716 
 9717 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9718   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9719   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9720 
 9721   ins_cost(200); // XXX
 9722   format %{ "cmovpq  $dst, $src\n\t"
 9723             "cmovneq $dst, $src" %}
 9724   ins_encode %{
 9725     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9726     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9727   %}
 9728   ins_pipe(pipe_cmov_reg);
 9729 %}
 9730 
 9731 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9732 // inputs of the CMove
 9733 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9734   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9735   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9736 
 9737   ins_cost(200); // XXX
 9738   format %{ "cmovpq  $dst, $src\n\t"
 9739             "cmovneq $dst, $src" %}
 9740   ins_encode %{
 9741     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9742     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9743   %}
 9744   ins_pipe(pipe_cmov_reg);
 9745 %}
 9746 
 9747 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9748 %{
 9749   predicate(!UseAPX);
 9750   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9751 
 9752   ins_cost(200); // XXX
 9753   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9754   ins_encode %{
 9755     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9756   %}
 9757   ins_pipe(pipe_cmov_mem); // XXX
 9758 %}
 9759 
 9760 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9761   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9762 
 9763   ins_cost(200);
 9764   expand %{
 9765     cmovL_memU(cop, cr, dst, src);
 9766   %}
 9767 %}
 9768 
 9769 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9770 %{
 9771   predicate(UseAPX);
 9772   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9773 
 9774   ins_cost(200);
 9775   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9776   ins_encode %{
 9777     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9778   %}
 9779   ins_pipe(pipe_cmov_mem);
 9780 %}
 9781 
 9782 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9783 %{
 9784   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9785 
 9786   ins_cost(200);
 9787   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9788   ins_encode %{
 9789     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9790   %}
 9791   ins_pipe(pipe_cmov_mem);
 9792 %}
 9793 
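// There is no cmov for XMM registers, so float/double conditional moves are
// emitted as a short branch (on the inverted condition) around a movss/movsd.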
 9794 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9795 %{
 9796   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9797 
 9798   ins_cost(200); // XXX
 9799   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9800             "movss     $dst, $src\n"
 9801     "skip:" %}
 9802   ins_encode %{
 9803     Label Lskip;
 9804     // Invert sense of branch from sense of CMOV
 9805     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9806     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9807     __ bind(Lskip);
 9808   %}
 9809   ins_pipe(pipe_slow);
 9810 %}
 9811 
 9812 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9813 %{
 9814   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9815 
 9816   ins_cost(200); // XXX
 9817   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9818             "movss     $dst, $src\n"
 9819     "skip:" %}
 9820   ins_encode %{
 9821     Label Lskip;
 9822     // Invert sense of branch from sense of CMOV
 9823     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9824     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9825     __ bind(Lskip);
 9826   %}
 9827   ins_pipe(pipe_slow);
 9828 %}
 9829 
 9830 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9831   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9832 
 9833   ins_cost(200);
 9834   expand %{
 9835     cmovF_regU(cop, cr, dst, src);
 9836   %}
 9837 %}
 9838 
 9839 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9840 %{
 9841   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9842 
 9843   ins_cost(200); // XXX
 9844   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9845             "movss     $dst, $src\n"
 9846     "skip:" %}
 9847   ins_encode %{
 9848     Label Lskip;
 9849     // Invert sense of branch from sense of CMOV
 9850     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9851     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9852     __ bind(Lskip);
 9853   %}
 9854   ins_pipe(pipe_slow);
 9855 %}
 9856 
 9857 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9858 %{
 9859   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9860 
 9861   ins_cost(200); // XXX
 9862   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9863             "movsd     $dst, $src\n"
 9864     "skip:" %}
 9865   ins_encode %{
 9866     Label Lskip;
 9867     // Invert sense of branch from sense of CMOV
 9868     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9869     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9870     __ bind(Lskip);
 9871   %}
 9872   ins_pipe(pipe_slow);
 9873 %}
 9874 
 9875 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9876 %{
 9877   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9878 
 9879   ins_cost(200); // XXX
 9880   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9881             "movsd     $dst, $src\n"
 9882     "skip:" %}
 9883   ins_encode %{
 9884     Label Lskip;
 9885     // Invert sense of branch from sense of CMOV
 9886     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9887     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9888     __ bind(Lskip);
 9889   %}
 9890   ins_pipe(pipe_slow);
 9891 %}
 9892 
 9893 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9894   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9895 
 9896   ins_cost(200);
 9897   expand %{
 9898     cmovD_regU(cop, cr, dst, src);
 9899   %}
 9900 %}
 9901 
 9902 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9903 %{
 9904   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9905 
 9906   ins_cost(200); // XXX
 9907   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9908             "movsd     $dst, $src\n"
 9909     "skip:" %}
 9910   ins_encode %{
 9911     Label Lskip;
 9912     // Invert sense of branch from sense of CMOV
 9913     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9914     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9915     __ bind(Lskip);
 9916   %}
 9917   ins_pipe(pipe_slow);
 9918 %}
 9919 
 9920 //----------Arithmetic Instructions--------------------------------------------
 9921 //----------Addition Instructions----------------------------------------------
 9922 
 9923 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9924 %{
 9925   predicate(!UseAPX);
 9926   match(Set dst (AddI dst src));
 9927   effect(KILL cr);
 9928   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9929   format %{ "addl    $dst, $src\t# int" %}
 9930   ins_encode %{
 9931     __ addl($dst$$Register, $src$$Register);
 9932   %}
 9933   ins_pipe(ialu_reg_reg);
 9934 %}
 9935 
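// NDD arithmetic forms.  The Flag_ndd_demotable_opr* hints mark three-operand
// EVEX encodings that may be demoted to the shorter legacy two-operand form
// when the destination coincides with the flagged source operand (this
// description is inferred from the flag names).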
 9936 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9937 %{
 9938   predicate(UseAPX);
 9939   match(Set dst (AddI src1 src2));
 9940   effect(KILL cr);
 9941   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9942 
 9943   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9944   ins_encode %{
 9945     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9946   %}
 9947   ins_pipe(ialu_reg_reg);
 9948 %}
 9949 
 9950 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9951 %{
 9952   predicate(!UseAPX);
 9953   match(Set dst (AddI dst src));
 9954   effect(KILL cr);
 9955   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9956 
 9957   format %{ "addl    $dst, $src\t# int" %}
 9958   ins_encode %{
 9959     __ addl($dst$$Register, $src$$constant);
 9960   %}
 9961   ins_pipe( ialu_reg );
 9962 %}
 9963 
 9964 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9965 %{
 9966   predicate(UseAPX);
 9967   match(Set dst (AddI src1 src2));
 9968   effect(KILL cr);
 9969   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9970 
 9971   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9972   ins_encode %{
 9973     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9974   %}
 9975   ins_pipe( ialu_reg );
 9976 %}
 9977 
 9978 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9979 %{
 9980   predicate(UseAPX);
 9981   match(Set dst (AddI (LoadI src1) src2));
 9982   effect(KILL cr);
 9983   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9984 
 9985   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9986   ins_encode %{
 9987     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9988   %}
 9989   ins_pipe( ialu_reg );
 9990 %}
 9991 
 9992 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9993 %{
 9994   predicate(!UseAPX);
 9995   match(Set dst (AddI dst (LoadI src)));
 9996   effect(KILL cr);
 9997   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9998 
 9999   ins_cost(150); // XXX
10000   format %{ "addl    $dst, $src\t# int" %}
10001   ins_encode %{
10002     __ addl($dst$$Register, $src$$Address);
10003   %}
10004   ins_pipe(ialu_reg_mem);
10005 %}
10006 
10007 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10008 %{
10009   predicate(UseAPX);
10010   match(Set dst (AddI src1 (LoadI src2)));
10011   effect(KILL cr);
10012   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10013 
10014   ins_cost(150);
10015   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
10016   ins_encode %{
10017     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10018   %}
10019   ins_pipe(ialu_reg_mem);
10020 %}
10021 
10022 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10023 %{
10024   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10025   effect(KILL cr);
10026   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10027 
10028   ins_cost(150); // XXX
10029   format %{ "addl    $dst, $src\t# int" %}
10030   ins_encode %{
10031     __ addl($dst$$Address, $src$$Register);
10032   %}
10033   ins_pipe(ialu_mem_reg);
10034 %}
10035 
10036 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10037 %{
10038   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10039   effect(KILL cr);
10040   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10041 
10043   ins_cost(125); // XXX
10044   format %{ "addl    $dst, $src\t# int" %}
10045   ins_encode %{
10046     __ addl($dst$$Address, $src$$constant);
10047   %}
10048   ins_pipe(ialu_mem_imm);
10049 %}
10050 
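// inc/dec are shorter than add/sub with an immediate of 1 but, unlike add,
// they leave the carry flag untouched (a partial flags update), which is why
// their use is gated by the UseIncDec switch.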
10051 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10052 %{
10053   predicate(!UseAPX && UseIncDec);
10054   match(Set dst (AddI dst src));
10055   effect(KILL cr);
10056 
10057   format %{ "incl    $dst\t# int" %}
10058   ins_encode %{
10059     __ incrementl($dst$$Register);
10060   %}
10061   ins_pipe(ialu_reg);
10062 %}
10063 
10064 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10065 %{
10066   predicate(UseAPX && UseIncDec);
10067   match(Set dst (AddI src val));
10068   effect(KILL cr);
10069   flag(PD::Flag_ndd_demotable_opr1);
10070 
10071   format %{ "eincl    $dst, $src\t# int ndd" %}
10072   ins_encode %{
10073     __ eincl($dst$$Register, $src$$Register, false);
10074   %}
10075   ins_pipe(ialu_reg);
10076 %}
10077 
10078 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10079 %{
10080   predicate(UseAPX && UseIncDec);
10081   match(Set dst (AddI (LoadI src) val));
10082   effect(KILL cr);
10083 
10084   format %{ "eincl    $dst, $src\t# int ndd" %}
10085   ins_encode %{
10086     __ eincl($dst$$Register, $src$$Address, false);
10087   %}
10088   ins_pipe(ialu_reg);
10089 %}
10090 
10091 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10092 %{
10093   predicate(UseIncDec);
10094   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10095   effect(KILL cr);
10096 
10097   ins_cost(125); // XXX
10098   format %{ "incl    $dst\t# int" %}
10099   ins_encode %{
10100     __ incrementl($dst$$Address);
10101   %}
10102   ins_pipe(ialu_mem_imm);
10103 %}
10104 
10105 // XXX why does that use AddI
10106 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10107 %{
10108   predicate(!UseAPX && UseIncDec);
10109   match(Set dst (AddI dst src));
10110   effect(KILL cr);
10111 
10112   format %{ "decl    $dst\t# int" %}
10113   ins_encode %{
10114     __ decrementl($dst$$Register);
10115   %}
10116   ins_pipe(ialu_reg);
10117 %}
10118 
10119 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10120 %{
10121   predicate(UseAPX && UseIncDec);
10122   match(Set dst (AddI src val));
10123   effect(KILL cr);
10124   flag(PD::Flag_ndd_demotable_opr1);
10125 
10126   format %{ "edecl    $dst, $src\t# int ndd" %}
10127   ins_encode %{
10128     __ edecl($dst$$Register, $src$$Register, false);
10129   %}
10130   ins_pipe(ialu_reg);
10131 %}
10132 
10133 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10134 %{
10135   predicate(UseAPX && UseIncDec);
10136   match(Set dst (AddI (LoadI src) val));
10137   effect(KILL cr);
10138 
10139   format %{ "edecl    $dst, $src\t# int ndd" %}
10140   ins_encode %{
10141     __ edecl($dst$$Register, $src$$Address, false);
10142   %}
10143   ins_pipe(ialu_reg);
10144 %}
10145 
10146 // XXX why does that use AddI
10147 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10148 %{
10149   predicate(UseIncDec);
10150   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10151   effect(KILL cr);
10152 
10153   ins_cost(125); // XXX
10154   format %{ "decl    $dst\t# int" %}
10155   ins_encode %{
10156     __ decrementl($dst$$Address);
10157   %}
10158   ins_pipe(ialu_mem_imm);
10159 %}
10160 
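// On CPUs where VM_Version reports a fast lea, shift-and-add address
// arithmetic can be folded into a single lea that also leaves the flags
// untouched, e.g.
//   leal dst, [base + index*4 + disp]   // dst = base + (index << 2) + disp
// The 2op/3op predicates distinguish leas with two vs. three address
// components, which differ in latency on some CPUs.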
10161 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10162 %{
10163   predicate(VM_Version::supports_fast_2op_lea());
10164   match(Set dst (AddI (LShiftI index scale) disp));
10165 
10166   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10167   ins_encode %{
10168     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10169     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10170   %}
10171   ins_pipe(ialu_reg_reg);
10172 %}
10173 
10174 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10175 %{
10176   predicate(VM_Version::supports_fast_3op_lea());
10177   match(Set dst (AddI (AddI base index) disp));
10178 
10179   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10180   ins_encode %{
10181     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10182   %}
10183   ins_pipe(ialu_reg_reg);
10184 %}
10185 
10186 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10187 %{
10188   predicate(VM_Version::supports_fast_2op_lea());
10189   match(Set dst (AddI base (LShiftI index scale)));
10190 
10191   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10192   ins_encode %{
10193     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10194     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10195   %}
10196   ins_pipe(ialu_reg_reg);
10197 %}
10198 
10199 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10200 %{
10201   predicate(VM_Version::supports_fast_3op_lea());
10202   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10203 
10204   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10205   ins_encode %{
10206     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10207     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10208   %}
10209   ins_pipe(ialu_reg_reg);
10210 %}
10211 
10212 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10213 %{
10214   predicate(!UseAPX);
10215   match(Set dst (AddL dst src));
10216   effect(KILL cr);
10217   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10218 
10219   format %{ "addq    $dst, $src\t# long" %}
10220   ins_encode %{
10221     __ addq($dst$$Register, $src$$Register);
10222   %}
10223   ins_pipe(ialu_reg_reg);
10224 %}
10225 
10226 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10227 %{
10228   predicate(UseAPX);
10229   match(Set dst (AddL src1 src2));
10230   effect(KILL cr);
10231   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10232 
10233   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10234   ins_encode %{
10235     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10236   %}
10237   ins_pipe(ialu_reg_reg);
10238 %}
10239 
10240 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10241 %{
10242   predicate(!UseAPX);
10243   match(Set dst (AddL dst src));
10244   effect(KILL cr);
10245   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10246 
10247   format %{ "addq    $dst, $src\t# long" %}
10248   ins_encode %{
10249     __ addq($dst$$Register, $src$$constant);
10250   %}
10251   ins_pipe( ialu_reg );
10252 %}
10253 
10254 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10255 %{
10256   predicate(UseAPX);
10257   match(Set dst (AddL src1 src2));
10258   effect(KILL cr);
10259   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10260 
10261   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10262   ins_encode %{
10263     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10264   %}
10265   ins_pipe( ialu_reg );
10266 %}
10267 
10268 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10269 %{
10270   predicate(UseAPX);
10271   match(Set dst (AddL (LoadL src1) src2));
10272   effect(KILL cr);
10273   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10274 
10275   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10276   ins_encode %{
10277     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10278   %}
10279   ins_pipe( ialu_reg );
10280 %}
10281 
10282 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10283 %{
10284   predicate(!UseAPX);
10285   match(Set dst (AddL dst (LoadL src)));
10286   effect(KILL cr);
10287   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10288 
10289   ins_cost(150); // XXX
10290   format %{ "addq    $dst, $src\t# long" %}
10291   ins_encode %{
10292     __ addq($dst$$Register, $src$$Address);
10293   %}
10294   ins_pipe(ialu_reg_mem);
10295 %}
10296 
10297 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10298 %{
10299   predicate(UseAPX);
10300   match(Set dst (AddL src1 (LoadL src2)));
10301   effect(KILL cr);
10302   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10303 
10304   ins_cost(150);
10305   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10306   ins_encode %{
10307     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10308   %}
10309   ins_pipe(ialu_reg_mem);
10310 %}
10311 
10312 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10313 %{
10314   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10315   effect(KILL cr);
10316   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10317 
10318   ins_cost(150); // XXX
10319   format %{ "addq    $dst, $src\t# long" %}
10320   ins_encode %{
10321     __ addq($dst$$Address, $src$$Register);
10322   %}
10323   ins_pipe(ialu_mem_reg);
10324 %}
10325 
10326 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10327 %{
10328   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10329   effect(KILL cr);
10330   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10331 
10332   ins_cost(125); // XXX
10333   format %{ "addq    $dst, $src\t# long" %}
10334   ins_encode %{
10335     __ addq($dst$$Address, $src$$constant);
10336   %}
10337   ins_pipe(ialu_mem_imm);
10338 %}
10339 
10340 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10341 %{
10342   predicate(!UseAPX && UseIncDec);
10343   match(Set dst (AddL dst src));
10344   effect(KILL cr);
10345 
10346   format %{ "incq    $dst\t# long" %}
10347   ins_encode %{
10348     __ incrementq($dst$$Register);
10349   %}
10350   ins_pipe(ialu_reg);
10351 %}
10352 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10354 %{
10355   predicate(UseAPX && UseIncDec);
10356   match(Set dst (AddL src val));
10357   effect(KILL cr);
10358   flag(PD::Flag_ndd_demotable_opr1);
10359 
10360   format %{ "eincq    $dst, $src\t# long ndd" %}
10361   ins_encode %{
10362     __ eincq($dst$$Register, $src$$Register, false);
10363   %}
10364   ins_pipe(ialu_reg);
10365 %}
10366 
10367 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10368 %{
10369   predicate(UseAPX && UseIncDec);
10370   match(Set dst (AddL (LoadL src) val));
10371   effect(KILL cr);
10372 
10373   format %{ "eincq    $dst, $src\t# long ndd" %}
10374   ins_encode %{
10375     __ eincq($dst$$Register, $src$$Address, false);
10376   %}
10377   ins_pipe(ialu_reg);
10378 %}
10379 
10380 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10381 %{
10382   predicate(UseIncDec);
10383   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10384   effect(KILL cr);
10385 
10386   ins_cost(125); // XXX
10387   format %{ "incq    $dst\t# long" %}
10388   ins_encode %{
10389     __ incrementq($dst$$Address);
10390   %}
10391   ins_pipe(ialu_mem_imm);
10392 %}
10393 
10394 // XXX why does that use AddL
10395 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10396 %{
10397   predicate(!UseAPX && UseIncDec);
10398   match(Set dst (AddL dst src));
10399   effect(KILL cr);
10400 
10401   format %{ "decq    $dst\t# long" %}
10402   ins_encode %{
10403     __ decrementq($dst$$Register);
10404   %}
10405   ins_pipe(ialu_reg);
10406 %}
10407 
10408 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10409 %{
10410   predicate(UseAPX && UseIncDec);
10411   match(Set dst (AddL src val));
10412   effect(KILL cr);
10413   flag(PD::Flag_ndd_demotable_opr1);
10414 
10415   format %{ "edecq    $dst, $src\t# long ndd" %}
10416   ins_encode %{
10417     __ edecq($dst$$Register, $src$$Register, false);
10418   %}
10419   ins_pipe(ialu_reg);
10420 %}
10421 
10422 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10423 %{
10424   predicate(UseAPX && UseIncDec);
10425   match(Set dst (AddL (LoadL src) val));
10426   effect(KILL cr);
10427 
10428   format %{ "edecq    $dst, $src\t# long ndd" %}
10429   ins_encode %{
10430     __ edecq($dst$$Register, $src$$Address, false);
10431   %}
10432   ins_pipe(ialu_reg);
10433 %}
10434 
10435 // XXX why does that use AddL
10436 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10437 %{
10438   predicate(UseIncDec);
10439   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10440   effect(KILL cr);
10441 
10442   ins_cost(125); // XXX
10443   format %{ "decq    $dst\t# long" %}
10444   ins_encode %{
10445     __ decrementq($dst$$Address);
10446   %}
10447   ins_pipe(ialu_mem_imm);
10448 %}
10449 
10450 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10451 %{
10452   predicate(VM_Version::supports_fast_2op_lea());
10453   match(Set dst (AddL (LShiftL index scale) disp));
10454 
10455   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10456   ins_encode %{
10457     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10458     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10459   %}
10460   ins_pipe(ialu_reg_reg);
10461 %}
10462 
10463 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10464 %{
10465   predicate(VM_Version::supports_fast_3op_lea());
10466   match(Set dst (AddL (AddL base index) disp));
10467 
10468   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10469   ins_encode %{
10470     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10471   %}
10472   ins_pipe(ialu_reg_reg);
10473 %}
10474 
10475 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10476 %{
10477   predicate(VM_Version::supports_fast_2op_lea());
10478   match(Set dst (AddL base (LShiftL index scale)));
10479 
10480   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10481   ins_encode %{
10482     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10483     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10484   %}
10485   ins_pipe(ialu_reg_reg);
10486 %}
10487 
10488 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10489 %{
10490   predicate(VM_Version::supports_fast_3op_lea());
10491   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10492 
10493   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10494   ins_encode %{
10495     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10496     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10497   %}
10498   ins_pipe(ialu_reg_reg);
10499 %}
10500 
10501 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10502 %{
10503   match(Set dst (AddP dst src));
10504   effect(KILL cr);
10505   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10506 
10507   format %{ "addq    $dst, $src\t# ptr" %}
10508   ins_encode %{
10509     __ addq($dst$$Register, $src$$Register);
10510   %}
10511   ins_pipe(ialu_reg_reg);
10512 %}
10513 
10514 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10515 %{
10516   match(Set dst (AddP dst src));
10517   effect(KILL cr);
10518   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10519 
10520   format %{ "addq    $dst, $src\t# ptr" %}
10521   ins_encode %{
10522     __ addq($dst$$Register, $src$$constant);
10523   %}
10524   ins_pipe( ialu_reg );
10525 %}
10526 
10527 // XXX addP mem ops ????
10528 
10529 instruct checkCastPP(rRegP dst)
10530 %{
10531   match(Set dst (CheckCastPP dst));
10532 
10533   size(0);
10534   format %{ "# checkcastPP of $dst" %}
10535   ins_encode(/* empty encoding */);
10536   ins_pipe(empty);
10537 %}
10538 
10539 instruct castPP(rRegP dst)
10540 %{
10541   match(Set dst (CastPP dst));
10542 
10543   size(0);
10544   format %{ "# castPP of $dst" %}
10545   ins_encode(/* empty encoding */);
10546   ins_pipe(empty);
10547 %}
10548 
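// The cast instructs below are normally zero-size no-ops: CastII/CastLL only
// narrow the type seen by the optimizer.  With the diagnostic flag
// VerifyConstraintCasts set to a non-zero value, the *_checked variants are
// selected instead and emit a runtime range check (verify_int_in_range /
// verify_long_in_range) on the casted value.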
10549 instruct castII(rRegI dst)
10550 %{
10551   predicate(VerifyConstraintCasts == 0);
10552   match(Set dst (CastII dst));
10553 
10554   size(0);
10555   format %{ "# castII of $dst" %}
10556   ins_encode(/* empty encoding */);
10557   ins_cost(0);
10558   ins_pipe(empty);
10559 %}
10560 
10561 instruct castII_checked(rRegI dst, rFlagsReg cr)
10562 %{
10563   predicate(VerifyConstraintCasts > 0);
10564   match(Set dst (CastII dst));
10565 
10566   effect(KILL cr);
10567   format %{ "# cast_checked_II $dst" %}
10568   ins_encode %{
10569     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10570   %}
10571   ins_pipe(pipe_slow);
10572 %}
10573 
10574 instruct castLL(rRegL dst)
10575 %{
10576   predicate(VerifyConstraintCasts == 0);
10577   match(Set dst (CastLL dst));
10578 
10579   size(0);
10580   format %{ "# castLL of $dst" %}
10581   ins_encode(/* empty encoding */);
10582   ins_cost(0);
10583   ins_pipe(empty);
10584 %}
10585 
10586 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10587 %{
10588   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10589   match(Set dst (CastLL dst));
10590 
10591   effect(KILL cr);
10592   format %{ "# cast_checked_LL $dst" %}
10593   ins_encode %{
10594     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10595   %}
10596   ins_pipe(pipe_slow);
10597 %}
10598 
10599 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10600 %{
10601   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10602   match(Set dst (CastLL dst));
10603 
10604   effect(KILL cr, TEMP tmp);
10605   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10606   ins_encode %{
10607     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10608   %}
10609   ins_pipe(pipe_slow);
10610 %}
10611 
10612 instruct castFF(regF dst)
10613 %{
10614   match(Set dst (CastFF dst));
10615 
10616   size(0);
10617   format %{ "# castFF of $dst" %}
10618   ins_encode(/* empty encoding */);
10619   ins_cost(0);
10620   ins_pipe(empty);
10621 %}
10622 
10623 instruct castHH(regF dst)
10624 %{
10625   match(Set dst (CastHH dst));
10626 
10627   size(0);
10628   format %{ "# castHH of $dst" %}
10629   ins_encode(/* empty encoding */);
10630   ins_cost(0);
10631   ins_pipe(empty);
10632 %}
10633 
10634 instruct castDD(regD dst)
10635 %{
10636   match(Set dst (CastDD dst));
10637 
10638   size(0);
10639   format %{ "# castDD of $dst" %}
10640   ins_encode(/* empty encoding */);
10641   ins_cost(0);
10642   ins_pipe(empty);
10643 %}
10644 
10645 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
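// CompareAndSwap* is implemented with lock cmpxchg: the expected value must
// live in rax (rax_Reg* oldval), the new value is an ordinary register, and
// the boolean result is materialized from the Z flag via setcc into $res.
// The strong and weak ideal forms map to the same instructs because x86
// cmpxchg never fails spuriously.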
10646 instruct compareAndSwapP(rRegI res,
10647                          memory mem_ptr,
10648                          rax_RegP oldval, rRegP newval,
10649                          rFlagsReg cr)
10650 %{
10651   predicate(n->as_LoadStore()->barrier_data() == 0);
10652   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10653   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10654   effect(KILL cr, KILL oldval);
10655 
10656   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10657             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10658             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10659   ins_encode %{
10660     __ lock();
10661     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10662     __ setcc(Assembler::equal, $res$$Register);
10663   %}
10664   ins_pipe( pipe_cmpxchg );
10665 %}
10666 
10667 instruct compareAndSwapL(rRegI res,
10668                          memory mem_ptr,
10669                          rax_RegL oldval, rRegL newval,
10670                          rFlagsReg cr)
10671 %{
10672   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10673   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10674   effect(KILL cr, KILL oldval);
10675 
10676   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10677             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10678             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10679   ins_encode %{
10680     __ lock();
10681     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10682     __ setcc(Assembler::equal, $res$$Register);
10683   %}
10684   ins_pipe( pipe_cmpxchg );
10685 %}
10686 
10687 instruct compareAndSwapI(rRegI res,
10688                          memory mem_ptr,
10689                          rax_RegI oldval, rRegI newval,
10690                          rFlagsReg cr)
10691 %{
10692   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10693   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10694   effect(KILL cr, KILL oldval);
10695 
10696   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10697             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10698             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10699   ins_encode %{
10700     __ lock();
10701     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10702     __ setcc(Assembler::equal, $res$$Register);
10703   %}
10704   ins_pipe( pipe_cmpxchg );
10705 %}
10706 
10707 instruct compareAndSwapB(rRegI res,
10708                          memory mem_ptr,
10709                          rax_RegI oldval, rRegI newval,
10710                          rFlagsReg cr)
10711 %{
10712   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10713   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10714   effect(KILL cr, KILL oldval);
10715 
10716   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10717             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10718             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10719   ins_encode %{
10720     __ lock();
10721     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10722     __ setcc(Assembler::equal, $res$$Register);
10723   %}
10724   ins_pipe( pipe_cmpxchg );
10725 %}
10726 
10727 instruct compareAndSwapS(rRegI res,
10728                          memory mem_ptr,
10729                          rax_RegI oldval, rRegI newval,
10730                          rFlagsReg cr)
10731 %{
10732   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10733   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10734   effect(KILL cr, KILL oldval);
10735 
10736   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10737             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10738             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10739   ins_encode %{
10740     __ lock();
10741     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10742     __ setcc(Assembler::equal, $res$$Register);
10743   %}
10744   ins_pipe( pipe_cmpxchg );
10745 %}
10746 
10747 instruct compareAndSwapN(rRegI res,
10748                           memory mem_ptr,
10749                           rax_RegN oldval, rRegN newval,
10750                           rFlagsReg cr) %{
10751   predicate(n->as_LoadStore()->barrier_data() == 0);
10752   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10753   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10754   effect(KILL cr, KILL oldval);
10755 
10756   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10757             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10758             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10759   ins_encode %{
10760     __ lock();
10761     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10762     __ setcc(Assembler::equal, $res$$Register);
10763   %}
10764   ins_pipe( pipe_cmpxchg );
10765 %}
10766 
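// CompareAndExchange* differs from CompareAndSwap* only in its result: it
// returns the value found in memory rather than a success flag.  Since
// lock cmpxchg already leaves that value in rax, no setcc is needed and the
// instructs simply define oldval (rax) as the result.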
10767 instruct compareAndExchangeB(
10768                          memory mem_ptr,
10769                          rax_RegI oldval, rRegI newval,
10770                          rFlagsReg cr)
10771 %{
10772   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10773   effect(KILL cr);
10774 
10775   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10776             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10777   ins_encode %{
10778     __ lock();
10779     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10780   %}
10781   ins_pipe( pipe_cmpxchg );
10782 %}
10783 
10784 instruct compareAndExchangeS(
10785                          memory mem_ptr,
10786                          rax_RegI oldval, rRegI newval,
10787                          rFlagsReg cr)
10788 %{
10789   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10790   effect(KILL cr);
10791 
10792   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10793             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10794   ins_encode %{
10795     __ lock();
10796     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10797   %}
10798   ins_pipe( pipe_cmpxchg );
10799 %}
10800 
10801 instruct compareAndExchangeI(
10802                          memory mem_ptr,
10803                          rax_RegI oldval, rRegI newval,
10804                          rFlagsReg cr)
10805 %{
10806   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10807   effect(KILL cr);
10808 
10809   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10810             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10811   ins_encode %{
10812     __ lock();
10813     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10814   %}
10815   ins_pipe( pipe_cmpxchg );
10816 %}
10817 
10818 instruct compareAndExchangeL(
10819                          memory mem_ptr,
10820                          rax_RegL oldval, rRegL newval,
10821                          rFlagsReg cr)
10822 %{
10823   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10824   effect(KILL cr);
10825 
10826   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10827             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10828   ins_encode %{
10829     __ lock();
10830     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10831   %}
10832   ins_pipe( pipe_cmpxchg );
10833 %}
10834 
10835 instruct compareAndExchangeN(
10836                           memory mem_ptr,
10837                           rax_RegN oldval, rRegN newval,
10838                           rFlagsReg cr) %{
10839   predicate(n->as_LoadStore()->barrier_data() == 0);
10840   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10841   effect(KILL cr);
10842 
10843   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10844             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10845   ins_encode %{
10846     __ lock();
10847     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10848   %}
10849   ins_pipe( pipe_cmpxchg );
10850 %}
10851 
10852 instruct compareAndExchangeP(
10853                          memory mem_ptr,
10854                          rax_RegP oldval, rRegP newval,
10855                          rFlagsReg cr)
10856 %{
10857   predicate(n->as_LoadStore()->barrier_data() == 0);
10858   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10859   effect(KILL cr);
10860 
10861   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10862             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10863   ins_encode %{
10864     __ lock();
10865     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10866   %}
10867   ins_pipe( pipe_cmpxchg );
10868 %}
10869 
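// GetAndAdd* comes in two shapes.  When the old value is not used
// (result_not_used()), a plain "lock add" against memory is sufficient and
// cheaper than xadd.  Otherwise "lock xadd" is emitted, which adds to memory
// and returns the previous contents in the source register.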
10870 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10871   predicate(n->as_LoadStore()->result_not_used());
10872   match(Set dummy (GetAndAddB mem add));
10873   effect(KILL cr);
10874   format %{ "addb_lock   $mem, $add" %}
10875   ins_encode %{
10876     __ lock();
10877     __ addb($mem$$Address, $add$$Register);
10878   %}
10879   ins_pipe(pipe_cmpxchg);
10880 %}
10881 
10882 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10883   predicate(n->as_LoadStore()->result_not_used());
10884   match(Set dummy (GetAndAddB mem add));
10885   effect(KILL cr);
10886   format %{ "addb_lock   $mem, $add" %}
10887   ins_encode %{
10888     __ lock();
10889     __ addb($mem$$Address, $add$$constant);
10890   %}
10891   ins_pipe(pipe_cmpxchg);
10892 %}
10893 
10894 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10895   predicate(!n->as_LoadStore()->result_not_used());
10896   match(Set newval (GetAndAddB mem newval));
10897   effect(KILL cr);
10898   format %{ "xaddb_lock  $mem, $newval" %}
10899   ins_encode %{
10900     __ lock();
10901     __ xaddb($mem$$Address, $newval$$Register);
10902   %}
10903   ins_pipe(pipe_cmpxchg);
10904 %}
10905 
10906 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10907   predicate(n->as_LoadStore()->result_not_used());
10908   match(Set dummy (GetAndAddS mem add));
10909   effect(KILL cr);
10910   format %{ "addw_lock   $mem, $add" %}
10911   ins_encode %{
10912     __ lock();
10913     __ addw($mem$$Address, $add$$Register);
10914   %}
10915   ins_pipe(pipe_cmpxchg);
10916 %}
10917 
10918 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10919   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10920   match(Set dummy (GetAndAddS mem add));
10921   effect(KILL cr);
10922   format %{ "addw_lock   $mem, $add" %}
10923   ins_encode %{
10924     __ lock();
10925     __ addw($mem$$Address, $add$$constant);
10926   %}
10927   ins_pipe(pipe_cmpxchg);
10928 %}
10929 
10930 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10931   predicate(!n->as_LoadStore()->result_not_used());
10932   match(Set newval (GetAndAddS mem newval));
10933   effect(KILL cr);
10934   format %{ "xaddw_lock  $mem, $newval" %}
10935   ins_encode %{
10936     __ lock();
10937     __ xaddw($mem$$Address, $newval$$Register);
10938   %}
10939   ins_pipe(pipe_cmpxchg);
10940 %}
10941 
10942 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10943   predicate(n->as_LoadStore()->result_not_used());
10944   match(Set dummy (GetAndAddI mem add));
10945   effect(KILL cr);
10946   format %{ "addl_lock   $mem, $add" %}
10947   ins_encode %{
10948     __ lock();
10949     __ addl($mem$$Address, $add$$Register);
10950   %}
10951   ins_pipe(pipe_cmpxchg);
10952 %}
10953 
10954 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10955   predicate(n->as_LoadStore()->result_not_used());
10956   match(Set dummy (GetAndAddI mem add));
10957   effect(KILL cr);
10958   format %{ "addl_lock   $mem, $add" %}
10959   ins_encode %{
10960     __ lock();
10961     __ addl($mem$$Address, $add$$constant);
10962   %}
10963   ins_pipe(pipe_cmpxchg);
10964 %}
10965 
10966 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10967   predicate(!n->as_LoadStore()->result_not_used());
10968   match(Set newval (GetAndAddI mem newval));
10969   effect(KILL cr);
10970   format %{ "xaddl_lock  $mem, $newval" %}
10971   ins_encode %{
10972     __ lock();
10973     __ xaddl($mem$$Address, $newval$$Register);
10974   %}
10975   ins_pipe(pipe_cmpxchg);
10976 %}
10977 
10978 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10979   predicate(n->as_LoadStore()->result_not_used());
10980   match(Set dummy (GetAndAddL mem add));
10981   effect(KILL cr);
10982   format %{ "addq_lock   $mem, $add" %}
10983   ins_encode %{
10984     __ lock();
10985     __ addq($mem$$Address, $add$$Register);
10986   %}
10987   ins_pipe(pipe_cmpxchg);
10988 %}
10989 
10990 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10991   predicate(n->as_LoadStore()->result_not_used());
10992   match(Set dummy (GetAndAddL mem add));
10993   effect(KILL cr);
10994   format %{ "addq_lock   $mem, $add" %}
10995   ins_encode %{
10996     __ lock();
10997     __ addq($mem$$Address, $add$$constant);
10998   %}
10999   ins_pipe(pipe_cmpxchg);
11000 %}
11001 
11002 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11003   predicate(!n->as_LoadStore()->result_not_used());
11004   match(Set newval (GetAndAddL mem newval));
11005   effect(KILL cr);
11006   format %{ "xaddq_lock  $mem, $newval" %}
11007   ins_encode %{
11008     __ lock();
11009     __ xaddq($mem$$Address, $newval$$Register);
11010   %}
11011   ins_pipe(pipe_cmpxchg);
11012 %}
11013 
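// GetAndSet* maps to xchg with a memory operand, which is implicitly locked
// on x86, so no explicit lock prefix is emitted and no flags are clobbered
// (hence no KILL cr effect on these instructs).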
11014 instruct xchgB( memory mem, rRegI newval) %{
11015   match(Set newval (GetAndSetB mem newval));
11016   format %{ "XCHGB  $newval,[$mem]" %}
11017   ins_encode %{
11018     __ xchgb($newval$$Register, $mem$$Address);
11019   %}
11020   ins_pipe( pipe_cmpxchg );
11021 %}
11022 
11023 instruct xchgS( memory mem, rRegI newval) %{
11024   match(Set newval (GetAndSetS mem newval));
11025   format %{ "XCHGW  $newval,[$mem]" %}
11026   ins_encode %{
11027     __ xchgw($newval$$Register, $mem$$Address);
11028   %}
11029   ins_pipe( pipe_cmpxchg );
11030 %}
11031 
11032 instruct xchgI( memory mem, rRegI newval) %{
11033   match(Set newval (GetAndSetI mem newval));
11034   format %{ "XCHGL  $newval,[$mem]" %}
11035   ins_encode %{
11036     __ xchgl($newval$$Register, $mem$$Address);
11037   %}
11038   ins_pipe( pipe_cmpxchg );
11039 %}
11040 
11041 instruct xchgL( memory mem, rRegL newval) %{
11042   match(Set newval (GetAndSetL mem newval));
11043   format %{ "XCHGQ  $newval,[$mem]" %}
11044   ins_encode %{
11045     __ xchgq($newval$$Register, $mem$$Address);
11046   %}
11047   ins_pipe( pipe_cmpxchg );
11048 %}
11049 
11050 instruct xchgP( memory mem, rRegP newval) %{
11051   match(Set newval (GetAndSetP mem newval));
11052   predicate(n->as_LoadStore()->barrier_data() == 0);
11053   format %{ "XCHGQ  $newval,[$mem]" %}
11054   ins_encode %{
11055     __ xchgq($newval$$Register, $mem$$Address);
11056   %}
11057   ins_pipe( pipe_cmpxchg );
11058 %}
11059 
11060 instruct xchgN( memory mem, rRegN newval) %{
11061   predicate(n->as_LoadStore()->barrier_data() == 0);
11062   match(Set newval (GetAndSetN mem newval));
11063   format %{ "XCHGL  $newval,[$mem]" %}
11064   ins_encode %{
11065     __ xchgl($newval$$Register, $mem$$Address);
11066   %}
11067   ins_pipe( pipe_cmpxchg );
11068 %}
11069 
11070 //----------Abs Instructions-------------------------------------------
11071 
11072 // Integer Absolute Instructions
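// The absolute value is computed branchlessly.  A sketch of the emitted
// sequence for the int case (the long case is analogous with 64-bit ops):
//   xorl        dst, dst   // dst = 0
//   subl        dst, src   // dst = -src, flags set from (0 - src)
//   cmovl(less) dst, src   // if (0 - src) is negative, i.e. src > 0, keep src
// so dst ends up as src for positive inputs and -src otherwise.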
11073 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11074 %{
11075   match(Set dst (AbsI src));
11076   effect(TEMP dst, KILL cr);
11077   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11078             "subl    $dst, $src\n\t"
11079             "cmovll  $dst, $src" %}
11080   ins_encode %{
11081     __ xorl($dst$$Register, $dst$$Register);
11082     __ subl($dst$$Register, $src$$Register);
11083     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11084   %}
11085 
11086   ins_pipe(ialu_reg_reg);
11087 %}
11088 
11089 // Long Absolute Instructions
11090 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11091 %{
11092   match(Set dst (AbsL src));
11093   effect(TEMP dst, KILL cr);
11094   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11095             "subq    $dst, $src\n\t"
11096             "cmovlq  $dst, $src" %}
11097   ins_encode %{
11098     __ xorl($dst$$Register, $dst$$Register);
11099     __ subq($dst$$Register, $src$$Register);
11100     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11101   %}
11102 
11103   ins_pipe(ialu_reg_reg);
11104 %}
11105 
11106 //----------Subtraction Instructions-------------------------------------------
11107 
11108 // Integer Subtraction Instructions
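// With APX (UseAPX) the *_ndd variants are used instead of the legacy
// two-operand forms: esubl/esubq take an explicit destination ("new data
// destination"), so dst = src1 - src2 without clobbering src1.  The
// Flag_ndd_demotable_opr* hints presumably allow later passes to demote the
// NDD encoding back to the shorter legacy form when the destination happens
// to coincide with that operand.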
11109 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11110 %{
11111   predicate(!UseAPX);
11112   match(Set dst (SubI dst src));
11113   effect(KILL cr);
11114   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11115 
11116   format %{ "subl    $dst, $src\t# int" %}
11117   ins_encode %{
11118     __ subl($dst$$Register, $src$$Register);
11119   %}
11120   ins_pipe(ialu_reg_reg);
11121 %}
11122 
11123 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11124 %{
11125   predicate(UseAPX);
11126   match(Set dst (SubI src1 src2));
11127   effect(KILL cr);
11128   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11129 
11130   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11131   ins_encode %{
11132     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11133   %}
11134   ins_pipe(ialu_reg_reg);
11135 %}
11136 
11137 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11138 %{
11139   predicate(UseAPX);
11140   match(Set dst (SubI src1 src2));
11141   effect(KILL cr);
11142   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11143 
11144   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11145   ins_encode %{
11146     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11147   %}
11148   ins_pipe(ialu_reg_reg);
11149 %}
11150 
11151 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11152 %{
11153   predicate(UseAPX);
11154   match(Set dst (SubI (LoadI src1) src2));
11155   effect(KILL cr);
11156   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11157 
11158   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11159   ins_encode %{
11160     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11161   %}
11162   ins_pipe(ialu_reg_reg);
11163 %}
11164 
11165 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11166 %{
11167   predicate(!UseAPX);
11168   match(Set dst (SubI dst (LoadI src)));
11169   effect(KILL cr);
11170   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11171 
11172   ins_cost(150);
11173   format %{ "subl    $dst, $src\t# int" %}
11174   ins_encode %{
11175     __ subl($dst$$Register, $src$$Address);
11176   %}
11177   ins_pipe(ialu_reg_mem);
11178 %}
11179 
11180 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11181 %{
11182   predicate(UseAPX);
11183   match(Set dst (SubI src1 (LoadI src2)));
11184   effect(KILL cr);
11185   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11186 
11187   ins_cost(150);
11188   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11189   ins_encode %{
11190     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11191   %}
11192   ins_pipe(ialu_reg_mem);
11193 %}
11194 
11195 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11196 %{
11197   predicate(UseAPX);
11198   match(Set dst (SubI (LoadI src1) src2));
11199   effect(KILL cr);
11200   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11201 
11202   ins_cost(150);
11203   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11204   ins_encode %{
11205     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11206   %}
11207   ins_pipe(ialu_reg_mem);
11208 %}
11209 
11210 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11211 %{
11212   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11213   effect(KILL cr);
11214   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11215 
11216   ins_cost(150);
11217   format %{ "subl    $dst, $src\t# int" %}
11218   ins_encode %{
11219     __ subl($dst$$Address, $src$$Register);
11220   %}
11221   ins_pipe(ialu_mem_reg);
11222 %}
11223 
11224 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11225 %{
11226   predicate(!UseAPX);
11227   match(Set dst (SubL dst src));
11228   effect(KILL cr);
11229   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11230 
11231   format %{ "subq    $dst, $src\t# long" %}
11232   ins_encode %{
11233     __ subq($dst$$Register, $src$$Register);
11234   %}
11235   ins_pipe(ialu_reg_reg);
11236 %}
11237 
11238 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11239 %{
11240   predicate(UseAPX);
11241   match(Set dst (SubL src1 src2));
11242   effect(KILL cr);
11243   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11244 
11245   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11246   ins_encode %{
11247     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11248   %}
11249   ins_pipe(ialu_reg_reg);
11250 %}
11251 
11252 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11253 %{
11254   predicate(UseAPX);
11255   match(Set dst (SubL src1 src2));
11256   effect(KILL cr);
11257   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11258 
11259   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11260   ins_encode %{
11261     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11262   %}
11263   ins_pipe(ialu_reg_reg);
11264 %}
11265 
11266 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11267 %{
11268   predicate(UseAPX);
11269   match(Set dst (SubL (LoadL src1) src2));
11270   effect(KILL cr);
11271   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11272 
11273   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11274   ins_encode %{
11275     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11276   %}
11277   ins_pipe(ialu_reg_reg);
11278 %}
11279 
11280 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11281 %{
11282   predicate(!UseAPX);
11283   match(Set dst (SubL dst (LoadL src)));
11284   effect(KILL cr);
11285   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11286 
11287   ins_cost(150);
11288   format %{ "subq    $dst, $src\t# long" %}
11289   ins_encode %{
11290     __ subq($dst$$Register, $src$$Address);
11291   %}
11292   ins_pipe(ialu_reg_mem);
11293 %}
11294 
11295 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11296 %{
11297   predicate(UseAPX);
11298   match(Set dst (SubL src1 (LoadL src2)));
11299   effect(KILL cr);
11300   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11301 
11302   ins_cost(150);
11303   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11304   ins_encode %{
11305     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11306   %}
11307   ins_pipe(ialu_reg_mem);
11308 %}
11309 
11310 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11311 %{
11312   predicate(UseAPX);
11313   match(Set dst (SubL (LoadL src1) src2));
11314   effect(KILL cr);
11315   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11316 
11317   ins_cost(150);
11318   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11319   ins_encode %{
11320     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11321   %}
11322   ins_pipe(ialu_reg_mem);
11323 %}
11324 
11325 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11326 %{
11327   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11328   effect(KILL cr);
11329   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11330 
11331   ins_cost(150);
11332   format %{ "subq    $dst, $src\t# long" %}
11333   ins_encode %{
11334     __ subq($dst$$Address, $src$$Register);
11335   %}
11336   ins_pipe(ialu_mem_reg);
11337 %}
11338 
11339 // Subtract from a pointer
11340 // XXX hmpf???
11341 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11342 %{
11343   match(Set dst (AddP dst (SubI zero src)));
11344   effect(KILL cr);
11345 
11346   format %{ "subq    $dst, $src\t# ptr - int" %}
11347   ins_encode %{
11348     __ subq($dst$$Register, $src$$Register);
11349   %}
11350   ins_pipe(ialu_reg_reg);
11351 %}
11352 
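// Negation: both the explicit NegI/NegL nodes and the (Sub 0 x) pattern are
// matched to a one-operand neg, or, under APX, to eneg with a separate
// destination register.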
11353 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11354 %{
11355   predicate(!UseAPX);
11356   match(Set dst (SubI zero dst));
11357   effect(KILL cr);
11358   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11359 
11360   format %{ "negl    $dst\t# int" %}
11361   ins_encode %{
11362     __ negl($dst$$Register);
11363   %}
11364   ins_pipe(ialu_reg);
11365 %}
11366 
11367 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11368 %{
11369   predicate(UseAPX);
11370   match(Set dst (SubI zero src));
11371   effect(KILL cr);
11372   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11373 
11374   format %{ "enegl    $dst, $src\t# int ndd" %}
11375   ins_encode %{
11376     __ enegl($dst$$Register, $src$$Register, false);
11377   %}
11378   ins_pipe(ialu_reg);
11379 %}
11380 
11381 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11382 %{
11383   predicate(!UseAPX);
11384   match(Set dst (NegI dst));
11385   effect(KILL cr);
11386   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11387 
11388   format %{ "negl    $dst\t# int" %}
11389   ins_encode %{
11390     __ negl($dst$$Register);
11391   %}
11392   ins_pipe(ialu_reg);
11393 %}
11394 
11395 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11396 %{
11397   predicate(UseAPX);
11398   match(Set dst (NegI src));
11399   effect(KILL cr);
11400   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11401 
11402   format %{ "enegl    $dst, $src\t# int ndd" %}
11403   ins_encode %{
11404     __ enegl($dst$$Register, $src$$Register, false);
11405   %}
11406   ins_pipe(ialu_reg);
11407 %}
11408 
11409 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11410 %{
11411   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11412   effect(KILL cr);
11413   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11414 
11415   format %{ "negl    $dst\t# int" %}
11416   ins_encode %{
11417     __ negl($dst$$Address);
11418   %}
11419   ins_pipe(ialu_reg);
11420 %}
11421 
11422 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11423 %{
11424   predicate(!UseAPX);
11425   match(Set dst (SubL zero dst));
11426   effect(KILL cr);
11427   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11428 
11429   format %{ "negq    $dst\t# long" %}
11430   ins_encode %{
11431     __ negq($dst$$Register);
11432   %}
11433   ins_pipe(ialu_reg);
11434 %}
11435 
11436 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11437 %{
11438   predicate(UseAPX);
11439   match(Set dst (SubL zero src));
11440   effect(KILL cr);
11441   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11442 
11443   format %{ "enegq    $dst, $src\t# long ndd" %}
11444   ins_encode %{
11445     __ enegq($dst$$Register, $src$$Register, false);
11446   %}
11447   ins_pipe(ialu_reg);
11448 %}
11449 
11450 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11451 %{
11452   predicate(!UseAPX);
11453   match(Set dst (NegL dst));
11454   effect(KILL cr);
11455   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11456 
11457   format %{ "negq    $dst\t# long" %}
11458   ins_encode %{
11459     __ negq($dst$$Register);
11460   %}
11461   ins_pipe(ialu_reg);
11462 %}
11463 
11464 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11465 %{
11466   predicate(UseAPX);
11467   match(Set dst (NegL src));
11468   effect(KILL cr);
11469   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11470 
11471   format %{ "enegq    $dst, $src\t# long ndd" %}
11472   ins_encode %{
11473     __ enegq($dst$$Register, $src$$Register, false);
11474   %}
11475   ins_pipe(ialu_reg);
11476 %}
11477 
11478 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11479 %{
11480   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11481   effect(KILL cr);
11482   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11483 
11484   format %{ "negq    $dst\t# long" %}
11485   ins_encode %{
11486     __ negq($dst$$Address);
11487   %}
11488   ins_pipe(ialu_reg);
11489 %}
11490 
11491 //----------Multiplication/Division Instructions-------------------------------
11492 // Integer Multiplication Instructions
11493 // Multiply Register
11494 
11495 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11496 %{
11497   predicate(!UseAPX);
11498   match(Set dst (MulI dst src));
11499   effect(KILL cr);
11500 
11501   ins_cost(300);
11502   format %{ "imull   $dst, $src\t# int" %}
11503   ins_encode %{
11504     __ imull($dst$$Register, $src$$Register);
11505   %}
11506   ins_pipe(ialu_reg_reg_alu0);
11507 %}
11508 
11509 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11510 %{
11511   predicate(UseAPX);
11512   match(Set dst (MulI src1 src2));
11513   effect(KILL cr);
11514   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11515 
11516   ins_cost(300);
11517   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11518   ins_encode %{
11519     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11520   %}
11521   ins_pipe(ialu_reg_reg_alu0);
11522 %}
11523 
11524 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11525 %{
11526   match(Set dst (MulI src imm));
11527   effect(KILL cr);
11528 
11529   ins_cost(300);
11530   format %{ "imull   $dst, $src, $imm\t# int" %}
11531   ins_encode %{
11532     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11533   %}
11534   ins_pipe(ialu_reg_reg_alu0);
11535 %}
11536 
11537 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11538 %{
11539   predicate(!UseAPX);
11540   match(Set dst (MulI dst (LoadI src)));
11541   effect(KILL cr);
11542 
11543   ins_cost(350);
11544   format %{ "imull   $dst, $src\t# int" %}
11545   ins_encode %{
11546     __ imull($dst$$Register, $src$$Address);
11547   %}
11548   ins_pipe(ialu_reg_mem_alu0);
11549 %}
11550 
11551 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11552 %{
11553   predicate(UseAPX);
11554   match(Set dst (MulI src1 (LoadI src2)));
11555   effect(KILL cr);
11556   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11557 
11558   ins_cost(350);
11559   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11560   ins_encode %{
11561     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11562   %}
11563   ins_pipe(ialu_reg_mem_alu0);
11564 %}
11565 
11566 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11567 %{
11568   match(Set dst (MulI (LoadI src) imm));
11569   effect(KILL cr);
11570 
11571   ins_cost(300);
11572   format %{ "imull   $dst, $src, $imm\t# int" %}
11573   ins_encode %{
11574     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11575   %}
11576   ins_pipe(ialu_reg_mem_alu0);
11577 %}
11578 
11579 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11580 %{
11581   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11582   effect(KILL cr, KILL src2);
11583 
11584   expand %{ mulI_rReg(dst, src1, cr);
11585            mulI_rReg(src2, src3, cr);
11586            addI_rReg(dst, src2, cr); %}
11587 %}
11588 
11589 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11590 %{
11591   predicate(!UseAPX);
11592   match(Set dst (MulL dst src));
11593   effect(KILL cr);
11594 
11595   ins_cost(300);
11596   format %{ "imulq   $dst, $src\t# long" %}
11597   ins_encode %{
11598     __ imulq($dst$$Register, $src$$Register);
11599   %}
11600   ins_pipe(ialu_reg_reg_alu0);
11601 %}
11602 
11603 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11604 %{
11605   predicate(UseAPX);
11606   match(Set dst (MulL src1 src2));
11607   effect(KILL cr);
11608   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11609 
11610   ins_cost(300);
11611   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11612   ins_encode %{
11613     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11614   %}
11615   ins_pipe(ialu_reg_reg_alu0);
11616 %}
11617 
11618 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11619 %{
11620   match(Set dst (MulL src imm));
11621   effect(KILL cr);
11622 
11623   ins_cost(300);
11624   format %{ "imulq   $dst, $src, $imm\t# long" %}
11625   ins_encode %{
11626     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11627   %}
11628   ins_pipe(ialu_reg_reg_alu0);
11629 %}
11630 
11631 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11632 %{
11633   predicate(!UseAPX);
11634   match(Set dst (MulL dst (LoadL src)));
11635   effect(KILL cr);
11636 
11637   ins_cost(350);
11638   format %{ "imulq   $dst, $src\t# long" %}
11639   ins_encode %{
11640     __ imulq($dst$$Register, $src$$Address);
11641   %}
11642   ins_pipe(ialu_reg_mem_alu0);
11643 %}
11644 
11645 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11646 %{
11647   predicate(UseAPX);
11648   match(Set dst (MulL src1 (LoadL src2)));
11649   effect(KILL cr);
11650   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11651 
11652   ins_cost(350);
11653   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11654   ins_encode %{
11655     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11656   %}
11657   ins_pipe(ialu_reg_mem_alu0);
11658 %}
11659 
11660 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11661 %{
11662   match(Set dst (MulL (LoadL src) imm));
11663   effect(KILL cr);
11664 
11665   ins_cost(300);
11666   format %{ "imulq   $dst, $src, $imm\t# long" %}
11667   ins_encode %{
11668     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11669   %}
11670   ins_pipe(ialu_reg_mem_alu0);
11671 %}
11672 
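// High-half multiplies: the one-operand imulq/mulq forms produce the full
// 128-bit product in RDX:RAX, with one factor implicitly in RAX.  These
// instructs keep only the high 64 bits (RDX) as the MulHiL/UMulHiL result,
// which is why rax is USE_KILLed and rdx is the destination.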
11673 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11674 %{
11675   match(Set dst (MulHiL src rax));
11676   effect(USE_KILL rax, KILL cr);
11677 
11678   ins_cost(300);
11679   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11680   ins_encode %{
11681     __ imulq($src$$Register);
11682   %}
11683   ins_pipe(ialu_reg_reg_alu0);
11684 %}
11685 
11686 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11687 %{
11688   match(Set dst (UMulHiL src rax));
11689   effect(USE_KILL rax, KILL cr);
11690 
11691   ins_cost(300);
11692   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11693   ins_encode %{
11694     __ mulq($src$$Register);
11695   %}
11696   ins_pipe(ialu_reg_reg_alu0);
11697 %}
11698 
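// Signed division cannot simply emit idiv: dividing the most negative value
// by -1 overflows the quotient and raises #DE.  The cdql_enc/cdqq_enc stubs
// therefore guard that one case, producing quotient = MIN_VALUE and
// remainder = 0 directly (as Java semantics require), and otherwise
// sign-extend rax into rdx (cdq) before the divide.  Roughly:
//   if (rax == MIN_VALUE && div == -1) { rdx = 0; /* rax stays MIN_VALUE */ }
//   else { rdx:rax = sign_extend(rax); idiv div; /* rax = quot, rdx = rem */ }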
11699 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11700                    rFlagsReg cr)
11701 %{
11702   match(Set rax (DivI rax div));
11703   effect(KILL rdx, KILL cr);
11704 
11705   ins_cost(30*100+10*100); // XXX
11706   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11707             "jne,s   normal\n\t"
11708             "xorl    rdx, rdx\n\t"
11709             "cmpl    $div, -1\n\t"
11710             "je,s    done\n"
11711     "normal: cdql\n\t"
11712             "idivl   $div\n"
11713     "done:"        %}
11714   ins_encode(cdql_enc(div));
11715   ins_pipe(ialu_reg_reg_alu0);
11716 %}
11717 
11718 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11719                    rFlagsReg cr)
11720 %{
11721   match(Set rax (DivL rax div));
11722   effect(KILL rdx, KILL cr);
11723 
11724   ins_cost(30*100+10*100); // XXX
11725   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11726             "cmpq    rax, rdx\n\t"
11727             "jne,s   normal\n\t"
11728             "xorl    rdx, rdx\n\t"
11729             "cmpq    $div, -1\n\t"
11730             "je,s    done\n"
11731     "normal: cdqq\n\t"
11732             "idivq   $div\n"
11733     "done:"        %}
11734   ins_encode(cdqq_enc(div));
11735   ins_pipe(ialu_reg_reg_alu0);
11736 %}
11737 
11738 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11739 %{
11740   match(Set rax (UDivI rax div));
11741   effect(KILL rdx, KILL cr);
11742 
11743   ins_cost(300);
11744   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11745   ins_encode %{
11746     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11747   %}
11748   ins_pipe(ialu_reg_reg_alu0);
11749 %}
11750 
11751 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11752 %{
11753   match(Set rax (UDivL rax div));
11754   effect(KILL rdx, KILL cr);
11755 
11756   ins_cost(300);
11757   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11758   ins_encode %{
11759      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11760   %}
11761   ins_pipe(ialu_reg_reg_alu0);
11762 %}
11763 
11764 // Integer DIVMOD with Register, both quotient and mod results
11765 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11766                              rFlagsReg cr)
11767 %{
11768   match(DivModI rax div);
11769   effect(KILL cr);
11770 
11771   ins_cost(30*100+10*100); // XXX
11772   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11773             "jne,s   normal\n\t"
11774             "xorl    rdx, rdx\n\t"
11775             "cmpl    $div, -1\n\t"
11776             "je,s    done\n"
11777     "normal: cdql\n\t"
11778             "idivl   $div\n"
11779     "done:"        %}
11780   ins_encode(cdql_enc(div));
11781   ins_pipe(pipe_slow);
11782 %}
11783 
11784 // Long DIVMOD with Register, both quotient and mod results
11785 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11786                              rFlagsReg cr)
11787 %{
11788   match(DivModL rax div);
11789   effect(KILL cr);
11790 
11791   ins_cost(30*100+10*100); // XXX
11792   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11793             "cmpq    rax, rdx\n\t"
11794             "jne,s   normal\n\t"
11795             "xorl    rdx, rdx\n\t"
11796             "cmpq    $div, -1\n\t"
11797             "je,s    done\n"
11798     "normal: cdqq\n\t"
11799             "idivq   $div\n"
11800     "done:"        %}
11801   ins_encode(cdqq_enc(div));
11802   ins_pipe(pipe_slow);
11803 %}
11804 
11805 // Unsigned integer DIVMOD with Register, both quotient and mod results
11806 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11807                               no_rax_rdx_RegI div, rFlagsReg cr)
11808 %{
11809   match(UDivModI rax div);
11810   effect(TEMP tmp, KILL cr);
11811 
11812   ins_cost(300);
11813   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11814             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11815           %}
11816   ins_encode %{
11817     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11818   %}
11819   ins_pipe(pipe_slow);
11820 %}
11821 
11822 // Unsigned long DIVMOD with Register, both quotient and mod results
11823 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11824                               no_rax_rdx_RegL div, rFlagsReg cr)
11825 %{
11826   match(UDivModL rax div);
11827   effect(TEMP tmp, KILL cr);
11828 
11829   ins_cost(300);
11830   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11831             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11832           %}
11833   ins_encode %{
11834     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11835   %}
11836   ins_pipe(pipe_slow);
11837 %}
11838 
11839 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11840                    rFlagsReg cr)
11841 %{
11842   match(Set rdx (ModI rax div));
11843   effect(KILL rax, KILL cr);
11844 
11845   ins_cost(300); // XXX
11846   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11847             "jne,s   normal\n\t"
11848             "xorl    rdx, rdx\n\t"
11849             "cmpl    $div, -1\n\t"
11850             "je,s    done\n"
11851     "normal: cdql\n\t"
11852             "idivl   $div\n"
11853     "done:"        %}
11854   ins_encode(cdql_enc(div));
11855   ins_pipe(ialu_reg_reg_alu0);
11856 %}
11857 
11858 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11859                    rFlagsReg cr)
11860 %{
11861   match(Set rdx (ModL rax div));
11862   effect(KILL rax, KILL cr);
11863 
11864   ins_cost(300); // XXX
11865   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11866             "cmpq    rax, rdx\n\t"
11867             "jne,s   normal\n\t"
11868             "xorl    rdx, rdx\n\t"
11869             "cmpq    $div, -1\n\t"
11870             "je,s    done\n"
11871     "normal: cdqq\n\t"
11872             "idivq   $div\n"
11873     "done:"        %}
11874   ins_encode(cdqq_enc(div));
11875   ins_pipe(ialu_reg_reg_alu0);
11876 %}
11877 
11878 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11879 %{
11880   match(Set rdx (UModI rax div));
11881   effect(KILL rax, KILL cr);
11882 
11883   ins_cost(300);
11884   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11885   ins_encode %{
11886     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11887   %}
11888   ins_pipe(ialu_reg_reg_alu0);
11889 %}
11890 
11891 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11892 %{
11893   match(Set rdx (UModL rax div));
11894   effect(KILL rax, KILL cr);
11895 
11896   ins_cost(300);
11897   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11898   ins_encode %{
11899     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11900   %}
11901   ins_pipe(ialu_reg_reg_alu0);
11902 %}
11903 
11904 // Integer Shift Instructions
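// Shift-by-variable has two forms: without BMI2 the count must be in CL
// (rcx_RegI) and the legacy sal/sar/shr clobber the flags; with BMI2 the
// shlx/sarx/shrx instructions take the count in any register and leave the
// flags untouched, so those instructs need neither the rcx constraint nor a
// KILL cr effect.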
11905 // Shift Left by one, two, three
11906 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11907 %{
11908   predicate(!UseAPX);
11909   match(Set dst (LShiftI dst shift));
11910   effect(KILL cr);
11911 
11912   format %{ "sall    $dst, $shift" %}
11913   ins_encode %{
11914     __ sall($dst$$Register, $shift$$constant);
11915   %}
11916   ins_pipe(ialu_reg);
11917 %}
11918 
11919 // Shift Left by one, two, three
11920 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11921 %{
11922   predicate(UseAPX);
11923   match(Set dst (LShiftI src shift));
11924   effect(KILL cr);
11925   flag(PD::Flag_ndd_demotable_opr1);
11926 
11927   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11928   ins_encode %{
11929     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11930   %}
11931   ins_pipe(ialu_reg);
11932 %}
11933 
11934 // Shift Left by 8-bit immediate
11935 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11936 %{
11937   predicate(!UseAPX);
11938   match(Set dst (LShiftI dst shift));
11939   effect(KILL cr);
11940 
11941   format %{ "sall    $dst, $shift" %}
11942   ins_encode %{
11943     __ sall($dst$$Register, $shift$$constant);
11944   %}
11945   ins_pipe(ialu_reg);
11946 %}
11947 
11948 // Shift Left by 8-bit immediate
11949 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11950 %{
11951   predicate(UseAPX);
11952   match(Set dst (LShiftI src shift));
11953   effect(KILL cr);
11954   flag(PD::Flag_ndd_demotable_opr1);
11955 
11956   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11957   ins_encode %{
11958     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11959   %}
11960   ins_pipe(ialu_reg);
11961 %}
11962 
11963 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11964 %{
11965   predicate(UseAPX);
11966   match(Set dst (LShiftI (LoadI src) shift));
11967   effect(KILL cr);
11968 
11969   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11970   ins_encode %{
11971     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11972   %}
11973   ins_pipe(ialu_reg);
11974 %}
11975 
11976 // Shift Left by 8-bit immediate
11977 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11978 %{
11979   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11980   effect(KILL cr);
11981 
11982   format %{ "sall    $dst, $shift" %}
11983   ins_encode %{
11984     __ sall($dst$$Address, $shift$$constant);
11985   %}
11986   ins_pipe(ialu_mem_imm);
11987 %}
11988 
11989 // Shift Left by variable
11990 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11991 %{
11992   predicate(!VM_Version::supports_bmi2());
11993   match(Set dst (LShiftI dst shift));
11994   effect(KILL cr);
11995 
11996   format %{ "sall    $dst, $shift" %}
11997   ins_encode %{
11998     __ sall($dst$$Register);
11999   %}
12000   ins_pipe(ialu_reg_reg);
12001 %}
12002 
12003 // Shift Left by variable
12004 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12005 %{
12006   predicate(!VM_Version::supports_bmi2());
12007   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12008   effect(KILL cr);
12009 
12010   format %{ "sall    $dst, $shift" %}
12011   ins_encode %{
12012     __ sall($dst$$Address);
12013   %}
12014   ins_pipe(ialu_mem_reg);
12015 %}
12016 
12017 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12018 %{
12019   predicate(VM_Version::supports_bmi2());
12020   match(Set dst (LShiftI src shift));
12021 
12022   format %{ "shlxl   $dst, $src, $shift" %}
12023   ins_encode %{
12024     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12025   %}
12026   ins_pipe(ialu_reg_reg);
12027 %}
12028 
12029 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12030 %{
12031   predicate(VM_Version::supports_bmi2());
12032   match(Set dst (LShiftI (LoadI src) shift));
12033   ins_cost(175);
12034   format %{ "shlxl   $dst, $src, $shift" %}
12035   ins_encode %{
12036     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12037   %}
12038   ins_pipe(ialu_reg_mem);
12039 %}
12040 
12041 // Arithmetic Shift Right by 8-bit immediate
12042 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12043 %{
12044   predicate(!UseAPX);
12045   match(Set dst (RShiftI dst shift));
12046   effect(KILL cr);
12047 
12048   format %{ "sarl    $dst, $shift" %}
12049   ins_encode %{
12050     __ sarl($dst$$Register, $shift$$constant);
12051   %}
12052   ins_pipe(ialu_mem_imm);
12053 %}
12054 
12055 // Arithmetic Shift Right by 8-bit immediate
12056 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12057 %{
12058   predicate(UseAPX);
12059   match(Set dst (RShiftI src shift));
12060   effect(KILL cr);
12061   flag(PD::Flag_ndd_demotable_opr1);
12062 
12063   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12064   ins_encode %{
12065     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12066   %}
12067   ins_pipe(ialu_mem_imm);
12068 %}
12069 
12070 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12071 %{
12072   predicate(UseAPX);
12073   match(Set dst (RShiftI (LoadI src) shift));
12074   effect(KILL cr);
12075 
12076   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12077   ins_encode %{
12078     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12079   %}
12080   ins_pipe(ialu_mem_imm);
12081 %}
12082 
12083 // Arithmetic Shift Right by 8-bit immediate
12084 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12085 %{
12086   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12087   effect(KILL cr);
12088 
12089   format %{ "sarl    $dst, $shift" %}
12090   ins_encode %{
12091     __ sarl($dst$$Address, $shift$$constant);
12092   %}
12093   ins_pipe(ialu_mem_imm);
12094 %}
12095 
12096 // Arithmetic Shift Right by variable
12097 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12098 %{
12099   predicate(!VM_Version::supports_bmi2());
12100   match(Set dst (RShiftI dst shift));
12101   effect(KILL cr);
12102 
12103   format %{ "sarl    $dst, $shift" %}
12104   ins_encode %{
12105     __ sarl($dst$$Register);
12106   %}
12107   ins_pipe(ialu_reg_reg);
12108 %}
12109 
12110 // Arithmetic Shift Right by variable
12111 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12112 %{
12113   predicate(!VM_Version::supports_bmi2());
12114   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12115   effect(KILL cr);
12116 
12117   format %{ "sarl    $dst, $shift" %}
12118   ins_encode %{
12119     __ sarl($dst$$Address);
12120   %}
12121   ins_pipe(ialu_mem_reg);
12122 %}
12123 
12124 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12125 %{
12126   predicate(VM_Version::supports_bmi2());
12127   match(Set dst (RShiftI src shift));
12128 
12129   format %{ "sarxl   $dst, $src, $shift" %}
12130   ins_encode %{
12131     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12132   %}
12133   ins_pipe(ialu_reg_reg);
12134 %}
12135 
12136 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12137 %{
12138   predicate(VM_Version::supports_bmi2());
12139   match(Set dst (RShiftI (LoadI src) shift));
12140   ins_cost(175);
12141   format %{ "sarxl   $dst, $src, $shift" %}
12142   ins_encode %{
12143     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12144   %}
12145   ins_pipe(ialu_reg_mem);
12146 %}
12147 
12148 // Logical Shift Right by 8-bit immediate
12149 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12150 %{
12151   predicate(!UseAPX);
12152   match(Set dst (URShiftI dst shift));
12153   effect(KILL cr);
12154 
12155   format %{ "shrl    $dst, $shift" %}
12156   ins_encode %{
12157     __ shrl($dst$$Register, $shift$$constant);
12158   %}
12159   ins_pipe(ialu_reg);
12160 %}
12161 
12162 // Logical Shift Right by 8-bit immediate
12163 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12164 %{
12165   predicate(UseAPX);
12166   match(Set dst (URShiftI src shift));
12167   effect(KILL cr);
12168   flag(PD::Flag_ndd_demotable_opr1);
12169 
12170   format %{ "eshrl    $dst, $src, $shift\t# int (ndd)" %}
12171   ins_encode %{
12172     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12173   %}
12174   ins_pipe(ialu_reg);
12175 %}
12176 
12177 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12178 %{
12179   predicate(UseAPX);
12180   match(Set dst (URShiftI (LoadI src) shift));
12181   effect(KILL cr);
12182 
12183   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12184   ins_encode %{
12185     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12186   %}
12187   ins_pipe(ialu_reg);
12188 %}
12189 
12190 // Logical Shift Right by 8-bit immediate
12191 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12192 %{
12193   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12194   effect(KILL cr);
12195 
12196   format %{ "shrl    $dst, $shift" %}
12197   ins_encode %{
12198     __ shrl($dst$$Address, $shift$$constant);
12199   %}
12200   ins_pipe(ialu_mem_imm);
12201 %}
12202 
12203 // Logical Shift Right by variable
12204 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12205 %{
12206   predicate(!VM_Version::supports_bmi2());
12207   match(Set dst (URShiftI dst shift));
12208   effect(KILL cr);
12209 
12210   format %{ "shrl    $dst, $shift" %}
12211   ins_encode %{
12212     __ shrl($dst$$Register);
12213   %}
12214   ins_pipe(ialu_reg_reg);
12215 %}
12216 
12217 // Logical Shift Right by variable
12218 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12219 %{
12220   predicate(!VM_Version::supports_bmi2());
12221   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12222   effect(KILL cr);
12223 
12224   format %{ "shrl    $dst, $shift" %}
12225   ins_encode %{
12226     __ shrl($dst$$Address);
12227   %}
12228   ins_pipe(ialu_mem_reg);
12229 %}
12230 
12231 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12232 %{
12233   predicate(VM_Version::supports_bmi2());
12234   match(Set dst (URShiftI src shift));
12235 
12236   format %{ "shrxl   $dst, $src, $shift" %}
12237   ins_encode %{
12238     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12239   %}
12240   ins_pipe(ialu_reg_reg);
12241 %}
12242 
12243 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12244 %{
12245   predicate(VM_Version::supports_bmi2());
12246   match(Set dst (URShiftI (LoadI src) shift));
12247   ins_cost(175);
12248   format %{ "shrxl   $dst, $src, $shift" %}
12249   ins_encode %{
12250     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12251   %}
12252   ins_pipe(ialu_reg_mem);
12253 %}
12254 
12255 // Long Shift Instructions
12256 // Shift Left by one, two, three
12257 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12258 %{
12259   predicate(!UseAPX);
12260   match(Set dst (LShiftL dst shift));
12261   effect(KILL cr);
12262 
12263   format %{ "salq    $dst, $shift" %}
12264   ins_encode %{
12265     __ salq($dst$$Register, $shift$$constant);
12266   %}
12267   ins_pipe(ialu_reg);
12268 %}
12269 
12270 // Shift Left by one, two, three
12271 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12272 %{
12273   predicate(UseAPX);
12274   match(Set dst (LShiftL src shift));
12275   effect(KILL cr);
12276   flag(PD::Flag_ndd_demotable_opr1);
12277 
12278   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12279   ins_encode %{
12280     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12281   %}
12282   ins_pipe(ialu_reg);
12283 %}
12284 
12285 // Shift Left by 8-bit immediate
12286 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12287 %{
12288   predicate(!UseAPX);
12289   match(Set dst (LShiftL dst shift));
12290   effect(KILL cr);
12291 
12292   format %{ "salq    $dst, $shift" %}
12293   ins_encode %{
12294     __ salq($dst$$Register, $shift$$constant);
12295   %}
12296   ins_pipe(ialu_reg);
12297 %}
12298 
12299 // Shift Left by 8-bit immediate
12300 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12301 %{
12302   predicate(UseAPX);
12303   match(Set dst (LShiftL src shift));
12304   effect(KILL cr);
12305   flag(PD::Flag_ndd_demotable_opr1);
12306 
12307   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12308   ins_encode %{
12309     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12310   %}
12311   ins_pipe(ialu_reg);
12312 %}
12313 
12314 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12315 %{
12316   predicate(UseAPX);
12317   match(Set dst (LShiftL (LoadL src) shift));
12318   effect(KILL cr);
12319 
12320   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12321   ins_encode %{
12322     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12323   %}
12324   ins_pipe(ialu_reg);
12325 %}
12326 
12327 // Shift Left by 8-bit immediate
12328 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12329 %{
12330   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12331   effect(KILL cr);
12332 
12333   format %{ "salq    $dst, $shift" %}
12334   ins_encode %{
12335     __ salq($dst$$Address, $shift$$constant);
12336   %}
12337   ins_pipe(ialu_mem_imm);
12338 %}
12339 
12340 // Shift Left by variable
12341 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12342 %{
12343   predicate(!VM_Version::supports_bmi2());
12344   match(Set dst (LShiftL dst shift));
12345   effect(KILL cr);
12346 
12347   format %{ "salq    $dst, $shift" %}
12348   ins_encode %{
12349     __ salq($dst$$Register);
12350   %}
12351   ins_pipe(ialu_reg_reg);
12352 %}
12353 
12354 // Shift Left by variable
12355 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12356 %{
12357   predicate(!VM_Version::supports_bmi2());
12358   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12359   effect(KILL cr);
12360 
12361   format %{ "salq    $dst, $shift" %}
12362   ins_encode %{
12363     __ salq($dst$$Address);
12364   %}
12365   ins_pipe(ialu_mem_reg);
12366 %}
12367 
12368 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12369 %{
12370   predicate(VM_Version::supports_bmi2());
12371   match(Set dst (LShiftL src shift));
12372 
12373   format %{ "shlxq   $dst, $src, $shift" %}
12374   ins_encode %{
12375     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12376   %}
12377   ins_pipe(ialu_reg_reg);
12378 %}
12379 
12380 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12381 %{
12382   predicate(VM_Version::supports_bmi2());
12383   match(Set dst (LShiftL (LoadL src) shift));
12384   ins_cost(175);
12385   format %{ "shlxq   $dst, $src, $shift" %}
12386   ins_encode %{
12387     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12388   %}
12389   ins_pipe(ialu_reg_mem);
12390 %}
12391 
12392 // Arithmetic Shift Right by 8-bit immediate
12393 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12394 %{
12395   predicate(!UseAPX);
12396   match(Set dst (RShiftL dst shift));
12397   effect(KILL cr);
12398 
12399   format %{ "sarq    $dst, $shift" %}
12400   ins_encode %{
12401     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12402   %}
  ins_pipe(ialu_reg);
12404 %}
12405 
12406 // Arithmetic Shift Right by 8-bit immediate
12407 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12408 %{
12409   predicate(UseAPX);
12410   match(Set dst (RShiftL src shift));
12411   effect(KILL cr);
12412   flag(PD::Flag_ndd_demotable_opr1);
12413 
12414   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12415   ins_encode %{
12416     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12417   %}
  ins_pipe(ialu_reg);
12419 %}
12420 
12421 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12422 %{
12423   predicate(UseAPX);
12424   match(Set dst (RShiftL (LoadL src) shift));
12425   effect(KILL cr);
12426 
12427   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12428   ins_encode %{
12429     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12430   %}
  ins_pipe(ialu_reg);
12432 %}
12433 
12434 // Arithmetic Shift Right by 8-bit immediate
12435 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12436 %{
12437   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12438   effect(KILL cr);
12439 
12440   format %{ "sarq    $dst, $shift" %}
12441   ins_encode %{
12442     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12443   %}
12444   ins_pipe(ialu_mem_imm);
12445 %}
12446 
12447 // Arithmetic Shift Right by variable
12448 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12449 %{
12450   predicate(!VM_Version::supports_bmi2());
12451   match(Set dst (RShiftL dst shift));
12452   effect(KILL cr);
12453 
12454   format %{ "sarq    $dst, $shift" %}
12455   ins_encode %{
12456     __ sarq($dst$$Register);
12457   %}
12458   ins_pipe(ialu_reg_reg);
12459 %}
12460 
12461 // Arithmetic Shift Right by variable
12462 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12463 %{
12464   predicate(!VM_Version::supports_bmi2());
12465   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12466   effect(KILL cr);
12467 
12468   format %{ "sarq    $dst, $shift" %}
12469   ins_encode %{
12470     __ sarq($dst$$Address);
12471   %}
12472   ins_pipe(ialu_mem_reg);
12473 %}
12474 
12475 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12476 %{
12477   predicate(VM_Version::supports_bmi2());
12478   match(Set dst (RShiftL src shift));
12479 
12480   format %{ "sarxq   $dst, $src, $shift" %}
12481   ins_encode %{
12482     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12483   %}
12484   ins_pipe(ialu_reg_reg);
12485 %}
12486 
12487 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12488 %{
12489   predicate(VM_Version::supports_bmi2());
12490   match(Set dst (RShiftL (LoadL src) shift));
12491   ins_cost(175);
12492   format %{ "sarxq   $dst, $src, $shift" %}
12493   ins_encode %{
12494     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12495   %}
12496   ins_pipe(ialu_reg_mem);
12497 %}
12498 
12499 // Logical Shift Right by 8-bit immediate
12500 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12501 %{
12502   predicate(!UseAPX);
12503   match(Set dst (URShiftL dst shift));
12504   effect(KILL cr);
12505 
12506   format %{ "shrq    $dst, $shift" %}
12507   ins_encode %{
12508     __ shrq($dst$$Register, $shift$$constant);
12509   %}
12510   ins_pipe(ialu_reg);
12511 %}
12512 
12513 // Logical Shift Right by 8-bit immediate
12514 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12515 %{
12516   predicate(UseAPX);
12517   match(Set dst (URShiftL src shift));
12518   effect(KILL cr);
12519   flag(PD::Flag_ndd_demotable_opr1);
12520 
12521   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12522   ins_encode %{
12523     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12524   %}
12525   ins_pipe(ialu_reg);
12526 %}
12527 
12528 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12529 %{
12530   predicate(UseAPX);
12531   match(Set dst (URShiftL (LoadL src) shift));
12532   effect(KILL cr);
12533 
12534   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12535   ins_encode %{
12536     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12537   %}
12538   ins_pipe(ialu_reg);
12539 %}
12540 
12541 // Logical Shift Right by 8-bit immediate
12542 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12543 %{
12544   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12545   effect(KILL cr);
12546 
12547   format %{ "shrq    $dst, $shift" %}
12548   ins_encode %{
12549     __ shrq($dst$$Address, $shift$$constant);
12550   %}
12551   ins_pipe(ialu_mem_imm);
12552 %}
12553 
12554 // Logical Shift Right by variable
12555 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12556 %{
12557   predicate(!VM_Version::supports_bmi2());
12558   match(Set dst (URShiftL dst shift));
12559   effect(KILL cr);
12560 
12561   format %{ "shrq    $dst, $shift" %}
12562   ins_encode %{
12563     __ shrq($dst$$Register);
12564   %}
12565   ins_pipe(ialu_reg_reg);
12566 %}
12567 
12568 // Logical Shift Right by variable
12569 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12570 %{
12571   predicate(!VM_Version::supports_bmi2());
12572   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12573   effect(KILL cr);
12574 
12575   format %{ "shrq    $dst, $shift" %}
12576   ins_encode %{
12577     __ shrq($dst$$Address);
12578   %}
12579   ins_pipe(ialu_mem_reg);
12580 %}
12581 
12582 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12583 %{
12584   predicate(VM_Version::supports_bmi2());
12585   match(Set dst (URShiftL src shift));
12586 
12587   format %{ "shrxq   $dst, $src, $shift" %}
12588   ins_encode %{
12589     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12590   %}
12591   ins_pipe(ialu_reg_reg);
12592 %}
12593 
12594 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12595 %{
12596   predicate(VM_Version::supports_bmi2());
12597   match(Set dst (URShiftL (LoadL src) shift));
12598   ins_cost(175);
12599   format %{ "shrxq   $dst, $src, $shift" %}
12600   ins_encode %{
12601     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12602   %}
12603   ins_pipe(ialu_reg_mem);
12604 %}
12605 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
12608 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12609 %{
12610   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12611 
12612   format %{ "movsbl  $dst, $src\t# i2b" %}
12613   ins_encode %{
12614     __ movsbl($dst$$Register, $src$$Register);
12615   %}
12616   ins_pipe(ialu_reg_reg);
12617 %}
12618 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12621 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12622 %{
12623   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12624 
12625   format %{ "movswl  $dst, $src\t# i2s" %}
12626   ins_encode %{
12627     __ movswl($dst$$Register, $src$$Register);
12628   %}
12629   ins_pipe(ialu_reg_reg);
12630 %}
12631 
12632 // ROL/ROR instructions
12633 
// Rotate Left by constant.
12635 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12636 %{
12637   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12638   match(Set dst (RotateLeft dst shift));
12639   effect(KILL cr);
12640   format %{ "roll    $dst, $shift" %}
12641   ins_encode %{
12642     __ roll($dst$$Register, $shift$$constant);
12643   %}
12644   ins_pipe(ialu_reg);
12645 %}
12646 
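// Rotate Left by constant.  BMI2 has no rolx encoding, so a left rotate by k
// is emitted as rorxl by (32 - k); rotate counts are taken mod 32, so the
// result is identical.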
12647 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12648 %{
12649   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12650   match(Set dst (RotateLeft src shift));
12651   format %{ "rolxl   $dst, $src, $shift" %}
12652   ins_encode %{
12653     int shift = 32 - ($shift$$constant & 31);
12654     __ rorxl($dst$$Register, $src$$Register, shift);
12655   %}
12656   ins_pipe(ialu_reg_reg);
12657 %}
12658 
12659 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12660 %{
12661   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12662   match(Set dst (RotateLeft (LoadI src) shift));
12663   ins_cost(175);
12664   format %{ "rolxl   $dst, $src, $shift" %}
12665   ins_encode %{
12666     int shift = 32 - ($shift$$constant & 31);
12667     __ rorxl($dst$$Register, $src$$Address, shift);
12668   %}
12669   ins_pipe(ialu_reg_mem);
12670 %}
12671 
12672 // Rotate Left by variable
12673 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12674 %{
12675   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12676   match(Set dst (RotateLeft dst shift));
12677   effect(KILL cr);
12678   format %{ "roll    $dst, $shift" %}
12679   ins_encode %{
12680     __ roll($dst$$Register);
12681   %}
12682   ins_pipe(ialu_reg_reg);
12683 %}
12684 
12685 // Rotate Left by variable
12686 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12687 %{
12688   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12689   match(Set dst (RotateLeft src shift));
12690   effect(KILL cr);
12691   flag(PD::Flag_ndd_demotable_opr1);
12692 
12693   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12694   ins_encode %{
12695     __ eroll($dst$$Register, $src$$Register, false);
12696   %}
12697   ins_pipe(ialu_reg_reg);
12698 %}
12699 
12700 // Rotate Right by constant.
12701 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12702 %{
12703   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12704   match(Set dst (RotateRight dst shift));
12705   effect(KILL cr);
12706   format %{ "rorl    $dst, $shift" %}
12707   ins_encode %{
12708     __ rorl($dst$$Register, $shift$$constant);
12709   %}
12710   ins_pipe(ialu_reg);
12711 %}
12712 
12713 // Rotate Right by constant.
12714 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12715 %{
12716   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12717   match(Set dst (RotateRight src shift));
12718   format %{ "rorxl   $dst, $src, $shift" %}
12719   ins_encode %{
12720     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12721   %}
12722   ins_pipe(ialu_reg_reg);
12723 %}
12724 
12725 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12726 %{
12727   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12728   match(Set dst (RotateRight (LoadI src) shift));
12729   ins_cost(175);
12730   format %{ "rorxl   $dst, $src, $shift" %}
12731   ins_encode %{
12732     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12733   %}
12734   ins_pipe(ialu_reg_mem);
12735 %}
12736 
12737 // Rotate Right by variable
12738 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12739 %{
12740   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12741   match(Set dst (RotateRight dst shift));
12742   effect(KILL cr);
12743   format %{ "rorl    $dst, $shift" %}
12744   ins_encode %{
12745     __ rorl($dst$$Register);
12746   %}
12747   ins_pipe(ialu_reg_reg);
12748 %}
12749 
12750 // Rotate Right by variable
12751 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12752 %{
12753   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12754   match(Set dst (RotateRight src shift));
12755   effect(KILL cr);
12756   flag(PD::Flag_ndd_demotable_opr1);
12757 
12758   format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
12759   ins_encode %{
12760     __ erorl($dst$$Register, $src$$Register, false);
12761   %}
12762   ins_pipe(ialu_reg_reg);
12763 %}
12764 
12765 // Rotate Left by constant.
12766 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12767 %{
12768   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12769   match(Set dst (RotateLeft dst shift));
12770   effect(KILL cr);
12771   format %{ "rolq    $dst, $shift" %}
12772   ins_encode %{
12773     __ rolq($dst$$Register, $shift$$constant);
12774   %}
12775   ins_pipe(ialu_reg);
12776 %}
12777 
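// Rotate Left by constant.  As with the int form, the left rotate is emitted
// as rorxq by (64 - k), since rotate counts are taken mod 64.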
12778 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12779 %{
12780   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12781   match(Set dst (RotateLeft src shift));
12782   format %{ "rolxq   $dst, $src, $shift" %}
12783   ins_encode %{
12784     int shift = 64 - ($shift$$constant & 63);
12785     __ rorxq($dst$$Register, $src$$Register, shift);
12786   %}
12787   ins_pipe(ialu_reg_reg);
12788 %}
12789 
12790 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12791 %{
12792   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12793   match(Set dst (RotateLeft (LoadL src) shift));
12794   ins_cost(175);
12795   format %{ "rolxq   $dst, $src, $shift" %}
12796   ins_encode %{
12797     int shift = 64 - ($shift$$constant & 63);
12798     __ rorxq($dst$$Register, $src$$Address, shift);
12799   %}
12800   ins_pipe(ialu_reg_mem);
12801 %}
12802 
12803 // Rotate Left by variable
12804 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12805 %{
12806   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12807   match(Set dst (RotateLeft dst shift));
12808   effect(KILL cr);
12809 
12810   format %{ "rolq    $dst, $shift" %}
12811   ins_encode %{
12812     __ rolq($dst$$Register);
12813   %}
12814   ins_pipe(ialu_reg_reg);
12815 %}
12816 
12817 // Rotate Left by variable
12818 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12819 %{
12820   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12821   match(Set dst (RotateLeft src shift));
12822   effect(KILL cr);
12823   flag(PD::Flag_ndd_demotable_opr1);
12824 
12825   format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
12826   ins_encode %{
12827     __ erolq($dst$$Register, $src$$Register, false);
12828   %}
12829   ins_pipe(ialu_reg_reg);
12830 %}
12831 
12832 // Rotate Right by constant.
12833 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12834 %{
12835   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12836   match(Set dst (RotateRight dst shift));
12837   effect(KILL cr);
12838   format %{ "rorq    $dst, $shift" %}
12839   ins_encode %{
12840     __ rorq($dst$$Register, $shift$$constant);
12841   %}
12842   ins_pipe(ialu_reg);
12843 %}
12844 
12845 // Rotate Right by constant
12846 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12847 %{
12848   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12849   match(Set dst (RotateRight src shift));
12850   format %{ "rorxq   $dst, $src, $shift" %}
12851   ins_encode %{
12852     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12853   %}
12854   ins_pipe(ialu_reg_reg);
12855 %}
12856 
12857 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12858 %{
12859   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12860   match(Set dst (RotateRight (LoadL src) shift));
12861   ins_cost(175);
12862   format %{ "rorxq   $dst, $src, $shift" %}
12863   ins_encode %{
12864     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12865   %}
12866   ins_pipe(ialu_reg_mem);
12867 %}
12868 
12869 // Rotate Right by variable
12870 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12871 %{
12872   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12873   match(Set dst (RotateRight dst shift));
12874   effect(KILL cr);
12875   format %{ "rorq    $dst, $shift" %}
12876   ins_encode %{
12877     __ rorq($dst$$Register);
12878   %}
12879   ins_pipe(ialu_reg_reg);
12880 %}
12881 
12882 // Rotate Right by variable
12883 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12884 %{
12885   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12886   match(Set dst (RotateRight src shift));
12887   effect(KILL cr);
12888   flag(PD::Flag_ndd_demotable_opr1);
12889 
12890   format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
12891   ins_encode %{
12892     __ erorq($dst$$Register, $src$$Register, false);
12893   %}
12894   ins_pipe(ialu_reg_reg);
12895 %}
12896 
12897 //----------------------------- CompressBits/ExpandBits ------------------------
12898 
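// pext gathers the src bits selected by mask into the low-order bits of dst;
// pdep performs the inverse scatter.  For example,
// pext(src = 0b101101, mask = 0b001110) selects src bits 1..3 (0, 1, 1) and
// packs them into the low bits, producing 0b110.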
12899 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12900   predicate(n->bottom_type()->isa_long());
12901   match(Set dst (CompressBits src mask));
12902   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12903   ins_encode %{
12904     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12905   %}
12906   ins_pipe( pipe_slow );
12907 %}
12908 
12909 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12910   predicate(n->bottom_type()->isa_long());
12911   match(Set dst (ExpandBits src mask));
12912   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12913   ins_encode %{
12914     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12915   %}
12916   ins_pipe( pipe_slow );
12917 %}
12918 
12919 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12920   predicate(n->bottom_type()->isa_long());
12921   match(Set dst (CompressBits src (LoadL mask)));
12922   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12923   ins_encode %{
12924     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12925   %}
12926   ins_pipe( pipe_slow );
12927 %}
12928 
12929 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12930   predicate(n->bottom_type()->isa_long());
12931   match(Set dst (ExpandBits src (LoadL mask)));
12932   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12933   ins_encode %{
12934     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12935   %}
12936   ins_pipe( pipe_slow );
12937 %}
12938 
12939 
12940 // Logical Instructions
12941 
12942 // Integer Logical Instructions
12943 
12944 // And Instructions
12945 // And Register with Register
12946 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12947 %{
12948   predicate(!UseAPX);
12949   match(Set dst (AndI dst src));
12950   effect(KILL cr);
12951   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12952 
12953   format %{ "andl    $dst, $src\t# int" %}
12954   ins_encode %{
12955     __ andl($dst$$Register, $src$$Register);
12956   %}
12957   ins_pipe(ialu_reg_reg);
12958 %}
12959 
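// The APX NDD rules in this section use EVEX encodings that write their
// result to a separate destination register instead of destructively updating
// a source.  The Flag_ndd_demotable_* hints mark operands that allow demotion
// to the shorter two-operand legacy encoding when the register allocator
// assigns dst to the same register as that operand.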
12960 // And Register with Register using New Data Destination (NDD)
12961 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12962 %{
12963   predicate(UseAPX);
12964   match(Set dst (AndI src1 src2));
12965   effect(KILL cr);
12966   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12967 
12968   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12969   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12973   ins_pipe(ialu_reg_reg);
12974 %}
12975 
12976 // And Register with Immediate 255
12977 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12978 %{
12979   match(Set dst (AndI src mask));
12980 
12981   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12982   ins_encode %{
12983     __ movzbl($dst$$Register, $src$$Register);
12984   %}
12985   ins_pipe(ialu_reg);
12986 %}
12987 
12988 // And Register with Immediate 255 and promote to long
12989 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12990 %{
12991   match(Set dst (ConvI2L (AndI src mask)));
12992 
12993   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
12994   ins_encode %{
12995     __ movzbl($dst$$Register, $src$$Register);
12996   %}
12997   ins_pipe(ialu_reg);
12998 %}
12999 
13000 // And Register with Immediate 65535
13001 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13002 %{
13003   match(Set dst (AndI src mask));
13004 
13005   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13006   ins_encode %{
13007     __ movzwl($dst$$Register, $src$$Register);
13008   %}
13009   ins_pipe(ialu_reg);
13010 %}
13011 
13012 // And Register with Immediate 65535 and promote to long
13013 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13014 %{
13015   match(Set dst (ConvI2L (AndI src mask)));
13016 
13017   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13018   ins_encode %{
13019     __ movzwl($dst$$Register, $src$$Register);
13020   %}
13021   ins_pipe(ialu_reg);
13022 %}
13023 
// The int-to-long conversion can be skipped after an AND with a small
// (2^n - 1) bitmask, since the masked result is already zero-extended.
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi2());
  ins_cost(125);
  effect(TEMP tmp, KILL cr);
  match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq   $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13032   ins_encode %{
13033     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13034     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13035   %}
13036   ins_pipe(ialu_reg_reg);
13037 %}
13038 
13039 // And Register with Immediate
13040 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13041 %{
13042   predicate(!UseAPX);
13043   match(Set dst (AndI dst src));
13044   effect(KILL cr);
13045   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13046 
13047   format %{ "andl    $dst, $src\t# int" %}
13048   ins_encode %{
13049     __ andl($dst$$Register, $src$$constant);
13050   %}
13051   ins_pipe(ialu_reg);
13052 %}
13053 
13054 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13055 %{
13056   predicate(UseAPX);
13057   match(Set dst (AndI src1 src2));
13058   effect(KILL cr);
13059   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13060 
13061   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13062   ins_encode %{
13063     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13064   %}
13065   ins_pipe(ialu_reg);
13066 %}
13067 
13068 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13069 %{
13070   predicate(UseAPX);
13071   match(Set dst (AndI (LoadI src1) src2));
13072   effect(KILL cr);
13073   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13074 
13075   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13076   ins_encode %{
13077     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13078   %}
13079   ins_pipe(ialu_reg);
13080 %}
13081 
13082 // And Register with Memory
13083 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13084 %{
13085   predicate(!UseAPX);
13086   match(Set dst (AndI dst (LoadI src)));
13087   effect(KILL cr);
13088   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13089 
13090   ins_cost(150);
13091   format %{ "andl    $dst, $src\t# int" %}
13092   ins_encode %{
13093     __ andl($dst$$Register, $src$$Address);
13094   %}
13095   ins_pipe(ialu_reg_mem);
13096 %}
13097 
13098 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13099 %{
13100   predicate(UseAPX);
13101   match(Set dst (AndI src1 (LoadI src2)));
13102   effect(KILL cr);
13103   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13104 
13105   ins_cost(150);
13106   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13107   ins_encode %{
13108     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13109   %}
13110   ins_pipe(ialu_reg_mem);
13111 %}
13112 
13113 // And Memory with Register
13114 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13115 %{
13116   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13117   effect(KILL cr);
13118   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13119 
13120   ins_cost(150);
13121   format %{ "andb    $dst, $src\t# byte" %}
13122   ins_encode %{
13123     __ andb($dst$$Address, $src$$Register);
13124   %}
13125   ins_pipe(ialu_mem_reg);
13126 %}
13127 
13128 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13129 %{
13130   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13131   effect(KILL cr);
13132   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13133 
13134   ins_cost(150);
13135   format %{ "andl    $dst, $src\t# int" %}
13136   ins_encode %{
13137     __ andl($dst$$Address, $src$$Register);
13138   %}
13139   ins_pipe(ialu_mem_reg);
13140 %}
13141 
13142 // And Memory with Immediate
13143 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13144 %{
13145   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13146   effect(KILL cr);
13147   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13148 
13149   ins_cost(125);
13150   format %{ "andl    $dst, $src\t# int" %}
13151   ins_encode %{
13152     __ andl($dst$$Address, $src$$constant);
13153   %}
13154   ins_pipe(ialu_mem_imm);
13155 %}
13156 
13157 // BMI1 instructions
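// andn computes (~src1) & src2 in a single instruction.  The ideal graph has
// no separate bitwise-not node; ~src1 is represented as (XorI src1 -1), which
// is the shape the rules below match.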
13158 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13159   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13160   predicate(UseBMI1Instructions);
13161   effect(KILL cr);
13162   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13163 
13164   ins_cost(125);
13165   format %{ "andnl  $dst, $src1, $src2" %}
13166 
13167   ins_encode %{
13168     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13169   %}
13170   ins_pipe(ialu_reg_mem);
13171 %}
13172 
13173 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13174   match(Set dst (AndI (XorI src1 minus_1) src2));
13175   predicate(UseBMI1Instructions);
13176   effect(KILL cr);
13177   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13178 
13179   format %{ "andnl  $dst, $src1, $src2" %}
13180 
13181   ins_encode %{
13182     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13183   %}
13184   ins_pipe(ialu_reg);
13185 %}
13186 
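// blsi isolates the lowest set bit: dst = src & (-src).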
13187 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13188   match(Set dst (AndI (SubI imm_zero src) src));
13189   predicate(UseBMI1Instructions);
13190   effect(KILL cr);
13191   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13192 
13193   format %{ "blsil  $dst, $src" %}
13194 
13195   ins_encode %{
13196     __ blsil($dst$$Register, $src$$Register);
13197   %}
13198   ins_pipe(ialu_reg);
13199 %}
13200 
13201 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13202   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13203   predicate(UseBMI1Instructions);
13204   effect(KILL cr);
13205   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13206 
13207   ins_cost(125);
13208   format %{ "blsil  $dst, $src" %}
13209 
13210   ins_encode %{
13211     __ blsil($dst$$Register, $src$$Address);
13212   %}
13213   ins_pipe(ialu_reg_mem);
13214 %}
13215 
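// blsmsk sets all bits up to and including the lowest set bit: dst = src ^ (src - 1).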
13216 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13217 %{
13218   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13219   predicate(UseBMI1Instructions);
13220   effect(KILL cr);
13221   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13222 
13223   ins_cost(125);
13224   format %{ "blsmskl $dst, $src" %}
13225 
13226   ins_encode %{
13227     __ blsmskl($dst$$Register, $src$$Address);
13228   %}
13229   ins_pipe(ialu_reg_mem);
13230 %}
13231 
13232 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13233 %{
13234   match(Set dst (XorI (AddI src minus_1) src));
13235   predicate(UseBMI1Instructions);
13236   effect(KILL cr);
13237   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13238 
13239   format %{ "blsmskl $dst, $src" %}
13240 
13241   ins_encode %{
13242     __ blsmskl($dst$$Register, $src$$Register);
13243   %}
13244 
13245   ins_pipe(ialu_reg);
13246 %}
13247 
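// blsr clears the lowest set bit: dst = src & (src - 1).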
13248 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13249 %{
13250   match(Set dst (AndI (AddI src minus_1) src) );
13251   predicate(UseBMI1Instructions);
13252   effect(KILL cr);
13253   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13254 
13255   format %{ "blsrl  $dst, $src" %}
13256 
13257   ins_encode %{
13258     __ blsrl($dst$$Register, $src$$Register);
13259   %}
13260 
  ins_pipe(ialu_reg);
13262 %}
13263 
13264 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13265 %{
13266   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13267   predicate(UseBMI1Instructions);
13268   effect(KILL cr);
13269   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13270 
13271   ins_cost(125);
13272   format %{ "blsrl  $dst, $src" %}
13273 
13274   ins_encode %{
13275     __ blsrl($dst$$Register, $src$$Address);
13276   %}
13277 
  ins_pipe(ialu_reg_mem);
13279 %}
13280 
13281 // Or Instructions
13282 // Or Register with Register
13283 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13284 %{
13285   predicate(!UseAPX);
13286   match(Set dst (OrI dst src));
13287   effect(KILL cr);
13288   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13289 
13290   format %{ "orl     $dst, $src\t# int" %}
13291   ins_encode %{
13292     __ orl($dst$$Register, $src$$Register);
13293   %}
13294   ins_pipe(ialu_reg_reg);
13295 %}
13296 
13297 // Or Register with Register using New Data Destination (NDD)
13298 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13299 %{
13300   predicate(UseAPX);
13301   match(Set dst (OrI src1 src2));
13302   effect(KILL cr);
13303   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13304 
13305   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13306   ins_encode %{
13307     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13308   %}
13309   ins_pipe(ialu_reg_reg);
13310 %}
13311 
13312 // Or Register with Immediate
13313 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13314 %{
13315   predicate(!UseAPX);
13316   match(Set dst (OrI dst src));
13317   effect(KILL cr);
13318   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13319 
13320   format %{ "orl     $dst, $src\t# int" %}
13321   ins_encode %{
13322     __ orl($dst$$Register, $src$$constant);
13323   %}
13324   ins_pipe(ialu_reg);
13325 %}
13326 
13327 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13328 %{
13329   predicate(UseAPX);
13330   match(Set dst (OrI src1 src2));
13331   effect(KILL cr);
13332   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13333 
13334   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13335   ins_encode %{
13336     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13337   %}
13338   ins_pipe(ialu_reg);
13339 %}
13340 
13341 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13342 %{
13343   predicate(UseAPX);
13344   match(Set dst (OrI src1 src2));
13345   effect(KILL cr);
13346   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13347 
13348   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13349   ins_encode %{
13350     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13351   %}
13352   ins_pipe(ialu_reg);
13353 %}
13354 
13355 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13356 %{
13357   predicate(UseAPX);
13358   match(Set dst (OrI (LoadI src1) src2));
13359   effect(KILL cr);
13360   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13361 
13362   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13363   ins_encode %{
13364     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13365   %}
13366   ins_pipe(ialu_reg);
13367 %}
13368 
13369 // Or Register with Memory
13370 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13371 %{
13372   predicate(!UseAPX);
13373   match(Set dst (OrI dst (LoadI src)));
13374   effect(KILL cr);
13375   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13376 
13377   ins_cost(150);
13378   format %{ "orl     $dst, $src\t# int" %}
13379   ins_encode %{
13380     __ orl($dst$$Register, $src$$Address);
13381   %}
13382   ins_pipe(ialu_reg_mem);
13383 %}
13384 
13385 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13386 %{
13387   predicate(UseAPX);
13388   match(Set dst (OrI src1 (LoadI src2)));
13389   effect(KILL cr);
13390   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13391 
13392   ins_cost(150);
13393   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13394   ins_encode %{
13395     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13396   %}
13397   ins_pipe(ialu_reg_mem);
13398 %}
13399 
13400 // Or Memory with Register
13401 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13402 %{
13403   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13404   effect(KILL cr);
13405   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13406 
13407   ins_cost(150);
13408   format %{ "orb    $dst, $src\t# byte" %}
13409   ins_encode %{
13410     __ orb($dst$$Address, $src$$Register);
13411   %}
13412   ins_pipe(ialu_mem_reg);
13413 %}
13414 
13415 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13416 %{
13417   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13418   effect(KILL cr);
13419   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13420 
13421   ins_cost(150);
13422   format %{ "orl     $dst, $src\t# int" %}
13423   ins_encode %{
13424     __ orl($dst$$Address, $src$$Register);
13425   %}
13426   ins_pipe(ialu_mem_reg);
13427 %}
13428 
13429 // Or Memory with Immediate
13430 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13431 %{
13432   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13433   effect(KILL cr);
13434   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13435 
13436   ins_cost(125);
13437   format %{ "orl     $dst, $src\t# int" %}
13438   ins_encode %{
13439     __ orl($dst$$Address, $src$$constant);
13440   %}
13441   ins_pipe(ialu_mem_imm);
13442 %}
13443 
13444 // Xor Instructions
13445 // Xor Register with Register
13446 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13447 %{
13448   predicate(!UseAPX);
13449   match(Set dst (XorI dst src));
13450   effect(KILL cr);
13451   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13452 
13453   format %{ "xorl    $dst, $src\t# int" %}
13454   ins_encode %{
13455     __ xorl($dst$$Register, $src$$Register);
13456   %}
13457   ins_pipe(ialu_reg_reg);
13458 %}
13459 
13460 // Xor Register with Register using New Data Destination (NDD)
13461 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13462 %{
13463   predicate(UseAPX);
13464   match(Set dst (XorI src1 src2));
13465   effect(KILL cr);
13466   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13467 
13468   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13469   ins_encode %{
13470     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13471   %}
13472   ins_pipe(ialu_reg_reg);
13473 %}
13474 
13475 // Xor Register with Immediate -1
13476 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13477 %{
13478   predicate(!UseAPX);
13479   match(Set dst (XorI dst imm));
13480 
13481   format %{ "notl    $dst" %}
13482   ins_encode %{
13483      __ notl($dst$$Register);
13484   %}
13485   ins_pipe(ialu_reg);
13486 %}
13487 
13488 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13489 %{
13490   match(Set dst (XorI src imm));
13491   predicate(UseAPX);
13492   flag(PD::Flag_ndd_demotable_opr1);
13493 
13494   format %{ "enotl    $dst, $src" %}
13495   ins_encode %{
13496      __ enotl($dst$$Register, $src$$Register);
13497   %}
13498   ins_pipe(ialu_reg);
13499 %}
13500 
13501 // Xor Register with Immediate
13502 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13503 %{
  // Strict predicate check ensures xorI_rReg_im1 is selected regardless of cost when immI src is -1.
13505   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13506   match(Set dst (XorI dst src));
13507   effect(KILL cr);
13508   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13509 
13510   format %{ "xorl    $dst, $src\t# int" %}
13511   ins_encode %{
13512     __ xorl($dst$$Register, $src$$constant);
13513   %}
13514   ins_pipe(ialu_reg);
13515 %}
13516 
13517 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13518 %{
  // Strict predicate check ensures xorI_rReg_im1_ndd is selected regardless of cost when immI src2 is -1.
13520   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13521   match(Set dst (XorI src1 src2));
13522   effect(KILL cr);
13523   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13524 
13525   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13526   ins_encode %{
13527     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13528   %}
13529   ins_pipe(ialu_reg);
13530 %}
13531 
13532 // Xor Memory with Immediate
13533 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13534 %{
13535   predicate(UseAPX);
13536   match(Set dst (XorI (LoadI src1) src2));
13537   effect(KILL cr);
13538   ins_cost(150);
13539   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13540 
13541   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13542   ins_encode %{
13543     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13544   %}
13545   ins_pipe(ialu_reg);
13546 %}
13547 
13548 // Xor Register with Memory
13549 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13550 %{
13551   predicate(!UseAPX);
13552   match(Set dst (XorI dst (LoadI src)));
13553   effect(KILL cr);
13554   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13555 
13556   ins_cost(150);
13557   format %{ "xorl    $dst, $src\t# int" %}
13558   ins_encode %{
13559     __ xorl($dst$$Register, $src$$Address);
13560   %}
13561   ins_pipe(ialu_reg_mem);
13562 %}
13563 
13564 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13565 %{
13566   predicate(UseAPX);
13567   match(Set dst (XorI src1 (LoadI src2)));
13568   effect(KILL cr);
13569   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13570 
13571   ins_cost(150);
13572   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13573   ins_encode %{
13574     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13575   %}
13576   ins_pipe(ialu_reg_mem);
13577 %}
13578 
13579 // Xor Memory with Register
13580 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13581 %{
13582   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13583   effect(KILL cr);
13584   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13585 
13586   ins_cost(150);
13587   format %{ "xorb    $dst, $src\t# byte" %}
13588   ins_encode %{
13589     __ xorb($dst$$Address, $src$$Register);
13590   %}
13591   ins_pipe(ialu_mem_reg);
13592 %}
13593 
13594 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13595 %{
13596   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13597   effect(KILL cr);
13598   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13599 
13600   ins_cost(150);
13601   format %{ "xorl    $dst, $src\t# int" %}
13602   ins_encode %{
13603     __ xorl($dst$$Address, $src$$Register);
13604   %}
13605   ins_pipe(ialu_mem_reg);
13606 %}
13607 
13608 // Xor Memory with Immediate
13609 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13610 %{
13611   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13612   effect(KILL cr);
13613   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13614 
13615   ins_cost(125);
13616   format %{ "xorl    $dst, $src\t# int" %}
13617   ins_encode %{
13618     __ xorl($dst$$Address, $src$$constant);
13619   %}
13620   ins_pipe(ialu_mem_imm);
13621 %}
13622 
13623 
13624 // Long Logical Instructions
13625 
13626 // And Instructions
13627 // And Register with Register
13628 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13629 %{
13630   predicate(!UseAPX);
13631   match(Set dst (AndL dst src));
13632   effect(KILL cr);
13633   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13634 
13635   format %{ "andq    $dst, $src\t# long" %}
13636   ins_encode %{
13637     __ andq($dst$$Register, $src$$Register);
13638   %}
13639   ins_pipe(ialu_reg_reg);
13640 %}
13641 
13642 // And Register with Register using New Data Destination (NDD)
13643 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13644 %{
13645   predicate(UseAPX);
13646   match(Set dst (AndL src1 src2));
13647   effect(KILL cr);
13648   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13649 
13650   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13651   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13655   ins_pipe(ialu_reg_reg);
13656 %}
13657 
13658 // And Register with Immediate 255
13659 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13660 %{
13661   match(Set dst (AndL src mask));
13662 
13663   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13664   ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
13666     __ movzbl($dst$$Register, $src$$Register);
13667   %}
13668   ins_pipe(ialu_reg);
13669 %}
13670 
13671 // And Register with Immediate 65535
13672 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13673 %{
13674   match(Set dst (AndL src mask));
13675 
13676   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13677   ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
13679     __ movzwl($dst$$Register, $src$$Register);
13680   %}
13681   ins_pipe(ialu_reg);
13682 %}
13683 
13684 // And Register with Immediate
13685 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13686 %{
13687   predicate(!UseAPX);
13688   match(Set dst (AndL dst src));
13689   effect(KILL cr);
13690   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13691 
13692   format %{ "andq    $dst, $src\t# long" %}
13693   ins_encode %{
13694     __ andq($dst$$Register, $src$$constant);
13695   %}
13696   ins_pipe(ialu_reg);
13697 %}
13698 
13699 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13700 %{
13701   predicate(UseAPX);
13702   match(Set dst (AndL src1 src2));
13703   effect(KILL cr);
13704   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13705 
13706   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13707   ins_encode %{
13708     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13709   %}
13710   ins_pipe(ialu_reg);
13711 %}
13712 
13713 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13714 %{
13715   predicate(UseAPX);
13716   match(Set dst (AndL (LoadL src1) src2));
13717   effect(KILL cr);
13718   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13719 
13720   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13721   ins_encode %{
13722     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13723   %}
13724   ins_pipe(ialu_reg);
13725 %}
13726 
13727 // And Register with Memory
13728 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13729 %{
13730   predicate(!UseAPX);
13731   match(Set dst (AndL dst (LoadL src)));
13732   effect(KILL cr);
13733   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13734 
13735   ins_cost(150);
13736   format %{ "andq    $dst, $src\t# long" %}
13737   ins_encode %{
13738     __ andq($dst$$Register, $src$$Address);
13739   %}
13740   ins_pipe(ialu_reg_mem);
13741 %}
13742 
13743 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13744 %{
13745   predicate(UseAPX);
13746   match(Set dst (AndL src1 (LoadL src2)));
13747   effect(KILL cr);
13748   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13749 
13750   ins_cost(150);
13751   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13752   ins_encode %{
13753     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13754   %}
13755   ins_pipe(ialu_reg_mem);
13756 %}
13757 
13758 // And Memory with Register
13759 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13760 %{
13761   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13762   effect(KILL cr);
13763   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13764 
13765   ins_cost(150);
13766   format %{ "andq    $dst, $src\t# long" %}
13767   ins_encode %{
13768     __ andq($dst$$Address, $src$$Register);
13769   %}
13770   ins_pipe(ialu_mem_reg);
13771 %}
13772 
13773 // And Memory with Immediate
13774 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13775 %{
13776   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13777   effect(KILL cr);
13778   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13779 
13780   ins_cost(125);
13781   format %{ "andq    $dst, $src\t# long" %}
13782   ins_encode %{
13783     __ andq($dst$$Address, $src$$constant);
13784   %}
13785   ins_pipe(ialu_mem_imm);
13786 %}
13787 
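      // A long AND with a constant of the form ~(1L << k), k > 30, is a single
      // bit clear: for example, x & ~(1L << 40) becomes btrq [addr], 40.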
13788 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13789 %{
13790   // con must be a pure 64-bit immediate whose complement is a power of 2,
13791   // with the set bit above bit 30; a plain AND with a sign-extended imm32 handles lower bits well enough.
13792   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13793 
13794   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13795   effect(KILL cr);
13796 
13797   ins_cost(125);
13798   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13799   ins_encode %{
13800     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13801   %}
13802   ins_pipe(ialu_mem_imm);
13803 %}
13804 
13805 // BMI1 instructions
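      // These recognize the canonical BMI1 bit-manipulation idioms; in Java
      // terms:
      //   andnq(x, y)  == ~x & y
      //   blsiq(x)     == x & -x        (isolate lowest set bit)
      //   blsmskq(x)   == x ^ (x - 1)   (mask up to and including lowest set bit)
      //   blsrq(x)     == x & (x - 1)   (clear lowest set bit)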
13806 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13807   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13808   predicate(UseBMI1Instructions);
13809   effect(KILL cr);
13810   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13811 
13812   ins_cost(125);
13813   format %{ "andnq  $dst, $src1, $src2" %}
13814 
13815   ins_encode %{
13816     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13817   %}
13818   ins_pipe(ialu_reg_mem);
13819 %}
13820 
13821 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13822   match(Set dst (AndL (XorL src1 minus_1) src2));
13823   predicate(UseBMI1Instructions);
13824   effect(KILL cr);
13825   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13826 
13827   format %{ "andnq  $dst, $src1, $src2" %}
13828 
13829   ins_encode %{
13830     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13831   %}
13832   ins_pipe(ialu_reg_reg);
13833 %}
13834 
13835 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13836   match(Set dst (AndL (SubL imm_zero src) src));
13837   predicate(UseBMI1Instructions);
13838   effect(KILL cr);
13839   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13840 
13841   format %{ "blsiq  $dst, $src" %}
13842 
13843   ins_encode %{
13844     __ blsiq($dst$$Register, $src$$Register);
13845   %}
13846   ins_pipe(ialu_reg);
13847 %}
13848 
13849 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13850   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13851   predicate(UseBMI1Instructions);
13852   effect(KILL cr);
13853   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13854 
13855   ins_cost(125);
13856   format %{ "blsiq  $dst, $src" %}
13857 
13858   ins_encode %{
13859     __ blsiq($dst$$Register, $src$$Address);
13860   %}
13861   ins_pipe(ialu_reg_mem);
13862 %}
13863 
13864 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13865 %{
13866   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13867   predicate(UseBMI1Instructions);
13868   effect(KILL cr);
13869   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13870 
13871   ins_cost(125);
13872   format %{ "blsmskq $dst, $src" %}
13873 
13874   ins_encode %{
13875     __ blsmskq($dst$$Register, $src$$Address);
13876   %}
13877   ins_pipe(ialu_reg_mem);
13878 %}
13879 
13880 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13881 %{
13882   match(Set dst (XorL (AddL src minus_1) src));
13883   predicate(UseBMI1Instructions);
13884   effect(KILL cr);
13885   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13886 
13887   format %{ "blsmskq $dst, $src" %}
13888 
13889   ins_encode %{
13890     __ blsmskq($dst$$Register, $src$$Register);
13891   %}
13892 
13893   ins_pipe(ialu_reg);
13894 %}
13895 
13896 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13897 %{
13898   match(Set dst (AndL (AddL src minus_1) src) );
13899   predicate(UseBMI1Instructions);
13900   effect(KILL cr);
13901   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13902 
13903   format %{ "blsrq  $dst, $src" %}
13904 
13905   ins_encode %{
13906     __ blsrq($dst$$Register, $src$$Register);
13907   %}
13908 
13909   ins_pipe(ialu_reg);
13910 %}
13911 
13912 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13913 %{
13914   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13915   predicate(UseBMI1Instructions);
13916   effect(KILL cr);
13917   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13918 
13919   ins_cost(125);
13920   format %{ "blsrq  $dst, $src" %}
13921 
13922   ins_encode %{
13923     __ blsrq($dst$$Register, $src$$Address);
13924   %}
13925 
13926   ins_pipe(ialu_reg_mem);
13927 %}
13928 
13929 // Or Instructions
13930 // Or Register with Register
13931 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13932 %{
13933   predicate(!UseAPX);
13934   match(Set dst (OrL dst src));
13935   effect(KILL cr);
13936   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13937 
13938   format %{ "orq     $dst, $src\t# long" %}
13939   ins_encode %{
13940     __ orq($dst$$Register, $src$$Register);
13941   %}
13942   ins_pipe(ialu_reg_reg);
13943 %}
13944 
13945 // Or Register with Register using New Data Destination (NDD)
13946 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13947 %{
13948   predicate(UseAPX);
13949   match(Set dst (OrL src1 src2));
13950   effect(KILL cr);
13951   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13952 
13953   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13954   ins_encode %{
13955     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13956 
13957   %}
13958   ins_pipe(ialu_reg_reg);
13959 %}
13960 
13961 // Use any_RegP to match R15 (TLS register) without spilling.
13962 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
        predicate(!UseAPX);
13963   match(Set dst (OrL dst (CastP2X src)));
13964   effect(KILL cr);
13965   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13966 
13967   format %{ "orq     $dst, $src\t# long" %}
13968   ins_encode %{
13969     __ orq($dst$$Register, $src$$Register);
13970   %}
13971   ins_pipe(ialu_reg_reg);
13972 %}
13973 
13974 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
        predicate(UseAPX);
13975   match(Set dst (OrL src1 (CastP2X src2)));
13976   effect(KILL cr);
13977   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13978 
13979   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13980   ins_encode %{
13981     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13982   %}
13983   ins_pipe(ialu_reg_reg);
13984 %}
13985 
13986 // Or Register with Immediate
13987 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13988 %{
13989   predicate(!UseAPX);
13990   match(Set dst (OrL dst src));
13991   effect(KILL cr);
13992   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13993 
13994   format %{ "orq     $dst, $src\t# long" %}
13995   ins_encode %{
13996     __ orq($dst$$Register, $src$$constant);
13997   %}
13998   ins_pipe(ialu_reg);
13999 %}
14000 
14001 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14002 %{
14003   predicate(UseAPX);
14004   match(Set dst (OrL src1 src2));
14005   effect(KILL cr);
14006   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14007 
14008   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14009   ins_encode %{
14010     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14011   %}
14012   ins_pipe(ialu_reg);
14013 %}
14014 
14015 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14016 %{
14017   predicate(UseAPX);
14018   match(Set dst (OrL src1 src2));
14019   effect(KILL cr);
14020   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14021 
14022   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14023   ins_encode %{
14024     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14025   %}
14026   ins_pipe(ialu_reg);
14027 %}
14028 
14029 // Or Memory with Immediate into Register (NDD)
14030 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14031 %{
14032   predicate(UseAPX);
14033   match(Set dst (OrL (LoadL src1) src2));
14034   effect(KILL cr);
14035   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14036 
14037   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14038   ins_encode %{
14039     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14040   %}
14041   ins_pipe(ialu_reg);
14042 %}
14043 
14044 // Or Register with Memory
14045 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14046 %{
14047   predicate(!UseAPX);
14048   match(Set dst (OrL dst (LoadL src)));
14049   effect(KILL cr);
14050   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14051 
14052   ins_cost(150);
14053   format %{ "orq     $dst, $src\t# long" %}
14054   ins_encode %{
14055     __ orq($dst$$Register, $src$$Address);
14056   %}
14057   ins_pipe(ialu_reg_mem);
14058 %}
14059 
14060 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14061 %{
14062   predicate(UseAPX);
14063   match(Set dst (OrL src1 (LoadL src2)));
14064   effect(KILL cr);
14065   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14066 
14067   ins_cost(150);
14068   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14069   ins_encode %{
14070     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14071   %}
14072   ins_pipe(ialu_reg_mem);
14073 %}
14074 
14075 // Or Memory with Register
14076 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14077 %{
14078   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14079   effect(KILL cr);
14080   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14081 
14082   ins_cost(150);
14083   format %{ "orq     $dst, $src\t# long" %}
14084   ins_encode %{
14085     __ orq($dst$$Address, $src$$Register);
14086   %}
14087   ins_pipe(ialu_mem_reg);
14088 %}
14089 
14090 // Or Memory with Immediate
14091 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14092 %{
14093   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14094   effect(KILL cr);
14095   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14096 
14097   ins_cost(125);
14098   format %{ "orq     $dst, $src\t# long" %}
14099   ins_encode %{
14100     __ orq($dst$$Address, $src$$constant);
14101   %}
14102   ins_pipe(ialu_mem_imm);
14103 %}
14104 
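      // Likewise, a long OR with a single-bit constant above bit 31 (one that
      // cannot be encoded as a sign-extended imm32) is a single bit set: for
      // example, x | (1L << 40) becomes btsq [addr], 40.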
14105 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14106 %{
14107   // con must be a pure 64-bit power-of-2 immediate with its set bit above
14108   // bit 31; OR with a sign-extended imm32 handles lower bits well enough.
14109   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14110 
14111   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14112   effect(KILL cr);
14113 
14114   ins_cost(125);
14115   format %{ "btsq    $dst, log2($con)\t# long" %}
14116   ins_encode %{
14117     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14118   %}
14119   ins_pipe(ialu_mem_imm);
14120 %}
14121 
14122 // Xor Instructions
14123 // Xor Register with Register
14124 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14125 %{
14126   predicate(!UseAPX);
14127   match(Set dst (XorL dst src));
14128   effect(KILL cr);
14129   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14130 
14131   format %{ "xorq    $dst, $src\t# long" %}
14132   ins_encode %{
14133     __ xorq($dst$$Register, $src$$Register);
14134   %}
14135   ins_pipe(ialu_reg_reg);
14136 %}
14137 
14138 // Xor Register with Register using New Data Destination (NDD)
14139 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14140 %{
14141   predicate(UseAPX);
14142   match(Set dst (XorL src1 src2));
14143   effect(KILL cr);
14144   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14145 
14146   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14147   ins_encode %{
14148     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14149   %}
14150   ins_pipe(ialu_reg_reg);
14151 %}
14152 
14153 // Xor Register with Immediate -1
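      // x ^ -1 == ~x, so these forms emit a two's-complement NOT instead of an
      // XOR with an immediate.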
14154 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14155 %{
14156   predicate(!UseAPX);
14157   match(Set dst (XorL dst imm));
14158 
14159   format %{ "notq   $dst" %}
14160   ins_encode %{
14161      __ notq($dst$$Register);
14162   %}
14163   ins_pipe(ialu_reg);
14164 %}
14165 
14166 instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14167 %{
14168   predicate(UseAPX);
14169   match(Set dst (XorL src imm));
14170   flag(PD::Flag_ndd_demotable_opr1);
14171 
14172   format %{ "enotq   $dst, $src" %}
14173   ins_encode %{
14174     __ enotq($dst$$Register, $src$$Register);
14175   %}
14176   ins_pipe(ialu_reg);
14177 %}
14178 
14179 // Xor Register with Immediate
14180 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14181 %{
14182   // Strict predicate check so that xorL_rReg_im1 is always selected when immL32 src is -1, independent of cost.
14183   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14184   match(Set dst (XorL dst src));
14185   effect(KILL cr);
14186   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14187 
14188   format %{ "xorq    $dst, $src\t# long" %}
14189   ins_encode %{
14190     __ xorq($dst$$Register, $src$$constant);
14191   %}
14192   ins_pipe(ialu_reg);
14193 %}
14194 
14195 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14196 %{
14197   // Strict predicate check so that xorL_rReg_im1_ndd is always selected when immL32 src2 is -1, independent of cost.
14198   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14199   match(Set dst (XorL src1 src2));
14200   effect(KILL cr);
14201   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14202 
14203   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14204   ins_encode %{
14205     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14206   %}
14207   ins_pipe(ialu_reg);
14208 %}
14209 
14210 // Xor Memory with Immediate into Register (NDD)
14211 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14212 %{
14213   predicate(UseAPX);
14214   match(Set dst (XorL (LoadL src1) src2));
14215   effect(KILL cr);
14216   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14217   ins_cost(150);
14218 
14219   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14220   ins_encode %{
14221     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14222   %}
14223   ins_pipe(ialu_reg);
14224 %}
14225 
14226 // Xor Register with Memory
14227 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14228 %{
14229   predicate(!UseAPX);
14230   match(Set dst (XorL dst (LoadL src)));
14231   effect(KILL cr);
14232   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14233 
14234   ins_cost(150);
14235   format %{ "xorq    $dst, $src\t# long" %}
14236   ins_encode %{
14237     __ xorq($dst$$Register, $src$$Address);
14238   %}
14239   ins_pipe(ialu_reg_mem);
14240 %}
14241 
14242 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14243 %{
14244   predicate(UseAPX);
14245   match(Set dst (XorL src1 (LoadL src2)));
14246   effect(KILL cr);
14247   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14248 
14249   ins_cost(150);
14250   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14251   ins_encode %{
14252     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14253   %}
14254   ins_pipe(ialu_reg_mem);
14255 %}
14256 
14257 // Xor Memory with Register
14258 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14259 %{
14260   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14261   effect(KILL cr);
14262   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14263 
14264   ins_cost(150);
14265   format %{ "xorq    $dst, $src\t# long" %}
14266   ins_encode %{
14267     __ xorq($dst$$Address, $src$$Register);
14268   %}
14269   ins_pipe(ialu_mem_reg);
14270 %}
14271 
14272 // Xor Memory with Immediate
14273 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14274 %{
14275   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14276   effect(KILL cr);
14277   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14278 
14279   ins_cost(125);
14280   format %{ "xorq    $dst, $src\t# long" %}
14281   ins_encode %{
14282     __ xorq($dst$$Address, $src$$constant);
14283   %}
14284   ins_pipe(ialu_mem_imm);
14285 %}
14286 
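      // CmpLTMask produces -1 when p < q and 0 otherwise; the setcc/neg
      // sequence below computes (p < q) as 0/1 and negates it into the mask.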
14287 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14288 %{
14289   match(Set dst (CmpLTMask p q));
14290   effect(KILL cr);
14291 
14292   ins_cost(400);
14293   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14294             "setcc   $dst\t# emits setl + movzbl, or setzul for APX\n\t"
14295             "negl    $dst" %}
14296   ins_encode %{
14297     __ cmpl($p$$Register, $q$$Register);
14298     __ setcc(Assembler::less, $dst$$Register);
14299     __ negl($dst$$Register);
14300   %}
14301   ins_pipe(pipe_slow);
14302 %}
14303 
14304 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14305 %{
14306   match(Set dst (CmpLTMask dst zero));
14307   effect(KILL cr);
14308 
14309   ins_cost(100);
14310   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14311   ins_encode %{
14312     __ sarl($dst$$Register, 31);
14313   %}
14314   ins_pipe(ialu_reg);
14315 %}
14316 
14317 /* Better to save a register than avoid a branch */
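      // Computes p = (p - q) + ((p < q) ? y : 0) without materializing the
      // -1/0 mask: the sign of p - q stands in for p < q, which is valid as
      // long as the subtraction does not overflow (assumed for the bounded
      // values C2 feeds this idiom).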
14318 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14319 %{
14320   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14321   effect(KILL cr);
14322   ins_cost(300);
14323   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14324             "jge     done\n\t"
14325             "addl    $p,$y\n"
14326             "done:   " %}
14327   ins_encode %{
14328     Register Rp = $p$$Register;
14329     Register Rq = $q$$Register;
14330     Register Ry = $y$$Register;
14331     Label done;
14332     __ subl(Rp, Rq);
14333     __ jccb(Assembler::greaterEqual, done);
14334     __ addl(Rp, Ry);
14335     __ bind(done);
14336   %}
14337   ins_pipe(pipe_cmplt);
14338 %}
14339 
14340 /* Better to save a register than avoid a branch */
14341 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14342 %{
14343   match(Set y (AndI (CmpLTMask p q) y));
14344   effect(KILL cr);
14345 
14346   ins_cost(300);
14347 
14348   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14349             "jl      done\n\t"
14350             "xorl    $y, $y\n"
14351             "done:   " %}
14352   ins_encode %{
14353     Register Rp = $p$$Register;
14354     Register Rq = $q$$Register;
14355     Register Ry = $y$$Register;
14356     Label done;
14357     __ cmpl(Rp, Rq);
14358     __ jccb(Assembler::less, done);
14359     __ xorl(Ry, Ry);
14360     __ bind(done);
14361   %}
14362   ins_pipe(pipe_cmplt);
14363 %}
14364 
14365 
14366 //---------- FP Instructions------------------------------------------------
14367 
14368 // Really expensive, avoid
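      // ucomiss raises ZF, PF and CF together for an unordered (NaN) operand;
      // the pushfq/andq/popfq fixup below clears ZF/PF/SF in the saved flags
      // so that an unordered result reads as plain "below", and is only taken
      // when PF signals the unordered case.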
14369 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14370 %{
14371   match(Set cr (CmpF src1 src2));
14372 
14373   ins_cost(500);
14374   format %{ "ucomiss $src1, $src2\n\t"
14375             "jnp,s   exit\n\t"
14376             "pushfq\t# saw NaN, set CF\n\t"
14377             "andq    [rsp], #0xffffff2b\n\t"
14378             "popfq\n"
14379     "exit:" %}
14380   ins_encode %{
14381     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14382     emit_cmpfp_fixup(masm);
14383   %}
14384   ins_pipe(pipe_slow);
14385 %}
14386 
14387 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14388   match(Set cr (CmpF src1 src2));
14389 
14390   ins_cost(100);
14391   format %{ "ucomiss $src1, $src2" %}
14392   ins_encode %{
14393     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14394   %}
14395   ins_pipe(pipe_slow);
14396 %}
14397 
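      // The CFE variants (rFlagsRegUCFE) use the AVX10.2 VUCOMXSS/VUCOMXSD
      // compares, which are defined so that the unordered case is encoded in
      // the flags directly and no separate NaN fixup sequence is required,
      // hence the low cost.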
14398 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14399   match(Set cr (CmpF src1 src2));
14400 
14401   ins_cost(100);
14402   format %{ "vucomxss $src1, $src2" %}
14403   ins_encode %{
14404     __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14405   %}
14406   ins_pipe(pipe_slow);
14407 %}
14408 
14409 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14410   match(Set cr (CmpF src1 (LoadF src2)));
14411 
14412   ins_cost(100);
14413   format %{ "ucomiss $src1, $src2" %}
14414   ins_encode %{
14415     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14416   %}
14417   ins_pipe(pipe_slow);
14418 %}
14419 
14420 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14421   match(Set cr (CmpF src1 (LoadF src2)));
14422 
14423   ins_cost(100);
14424   format %{ "vucomxss $src1, $src2" %}
14425   ins_encode %{
14426     __ vucomxss($src1$$XMMRegister, $src2$$Address);
14427   %}
14428   ins_pipe(pipe_slow);
14429 %}
14430 
14431 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14432   match(Set cr (CmpF src con));
14433 
14434   ins_cost(100);
14435   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14436   ins_encode %{
14437     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14438   %}
14439   ins_pipe(pipe_slow);
14440 %}
14441 
14442 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14443   match(Set cr (CmpF src con));
14444 
14445   ins_cost(100);
14446   format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14447   ins_encode %{
14448     __ vucomxss($src$$XMMRegister, $constantaddress($con));
14449   %}
14450   ins_pipe(pipe_slow);
14451 %}
14452 
14453 // Really expensive, avoid
14454 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14455 %{
14456   match(Set cr (CmpD src1 src2));
14457 
14458   ins_cost(500);
14459   format %{ "ucomisd $src1, $src2\n\t"
14460             "jnp,s   exit\n\t"
14461             "pushfq\t# saw NaN, set CF\n\t"
14462             "andq    [rsp], #0xffffff2b\n\t"
14463             "popfq\n"
14464     "exit:" %}
14465   ins_encode %{
14466     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14467     emit_cmpfp_fixup(masm);
14468   %}
14469   ins_pipe(pipe_slow);
14470 %}
14471 
14472 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14473   match(Set cr (CmpD src1 src2));
14474 
14475   ins_cost(100);
14476   format %{ "ucomisd $src1, $src2 test" %}
14477   ins_encode %{
14478     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14479   %}
14480   ins_pipe(pipe_slow);
14481 %}
14482 
14483 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14484   match(Set cr (CmpD src1 src2));
14485 
14486   ins_cost(100);
14487   format %{ "vucomxsd $src1, $src2 test" %}
14488   ins_encode %{
14489     __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14490   %}
14491   ins_pipe(pipe_slow);
14492 %}
14493 
14494 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14495   match(Set cr (CmpD src1 (LoadD src2)));
14496 
14497   ins_cost(100);
14498   format %{ "ucomisd $src1, $src2" %}
14499   ins_encode %{
14500     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14501   %}
14502   ins_pipe(pipe_slow);
14503 %}
14504 
14505 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14506   match(Set cr (CmpD src1 (LoadD src2)));
14507 
14508   ins_cost(100);
14509   format %{ "vucomxsd $src1, $src2" %}
14510   ins_encode %{
14511     __ vucomxsd($src1$$XMMRegister, $src2$$Address);
14512   %}
14513   ins_pipe(pipe_slow);
14514 %}
14515 
14516 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14517   match(Set cr (CmpD src con));
14518   ins_cost(100);
14519   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14520   ins_encode %{
14521     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14522   %}
14523   ins_pipe(pipe_slow);
14524 %}
14525 
14526 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14527   match(Set cr (CmpD src con));
14528 
14529   ins_cost(100);
14530   format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14531   ins_encode %{
14532     __ vucomxsd($src$$XMMRegister, $constantaddress($con));
14533   %}
14534   ins_pipe(pipe_slow);
14535 %}
14536 
14537 // Compare into -1,0,1
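      // emit_cmpfp3 materializes the three-way result: -1 for "below" or
      // unordered, 0 for equal, +1 for above, i.e. fcmpl semantics where a
      // NaN operand compares as less than.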
14538 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14539 %{
14540   match(Set dst (CmpF3 src1 src2));
14541   effect(KILL cr);
14542 
14543   ins_cost(275);
14544   format %{ "ucomiss $src1, $src2\n\t"
14545             "movl    $dst, #-1\n\t"
14546             "jp,s    done\n\t"
14547             "jb,s    done\n\t"
14548             "setne   $dst\n\t"
14549             "movzbl  $dst, $dst\n"
14550     "done:" %}
14551   ins_encode %{
14552     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14553     emit_cmpfp3(masm, $dst$$Register);
14554   %}
14555   ins_pipe(pipe_slow);
14556 %}
14557 
14558 // Compare into -1,0,1
14559 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14560 %{
14561   match(Set dst (CmpF3 src1 (LoadF src2)));
14562   effect(KILL cr);
14563 
14564   ins_cost(275);
14565   format %{ "ucomiss $src1, $src2\n\t"
14566             "movl    $dst, #-1\n\t"
14567             "jp,s    done\n\t"
14568             "jb,s    done\n\t"
14569             "setne   $dst\n\t"
14570             "movzbl  $dst, $dst\n"
14571     "done:" %}
14572   ins_encode %{
14573     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14574     emit_cmpfp3(masm, $dst$$Register);
14575   %}
14576   ins_pipe(pipe_slow);
14577 %}
14578 
14579 // Compare into -1,0,1
14580 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14581   match(Set dst (CmpF3 src con));
14582   effect(KILL cr);
14583 
14584   ins_cost(275);
14585   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14586             "movl    $dst, #-1\n\t"
14587             "jp,s    done\n\t"
14588             "jb,s    done\n\t"
14589             "setne   $dst\n\t"
14590             "movzbl  $dst, $dst\n"
14591     "done:" %}
14592   ins_encode %{
14593     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14594     emit_cmpfp3(masm, $dst$$Register);
14595   %}
14596   ins_pipe(pipe_slow);
14597 %}
14598 
14599 // Compare into -1,0,1
14600 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14601 %{
14602   match(Set dst (CmpD3 src1 src2));
14603   effect(KILL cr);
14604 
14605   ins_cost(275);
14606   format %{ "ucomisd $src1, $src2\n\t"
14607             "movl    $dst, #-1\n\t"
14608             "jp,s    done\n\t"
14609             "jb,s    done\n\t"
14610             "setne   $dst\n\t"
14611             "movzbl  $dst, $dst\n"
14612     "done:" %}
14613   ins_encode %{
14614     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14615     emit_cmpfp3(masm, $dst$$Register);
14616   %}
14617   ins_pipe(pipe_slow);
14618 %}
14619 
14620 // Compare into -1,0,1
14621 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14622 %{
14623   match(Set dst (CmpD3 src1 (LoadD src2)));
14624   effect(KILL cr);
14625 
14626   ins_cost(275);
14627   format %{ "ucomisd $src1, $src2\n\t"
14628             "movl    $dst, #-1\n\t"
14629             "jp,s    done\n\t"
14630             "jb,s    done\n\t"
14631             "setne   $dst\n\t"
14632             "movzbl  $dst, $dst\n"
14633     "done:" %}
14634   ins_encode %{
14635     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14636     emit_cmpfp3(masm, $dst$$Register);
14637   %}
14638   ins_pipe(pipe_slow);
14639 %}
14640 
14641 // Compare into -1,0,1
14642 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14643   match(Set dst (CmpD3 src con));
14644   effect(KILL cr);
14645 
14646   ins_cost(275);
14647   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14648             "movl    $dst, #-1\n\t"
14649             "jp,s    done\n\t"
14650             "jb,s    done\n\t"
14651             "setne   $dst\n\t"
14652             "movzbl  $dst, $dst\n"
14653     "done:" %}
14654   ins_encode %{
14655     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14656     emit_cmpfp3(masm, $dst$$Register);
14657   %}
14658   ins_pipe(pipe_slow);
14659 %}
14660 
14661 //----------Arithmetic Conversion Instructions---------------------------------
14662 
14663 instruct convF2D_reg_reg(regD dst, regF src)
14664 %{
14665   match(Set dst (ConvF2D src));
14666 
14667   format %{ "cvtss2sd $dst, $src" %}
14668   ins_encode %{
14669     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14670   %}
14671   ins_pipe(pipe_slow); // XXX
14672 %}
14673 
14674 instruct convF2D_reg_mem(regD dst, memory src)
14675 %{
14676   predicate(UseAVX == 0);
14677   match(Set dst (ConvF2D (LoadF src)));
14678 
14679   format %{ "cvtss2sd $dst, $src" %}
14680   ins_encode %{
14681     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14682   %}
14683   ins_pipe(pipe_slow); // XXX
14684 %}
14685 
14686 instruct convD2F_reg_reg(regF dst, regD src)
14687 %{
14688   match(Set dst (ConvD2F src));
14689 
14690   format %{ "cvtsd2ss $dst, $src" %}
14691   ins_encode %{
14692     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14693   %}
14694   ins_pipe(pipe_slow); // XXX
14695 %}
14696 
14697 instruct convD2F_reg_mem(regF dst, memory src)
14698 %{
14699   predicate(UseAVX == 0);
14700   match(Set dst (ConvD2F (LoadD src)));
14701 
14702   format %{ "cvtsd2ss $dst, $src" %}
14703   ins_encode %{
14704     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14705   %}
14706   ins_pipe(pipe_slow); // XXX
14707 %}
14708 
14709 // XXX do mem variants
14710 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14711 %{
14712   predicate(!VM_Version::supports_avx10_2());
14713   match(Set dst (ConvF2I src));
14714   effect(KILL cr);
14715   format %{ "convert_f2i $dst, $src" %}
14716   ins_encode %{
14717     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14718   %}
14719   ins_pipe(pipe_slow);
14720 %}
14721 
14722 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14723 %{
14724   predicate(VM_Version::supports_avx10_2());
14725   match(Set dst (ConvF2I src));
14726   format %{ "evcvttss2sisl $dst, $src" %}
14727   ins_encode %{
14728     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14729   %}
14730   ins_pipe(pipe_slow);
14731 %}
14732 
14733 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14734 %{
14735   predicate(VM_Version::supports_avx10_2());
14736   match(Set dst (ConvF2I (LoadF src)));
14737   format %{ "evcvttss2sisl $dst, $src" %}
14738   ins_encode %{
14739     __ evcvttss2sisl($dst$$Register, $src$$Address);
14740   %}
14741   ins_pipe(pipe_slow);
14742 %}
14743 
14744 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14745 %{
14746   predicate(!VM_Version::supports_avx10_2());
14747   match(Set dst (ConvF2L src));
14748   effect(KILL cr);
14749   format %{ "convert_f2l $dst, $src" %}
14750   ins_encode %{
14751     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14752   %}
14753   ins_pipe(pipe_slow);
14754 %}
14755 
14756 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14757 %{
14758   predicate(VM_Version::supports_avx10_2());
14759   match(Set dst (ConvF2L src));
14760   format %{ "evcvttss2sisq $dst, $src" %}
14761   ins_encode %{
14762     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14763   %}
14764   ins_pipe(pipe_slow);
14765 %}
14766 
14767 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14768 %{
14769   predicate(VM_Version::supports_avx10_2());
14770   match(Set dst (ConvF2L (LoadF src)));
14771   format %{ "evcvttss2sisq $dst, $src" %}
14772   ins_encode %{
14773     __ evcvttss2sisq($dst$$Register, $src$$Address);
14774   %}
14775   ins_pipe(pipe_slow);
14776 %}
14777 
14778 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14779 %{
14780   predicate(!VM_Version::supports_avx10_2());
14781   match(Set dst (ConvD2I src));
14782   effect(KILL cr);
14783   format %{ "convert_d2i $dst, $src" %}
14784   ins_encode %{
14785     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14786   %}
14787   ins_pipe(pipe_slow);
14788 %}
14789 
14790 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14791 %{
14792   predicate(VM_Version::supports_avx10_2());
14793   match(Set dst (ConvD2I src));
14794   format %{ "evcvttsd2sisl $dst, $src" %}
14795   ins_encode %{
14796     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14797   %}
14798   ins_pipe(pipe_slow);
14799 %}
14800 
14801 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14802 %{
14803   predicate(VM_Version::supports_avx10_2());
14804   match(Set dst (ConvD2I (LoadD src)));
14805   format %{ "evcvttsd2sisl $dst, $src" %}
14806   ins_encode %{
14807     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14808   %}
14809   ins_pipe(pipe_slow);
14810 %}
14811 
14812 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14813 %{
14814   predicate(!VM_Version::supports_avx10_2());
14815   match(Set dst (ConvD2L src));
14816   effect(KILL cr);
14817   format %{ "convert_d2l $dst, $src" %}
14818   ins_encode %{
14819     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14820   %}
14821   ins_pipe(pipe_slow);
14822 %}
14823 
14824 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14825 %{
14826   predicate(VM_Version::supports_avx10_2());
14827   match(Set dst (ConvD2L src));
14828   format %{ "evcvttsd2sisq $dst, $src" %}
14829   ins_encode %{
14830     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14831   %}
14832   ins_pipe(pipe_slow);
14833 %}
14834 
14835 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14836 %{
14837   predicate(VM_Version::supports_avx10_2());
14838   match(Set dst (ConvD2L (LoadD src)));
14839   format %{ "evcvttsd2sisq $dst, $src" %}
14840   ins_encode %{
14841     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14842   %}
14843   ins_pipe(pipe_slow);
14844 %}
14845 
14846 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14847 %{
14848   match(Set dst (RoundD src));
14849   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14850   format %{ "round_double $dst,$src\t# using $rtmp and $rcx as TEMP" %}
14851   ins_encode %{
14852     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14853   %}
14854   ins_pipe(pipe_slow);
14855 %}
14856 
14857 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14858 %{
14859   match(Set dst (RoundF src));
14860   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14861   format %{ "round_float $dst,$src" %}
14862   ins_encode %{
14863     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14864   %}
14865   ins_pipe(pipe_slow);
14866 %}
14867 
14868 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14869 %{
14870   predicate(!UseXmmI2F);
14871   match(Set dst (ConvI2F src));
14872 
14873   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14874   ins_encode %{
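          // cvtsi2ssl only writes the low 32 bits of $dst; on AVX targets the
          // register is zeroed first to break the false dependency on its
          // stale upper bits. The same pattern recurs in the i2d/l2f/l2d
          // conversions below.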
14875     if (UseAVX > 0) {
14876       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14877     }
14878     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14879   %}
14880   ins_pipe(pipe_slow); // XXX
14881 %}
14882 
14883 instruct convI2F_reg_mem(regF dst, memory src)
14884 %{
14885   predicate(UseAVX == 0);
14886   match(Set dst (ConvI2F (LoadI src)));
14887 
14888   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14889   ins_encode %{
14890     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14891   %}
14892   ins_pipe(pipe_slow); // XXX
14893 %}
14894 
14895 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14896 %{
14897   predicate(!UseXmmI2D);
14898   match(Set dst (ConvI2D src));
14899 
14900   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14901   ins_encode %{
14902     if (UseAVX > 0) {
14903       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14904     }
14905     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14906   %}
14907   ins_pipe(pipe_slow); // XXX
14908 %}
14909 
14910 instruct convI2D_reg_mem(regD dst, memory src)
14911 %{
14912   predicate(UseAVX == 0);
14913   match(Set dst (ConvI2D (LoadI src)));
14914 
14915   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14916   ins_encode %{
14917     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14918   %}
14919   ins_pipe(pipe_slow); // XXX
14920 %}
14921 
14922 instruct convXI2F_reg(regF dst, rRegI src)
14923 %{
14924   predicate(UseXmmI2F);
14925   match(Set dst (ConvI2F src));
14926 
14927   format %{ "movdl $dst, $src\n\t"
14928             "cvtdq2ps $dst, $dst\t# i2f" %}
14929   ins_encode %{
14930     __ movdl($dst$$XMMRegister, $src$$Register);
14931     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14932   %}
14933   ins_pipe(pipe_slow); // XXX
14934 %}
14935 
14936 instruct convXI2D_reg(regD dst, rRegI src)
14937 %{
14938   predicate(UseXmmI2D);
14939   match(Set dst (ConvI2D src));
14940 
14941   format %{ "movdl $dst, $src\n\t"
14942             "cvtdq2pd $dst, $dst\t# i2d" %}
14943   ins_encode %{
14944     __ movdl($dst$$XMMRegister, $src$$Register);
14945     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14946   %}
14947   ins_pipe(pipe_slow); // XXX
14948 %}
14949 
14950 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14951 %{
14952   match(Set dst (ConvL2F src));
14953 
14954   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14955   ins_encode %{
14956     if (UseAVX > 0) {
14957       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14958     }
14959     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14960   %}
14961   ins_pipe(pipe_slow); // XXX
14962 %}
14963 
14964 instruct convL2F_reg_mem(regF dst, memory src)
14965 %{
14966   predicate(UseAVX == 0);
14967   match(Set dst (ConvL2F (LoadL src)));
14968 
14969   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14970   ins_encode %{
14971     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14972   %}
14973   ins_pipe(pipe_slow); // XXX
14974 %}
14975 
14976 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14977 %{
14978   match(Set dst (ConvL2D src));
14979 
14980   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14981   ins_encode %{
14982     if (UseAVX > 0) {
14983       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14984     }
14985     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14986   %}
14987   ins_pipe(pipe_slow); // XXX
14988 %}
14989 
14990 instruct convL2D_reg_mem(regD dst, memory src)
14991 %{
14992   predicate(UseAVX == 0);
14993   match(Set dst (ConvL2D (LoadL src)));
14994 
14995   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14996   ins_encode %{
14997     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14998   %}
14999   ins_pipe(pipe_slow); // XXX
15000 %}
15001 
15002 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15003 %{
15004   match(Set dst (ConvI2L src));
15005 
15006   ins_cost(125);
15007   format %{ "movslq  $dst, $src\t# i2l" %}
15008   ins_encode %{
15009     __ movslq($dst$$Register, $src$$Register);
15010   %}
15011   ins_pipe(ialu_reg_reg);
15012 %}
15013 
15014 // Zero-extend convert int to long
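      // On x86-64 a 32-bit movl implicitly zeroes bits 63:32 of its
      // destination, so (ConvI2L src) & 0xFFFFFFFF needs no explicit and; when
      // dst and src are already the same register it needs no code at all.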
15015 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15016 %{
15017   match(Set dst (AndL (ConvI2L src) mask));
15018 
15019   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
15020   ins_encode %{
15021     if ($dst$$reg != $src$$reg) {
15022       __ movl($dst$$Register, $src$$Register);
15023     }
15024   %}
15025   ins_pipe(ialu_reg_reg);
15026 %}
15027 
15028 // Zero-extend convert int to long
15029 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15030 %{
15031   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15032 
15033   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
15034   ins_encode %{
15035     __ movl($dst$$Register, $src$$Address);
15036   %}
15037   ins_pipe(ialu_reg_mem);
15038 %}
15039 
15040 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15041 %{
15042   match(Set dst (AndL src mask));
15043 
15044   format %{ "movl    $dst, $src\t# zero-extend long" %}
15045   ins_encode %{
15046     __ movl($dst$$Register, $src$$Register);
15047   %}
15048   ins_pipe(ialu_reg_reg);
15049 %}
15050 
15051 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15052 %{
15053   match(Set dst (ConvL2I src));
15054 
15055   format %{ "movl    $dst, $src\t# l2i" %}
15056   ins_encode %{
15057     __ movl($dst$$Register, $src$$Register);
15058   %}
15059   ins_pipe(ialu_reg_reg);
15060 %}
15061 
15062 
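      // Bitwise moves between the integer and FP register files. MoveF2I/
      // MoveI2F and MoveD2L/MoveL2D reinterpret the bits unchanged, either
      // through a stack slot or directly with movd/movq between a GPR and an
      // XMM register.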
15063 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15064   match(Set dst (MoveF2I src));
15065   effect(DEF dst, USE src);
15066 
15067   ins_cost(125);
15068   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
15069   ins_encode %{
15070     __ movl($dst$$Register, Address(rsp, $src$$disp));
15071   %}
15072   ins_pipe(ialu_reg_mem);
15073 %}
15074 
15075 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15076   match(Set dst (MoveI2F src));
15077   effect(DEF dst, USE src);
15078 
15079   ins_cost(125);
15080   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
15081   ins_encode %{
15082     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15083   %}
15084   ins_pipe(pipe_slow);
15085 %}
15086 
15087 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15088   match(Set dst (MoveD2L src));
15089   effect(DEF dst, USE src);
15090 
15091   ins_cost(125);
15092   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
15093   ins_encode %{
15094     __ movq($dst$$Register, Address(rsp, $src$$disp));
15095   %}
15096   ins_pipe(ialu_reg_mem);
15097 %}
15098 
15099 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15100   predicate(!UseXmmLoadAndClearUpper);
15101   match(Set dst (MoveL2D src));
15102   effect(DEF dst, USE src);
15103 
15104   ins_cost(125);
15105   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
15106   ins_encode %{
15107     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15108   %}
15109   ins_pipe(pipe_slow);
15110 %}
15111 
15112 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15113   predicate(UseXmmLoadAndClearUpper);
15114   match(Set dst (MoveL2D src));
15115   effect(DEF dst, USE src);
15116 
15117   ins_cost(125);
15118   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
15119   ins_encode %{
15120     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15121   %}
15122   ins_pipe(pipe_slow);
15123 %}
15124 
15125 
15126 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15127   match(Set dst (MoveF2I src));
15128   effect(DEF dst, USE src);
15129 
15130   ins_cost(95); // XXX
15131   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
15132   ins_encode %{
15133     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15134   %}
15135   ins_pipe(pipe_slow);
15136 %}
15137 
15138 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15139   match(Set dst (MoveI2F src));
15140   effect(DEF dst, USE src);
15141 
15142   ins_cost(100);
15143   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15144   ins_encode %{
15145     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15146   %}
15147   ins_pipe( ialu_mem_reg );
15148 %}
15149 
15150 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15151   match(Set dst (MoveD2L src));
15152   effect(DEF dst, USE src);
15153 
15154   ins_cost(95); // XXX
15155   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
15156   ins_encode %{
15157     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15158   %}
15159   ins_pipe(pipe_slow);
15160 %}
15161 
15162 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15163   match(Set dst (MoveL2D src));
15164   effect(DEF dst, USE src);
15165 
15166   ins_cost(100);
15167   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15168   ins_encode %{
15169     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15170   %}
15171   ins_pipe(ialu_mem_reg);
15172 %}
15173 
15174 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15175   match(Set dst (MoveF2I src));
15176   effect(DEF dst, USE src);
15177   ins_cost(85);
15178   format %{ "movd    $dst,$src\t# MoveF2I" %}
15179   ins_encode %{
15180     __ movdl($dst$$Register, $src$$XMMRegister);
15181   %}
15182   ins_pipe( pipe_slow );
15183 %}
15184 
15185 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15186   match(Set dst (MoveD2L src));
15187   effect(DEF dst, USE src);
15188   ins_cost(85);
15189   format %{ "movd    $dst,$src\t# MoveD2L" %}
15190   ins_encode %{
15191     __ movdq($dst$$Register, $src$$XMMRegister);
15192   %}
15193   ins_pipe( pipe_slow );
15194 %}
15195 
15196 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15197   match(Set dst (MoveI2F src));
15198   effect(DEF dst, USE src);
15199   ins_cost(100);
15200   format %{ "movd    $dst,$src\t# MoveI2F" %}
15201   ins_encode %{
15202     __ movdl($dst$$XMMRegister, $src$$Register);
15203   %}
15204   ins_pipe( pipe_slow );
15205 %}
15206 
15207 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15208   match(Set dst (MoveL2D src));
15209   effect(DEF dst, USE src);
15210   ins_cost(100);
15211   format %{ "movd    $dst,$src\t# MoveL2D" %}
15212   ins_encode %{
15213      __ movdq($dst$$XMMRegister, $src$$Register);
15214   %}
15215   ins_pipe( pipe_slow );
15216 %}
15217 
15218 // Fast clearing of an array
15219 // Small non-constant length ClearArray for non-AVX512 targets.
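      // clear_mem selects among rep stosb (UseFastStosb), a 64-bytes-per-
      // iteration YMM zeroing loop (UseXMMForObjInit), and rep stosq at code
      // emission time; the format strings below sketch all three shapes, and
      // cnt arrives as a count of 8-byte words.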
15220 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15221                   Universe dummy, rFlagsReg cr)
15222 %{
15223   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15224   match(Set dummy (ClearArray cnt base));
15225   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15226 
15227   format %{ $$template
15228     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15229     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15230     $$emit$$"jg      LARGE\n\t"
15231     $$emit$$"dec     rcx\n\t"
15232     $$emit$$"js      DONE\t# Zero length\n\t"
15233     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15234     $$emit$$"dec     rcx\n\t"
15235     $$emit$$"jge     LOOP\n\t"
15236     $$emit$$"jmp     DONE\n\t"
15237     $$emit$$"# LARGE:\n\t"
15238     if (UseFastStosb) {
15239        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15240        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15241     } else if (UseXMMForObjInit) {
15242        $$emit$$"mov     rdi,rax\n\t"
15243        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15244        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15245        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15246        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15247        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15248        $$emit$$"add     0x40,rax\n\t"
15249        $$emit$$"# L_zero_64_bytes:\n\t"
15250        $$emit$$"sub     0x8,rcx\n\t"
15251        $$emit$$"jge     L_loop\n\t"
15252        $$emit$$"add     0x4,rcx\n\t"
15253        $$emit$$"jl      L_tail\n\t"
15254        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15255        $$emit$$"add     0x20,rax\n\t"
15256        $$emit$$"sub     0x4,rcx\n\t"
15257        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15258        $$emit$$"add     0x4,rcx\n\t"
15259        $$emit$$"jle     L_end\n\t"
15260        $$emit$$"dec     rcx\n\t"
15261        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15262        $$emit$$"vmovq   xmm0,(rax)\n\t"
15263        $$emit$$"add     0x8,rax\n\t"
15264        $$emit$$"dec     rcx\n\t"
15265        $$emit$$"jge     L_sloop\n\t"
15266        $$emit$$"# L_end:\n\t"
15267     } else {
15268        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15269     }
15270     $$emit$$"# DONE"
15271   %}
15272   ins_encode %{
15273     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15274                  $tmp$$XMMRegister, false, knoreg);
15275   %}
15276   ins_pipe(pipe_slow);
15277 %}
15278 
15279 // Small non-constant length ClearArray for AVX512 targets.
15280 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15281                        Universe dummy, rFlagsReg cr)
15282 %{
15283   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15284   match(Set dummy (ClearArray cnt base));
15285   ins_cost(125);
15286   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15287 
15288   format %{ $$template
15289     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15290     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15291     $$emit$$"jg      LARGE\n\t"
15292     $$emit$$"dec     rcx\n\t"
15293     $$emit$$"js      DONE\t# Zero length\n\t"
15294     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15295     $$emit$$"dec     rcx\n\t"
15296     $$emit$$"jge     LOOP\n\t"
15297     $$emit$$"jmp     DONE\n\t"
15298     $$emit$$"# LARGE:\n\t"
15299     if (UseFastStosb) {
15300        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15301        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15302     } else if (UseXMMForObjInit) {
15303        $$emit$$"mov     rdi,rax\n\t"
15304        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15305        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15306        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15307        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15308        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15309        $$emit$$"add     0x40,rax\n\t"
15310        $$emit$$"# L_zero_64_bytes:\n\t"
15311        $$emit$$"sub     0x8,rcx\n\t"
15312        $$emit$$"jge     L_loop\n\t"
15313        $$emit$$"add     0x4,rcx\n\t"
15314        $$emit$$"jl      L_tail\n\t"
15315        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15316        $$emit$$"add     0x20,rax\n\t"
15317        $$emit$$"sub     0x4,rcx\n\t"
15318        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15319        $$emit$$"add     0x4,rcx\n\t"
15320        $$emit$$"jle     L_end\n\t"
15321        $$emit$$"dec     rcx\n\t"
15322        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15323        $$emit$$"vmovq   xmm0,(rax)\n\t"
15324        $$emit$$"add     0x8,rax\n\t"
15325        $$emit$$"dec     rcx\n\t"
15326        $$emit$$"jge     L_sloop\n\t"
15327        $$emit$$"# L_end:\n\t"
15328     } else {
15329        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15330     }
15331     $$emit$$"# DONE"
15332   %}
15333   ins_encode %{
15334     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15335                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15336   %}
15337   ins_pipe(pipe_slow);
15338 %}
15339 
15340 // Large non-constant length ClearArray for non-AVX512 targets.
15341 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15342                         Universe dummy, rFlagsReg cr)
15343 %{
15344   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15345   match(Set dummy (ClearArray cnt base));
15346   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15347 
15348   format %{ $$template
15349     if (UseFastStosb) {
15350        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15351        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15352        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15353     } else if (UseXMMForObjInit) {
15354        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15355        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15356        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15357        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15358        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15359        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15360        $$emit$$"add     0x40,rax\n\t"
15361        $$emit$$"# L_zero_64_bytes:\n\t"
15362        $$emit$$"sub     0x8,rcx\n\t"
15363        $$emit$$"jge     L_loop\n\t"
15364        $$emit$$"add     0x4,rcx\n\t"
15365        $$emit$$"jl      L_tail\n\t"
15366        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15367        $$emit$$"add     0x20,rax\n\t"
15368        $$emit$$"sub     0x4,rcx\n\t"
15369        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15370        $$emit$$"add     0x4,rcx\n\t"
15371        $$emit$$"jle     L_end\n\t"
15372        $$emit$$"dec     rcx\n\t"
15373        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15374        $$emit$$"vmovq   xmm0,(rax)\n\t"
15375        $$emit$$"add     0x8,rax\n\t"
15376        $$emit$$"dec     rcx\n\t"
15377        $$emit$$"jge     L_sloop\n\t"
15378        $$emit$$"# L_end:\n\t"
15379     } else {
15380        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15381        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15382     }
15383   %}
15384   ins_encode %{
15385     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15386                  $tmp$$XMMRegister, true, knoreg);
15387   %}
15388   ins_pipe(pipe_slow);
15389 %}
15390 
15391 // Large non-constant length ClearArray for AVX512 targets.
15392 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15393                              Universe dummy, rFlagsReg cr)
15394 %{
15395   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15396   match(Set dummy (ClearArray cnt base));
15397   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15398 
15399   format %{ $$template
15400     if (UseFastStosb) {
15401        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15402        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15403        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15404     } else if (UseXMMForObjInit) {
15405        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15406        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15407        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15408        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15409        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15410        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15411        $$emit$$"add     0x40,rax\n\t"
15412        $$emit$$"# L_zero_64_bytes:\n\t"
15413        $$emit$$"sub     0x8,rcx\n\t"
15414        $$emit$$"jge     L_loop\n\t"
15415        $$emit$$"add     0x4,rcx\n\t"
15416        $$emit$$"jl      L_tail\n\t"
15417        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15418        $$emit$$"add     0x20,rax\n\t"
15419        $$emit$$"sub     0x4,rcx\n\t"
15420        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15421        $$emit$$"add     0x4,rcx\n\t"
15422        $$emit$$"jle     L_end\n\t"
15423        $$emit$$"dec     rcx\n\t"
15424        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15425        $$emit$$"vmovq   xmm0,(rax)\n\t"
15426        $$emit$$"add     0x8,rax\n\t"
15427        $$emit$$"dec     rcx\n\t"
15428        $$emit$$"jge     L_sloop\n\t"
15429        $$emit$$"# L_end:\n\t"
15430     } else {
15431        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15432        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15433     }
15434   %}
15435   ins_encode %{
15436     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15437                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15438   %}
15439   ins_pipe(pipe_slow);
15440 %}
15441 
15442 // Small constant length ClearArray for AVX512 targets.
15443 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15444 %{
15445   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15446   match(Set dummy (ClearArray cnt base));
15447   ins_cost(100);
15448   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15449   format %{ "clear_mem_imm $base, $cnt" %}
15450   ins_encode %{
15451    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15452   %}
15453   ins_pipe(pipe_slow);
15454 %}
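
// A hypothetical sketch (operand names invented, not the exact stub code) of
// what the mask register enables on the constant-length path above: a short
// clear can finish with one masked store instead of a scalar tail loop,
// which is why this rule demands AVX-512VL and is priced at ins_cost(100).
//
//   mov       rtmp, -1
//   bzhi      rtmp, rtmp, cnt        # one mask bit per 8-byte word
//   kmovw     k1, rtmp
//   vpxor     xmm0, xmm0, xmm0
//   vmovdqu64 [base]{k1}, zmm0       # masked zero store, no tail loop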
15455 
15456 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15457                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15458 %{
15459   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15460   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15461   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15462 
15463   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15464   ins_encode %{
15465     __ string_compare($str1$$Register, $str2$$Register,
15466                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15467                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15468   %}
15469   ins_pipe( pipe_slow );
15470 %}
15471 
15472 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15473                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15474 %{
15475   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15476   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15477   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15478 
15479   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15480   ins_encode %{
15481     __ string_compare($str1$$Register, $str2$$Register,
15482                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15483                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15484   %}
15485   ins_pipe( pipe_slow );
15486 %}
15487 
15488 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15489                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15490 %{
15491   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15492   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15493   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15494 
15495   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15496   ins_encode %{
15497     __ string_compare($str1$$Register, $str2$$Register,
15498                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15499                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15500   %}
15501   ins_pipe( pipe_slow );
15502 %}
15503 
15504 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15505                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15506 %{
15507   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15508   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15509   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15510 
15511   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15512   ins_encode %{
15513     __ string_compare($str1$$Register, $str2$$Register,
15514                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15515                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15516   %}
15517   ins_pipe( pipe_slow );
15518 %}
15519 
15520 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15521                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15522 %{
15523   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15524   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15525   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15526 
15527   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15528   ins_encode %{
15529     __ string_compare($str1$$Register, $str2$$Register,
15530                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15531                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15532   %}
15533   ins_pipe( pipe_slow );
15534 %}
15535 
15536 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15537                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15538 %{
15539   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15540   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15541   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15542 
15543   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15544   ins_encode %{
15545     __ string_compare($str1$$Register, $str2$$Register,
15546                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15547                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15548   %}
15549   ins_pipe( pipe_slow );
15550 %}
15551 
15552 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15553                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15554 %{
15555   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15556   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15557   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15558 
15559   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15560   ins_encode %{
15561     __ string_compare($str2$$Register, $str1$$Register,
15562                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15563                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15564   %}
15565   ins_pipe( pipe_slow );
15566 %}
15567 
15568 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15569                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15570 %{
15571   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15572   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15573   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15574 
15575   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15576   ins_encode %{
15577     __ string_compare($str2$$Register, $str1$$Register,
15578                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15579                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15580   %}
15581   ins_pipe( pipe_slow );
15582 %}
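
// The UL rules above are the one flavour whose ins_encode passes its
// operands to string_compare() swapped (str2/cnt2 first), with the fixed
// register assignments crossed to match (str1 in rsi rather than rdi),
// presumably so the stub can share one mixed Latin-1/UTF-16 code path
// between the LU and UL shapes.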
15583 
15584 // fast search of substring with known size.
15585 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15586                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15587 %{
15588   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15589   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15590   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15591 
15592   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15593   ins_encode %{
15594     int icnt2 = (int)$int_cnt2$$constant;
15595     if (icnt2 >= 16) {
15596       // IndexOf for constant substrings with size >= 16 elements
15597       // which don't need to be loaded through stack.
15598       __ string_indexofC8($str1$$Register, $str2$$Register,
15599                           $cnt1$$Register, $cnt2$$Register,
15600                           icnt2, $result$$Register,
15601                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15602     } else {
15603       // Small strings are loaded through stack if they cross page boundary.
15604       __ string_indexof($str1$$Register, $str2$$Register,
15605                         $cnt1$$Register, $cnt2$$Register,
15606                         icnt2, $result$$Register,
15607                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15608     }
15609   %}
15610   ins_pipe( pipe_slow );
15611 %}
15612 
15613 // fast search of substring with known size.
15614 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15615                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15616 %{
15617   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15618   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15619   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15620 
15621   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15622   ins_encode %{
15623     int icnt2 = (int)$int_cnt2$$constant;
15624     if (icnt2 >= 8) {
15625       // IndexOf for constant substrings with size >= 8 elements
15626       // which don't need to be loaded through stack.
15627       __ string_indexofC8($str1$$Register, $str2$$Register,
15628                           $cnt1$$Register, $cnt2$$Register,
15629                           icnt2, $result$$Register,
15630                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15631     } else {
15632       // Small strings are loaded through stack if they cross page boundary.
15633       __ string_indexof($str1$$Register, $str2$$Register,
15634                         $cnt1$$Register, $cnt2$$Register,
15635                         icnt2, $result$$Register,
15636                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15637     }
15638   %}
15639   ins_pipe( pipe_slow );
15640 %}
15641 
15642 // fast search of substring with known size.
15643 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15644                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15645 %{
15646   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15647   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15648   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15649 
15650   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15651   ins_encode %{
15652     int icnt2 = (int)$int_cnt2$$constant;
15653     if (icnt2 >= 8) {
15654       // IndexOf for constant substrings with size >= 8 elements
15655       // which don't need to be loaded through stack.
15656       __ string_indexofC8($str1$$Register, $str2$$Register,
15657                           $cnt1$$Register, $cnt2$$Register,
15658                           icnt2, $result$$Register,
15659                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15660     } else {
15661       // Small strings are loaded through stack if they cross page boundary.
15662       __ string_indexof($str1$$Register, $str2$$Register,
15663                         $cnt1$$Register, $cnt2$$Register,
15664                         icnt2, $result$$Register,
15665                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15666     }
15667   %}
15668   ins_pipe( pipe_slow );
15669 %}
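
// The constant-length thresholds above (16 elements for LL, 8 for UU and UL)
// both equal one full 16-byte SSE4.2 chunk: 16 Latin-1 bytes or 8 UTF-16
// chars. A constant needle of at least one chunk can be read directly by
// string_indexofC8(); a shorter one might straddle a page boundary, so
// string_indexof() stages it through the stack first.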
15670 
15671 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15672                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15673 %{
15674   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15675   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15676   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15677 
15678   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15679   ins_encode %{
15680     __ string_indexof($str1$$Register, $str2$$Register,
15681                       $cnt1$$Register, $cnt2$$Register,
15682                       (-1), $result$$Register,
15683                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15684   %}
15685   ins_pipe( pipe_slow );
15686 %}
15687 
15688 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15689                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15690 %{
15691   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15692   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15693   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15694 
15695   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15696   ins_encode %{
15697     __ string_indexof($str1$$Register, $str2$$Register,
15698                       $cnt1$$Register, $cnt2$$Register,
15699                       (-1), $result$$Register,
15700                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15701   %}
15702   ins_pipe( pipe_slow );
15703 %}
15704 
15705 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15706                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15707 %{
15708   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15709   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15710   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15711 
15712   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15713   ins_encode %{
15714     __ string_indexof($str1$$Register, $str2$$Register,
15715                       $cnt1$$Register, $cnt2$$Register,
15716                       (-1), $result$$Register,
15717                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15718   %}
15719   ins_pipe( pipe_slow );
15720 %}
15721 
15722 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15723                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15724 %{
15725   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15726   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15727   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15728   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15729   ins_encode %{
15730     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15731                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15732   %}
15733   ins_pipe( pipe_slow );
15734 %}
15735 
15736 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15737                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15738 %{
15739   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15740   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15741   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15742   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15743   ins_encode %{
15744     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15745                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15746   %}
15747   ins_pipe( pipe_slow );
15748 %}
15749 
15750 // fast string equals
15751 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15752                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15753 %{
15754   predicate(!VM_Version::supports_avx512vlbw());
15755   match(Set result (StrEquals (Binary str1 str2) cnt));
15756   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15757 
15758   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15759   ins_encode %{
15760     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15761                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15762                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15763   %}
15764   ins_pipe( pipe_slow );
15765 %}
15766 
15767 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15768                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15769 %{
15770   predicate(VM_Version::supports_avx512vlbw());
15771   match(Set result (StrEquals (Binary str1 str2) cnt));
15772   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15773 
15774   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15775   ins_encode %{
15776     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15777                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15778                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15779   %}
15780   ins_pipe( pipe_slow );
15781 %}
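
// Both StrEquals rules reuse MacroAssembler::arrays_equals(); the leading
// 'false' selects the string shape, where the caller supplies the element
// count and no array length checks are needed. The array_equals* rules below
// pass 'true' instead and let the stub load and compare the lengths itself.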
15782 
15783 // fast array equals
15784 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15785                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15786 %{
15787   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15788   match(Set result (AryEq ary1 ary2));
15789   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15790 
15791   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15792   ins_encode %{
15793     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15794                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15795                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15796   %}
15797   ins_pipe( pipe_slow );
15798 %}
15799 
15800 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15801                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15802 %{
15803   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15804   match(Set result (AryEq ary1 ary2));
15805   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15806 
15807   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15808   ins_encode %{
15809     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15810                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15811                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15812   %}
15813   ins_pipe( pipe_slow );
15814 %}
15815 
15816 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15817                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15818 %{
15819   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15820   match(Set result (AryEq ary1 ary2));
15821   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15822 
15823   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15824   ins_encode %{
15825     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15826                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15827                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15828   %}
15829   ins_pipe( pipe_slow );
15830 %}
15831 
15832 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15833                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15834 %{
15835   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15836   match(Set result (AryEq ary1 ary2));
15837   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15838 
15839   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15840   ins_encode %{
15841     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15842                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15843                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15844   %}
15845   ins_pipe( pipe_slow );
15846 %}
15847 
15848 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15849                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15850                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15851                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15852                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15853 %{
15854   predicate(UseAVX >= 2);
15855   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15856   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15857          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15858          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15859          USE basic_type, KILL cr);
15860 
15861   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15862   ins_encode %{
15863     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15864                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15865                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15866                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15867                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15868                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15869                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15870   %}
15871   ins_pipe( pipe_slow );
15872 %}
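
// The thirteen vector temporaries feed the unrolled vector loop in
// MacroAssembler::arrays_hashcode(), which evaluates the Java hash
// recurrence (result = 31 * result + element) with per-lane powers of 31.
// basic_type is a compile-time constant that only selects the element load
// size, hence USE rather than TEMP/KILL.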
15873 
15874 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15875                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15876 %{
15877   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15878   match(Set result (CountPositives ary1 len));
15879   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15880 
15881   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15882   ins_encode %{
15883     __ count_positives($ary1$$Register, $len$$Register,
15884                        $result$$Register, $tmp3$$Register,
15885                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15886   %}
15887   ins_pipe( pipe_slow );
15888 %}
15889 
15890 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15891                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15892 %{
15893   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15894   match(Set result (CountPositives ary1 len));
15895   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15896 
15897   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15898   ins_encode %{
15899     __ count_positives($ary1$$Register, $len$$Register,
15900                        $result$$Register, $tmp3$$Register,
15901                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15902   %}
15903   ins_pipe( pipe_slow );
15904 %}
15905 
15906 // fast char[] to byte[] compression
15907 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15908                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15909   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15910   match(Set result (StrCompressedCopy src (Binary dst len)));
15911   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15912          USE_KILL len, KILL tmp5, KILL cr);
15913 
15914   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15915   ins_encode %{
15916     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15917                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15918                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15919                            knoreg, knoreg);
15920   %}
15921   ins_pipe( pipe_slow );
15922 %}
15923 
15924 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15925                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15926   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15927   match(Set result (StrCompressedCopy src (Binary dst len)));
15928   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15929          USE_KILL len, KILL tmp5, KILL cr);
15930 
15931   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15932   ins_encode %{
15933     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15934                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15935                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15936                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15937   %}
15938   ins_pipe( pipe_slow );
15939 %}
15940 // fast byte[] to char[] inflation
15941 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15942                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15943   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15944   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15945   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15946 
15947   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15948   ins_encode %{
15949     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15950                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15951   %}
15952   ins_pipe( pipe_slow );
15953 %}
15954 
15955 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15956                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15957   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15958   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15959   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15960 
15961   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15962   ins_encode %{
15963     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15964                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15965   %}
15966   ins_pipe( pipe_slow );
15967 %}
15968 
15969 // encode char[] to byte[] in ISO_8859_1
15970 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15971                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15972                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15973   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15974   match(Set result (EncodeISOArray src (Binary dst len)));
15975   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15976 
15977   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
15978   ins_encode %{
15979     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15980                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15981                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15982   %}
15983   ins_pipe( pipe_slow );
15984 %}
15985 
15986 // encode char[] to byte[] in ASCII
15987 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15988                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15989                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15990   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15991   match(Set result (EncodeISOArray src (Binary dst len)));
15992   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15993 
15994   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
15995   ins_encode %{
15996     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15997                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15998                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15999   %}
16000   ins_pipe( pipe_slow );
16001 %}
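
// Both encoders share MacroAssembler::encode_iso_array(); the trailing bool
// enables ASCII mode, tightening the encodable test from c <= 0xFF
// (ISO-8859-1) to c <= 0x7F. $result receives the number of chars actually
// encoded, so the caller can detect where a non-encodable char stopped the
// copy.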
16002 
16003 //----------Overflow Math Instructions-----------------------------------------
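//
// Each rule in this section only computes rFlagsReg; the matcher pairs it
// with a branch or cmov on the overflow condition. This is the pattern that
// the Math.addExact/subtractExact/multiplyExact/negateExact intrinsics
// compile to: perform the operation, then take the slow path if OF is set.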
16004 
16005 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16006 %{
16007   match(Set cr (OverflowAddI op1 op2));
16008   effect(DEF cr, USE_KILL op1, USE op2);
16009 
16010   format %{ "addl    $op1, $op2\t# overflow check int" %}
16011 
16012   ins_encode %{
16013     __ addl($op1$$Register, $op2$$Register);
16014   %}
16015   ins_pipe(ialu_reg_reg);
16016 %}
16017 
16018 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16019 %{
16020   match(Set cr (OverflowAddI op1 op2));
16021   effect(DEF cr, USE_KILL op1, USE op2);
16022 
16023   format %{ "addl    $op1, $op2\t# overflow check int" %}
16024 
16025   ins_encode %{
16026     __ addl($op1$$Register, $op2$$constant);
16027   %}
16028   ins_pipe(ialu_reg_reg);
16029 %}
16030 
16031 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16032 %{
16033   match(Set cr (OverflowAddL op1 op2));
16034   effect(DEF cr, USE_KILL op1, USE op2);
16035 
16036   format %{ "addq    $op1, $op2\t# overflow check long" %}
16037   ins_encode %{
16038     __ addq($op1$$Register, $op2$$Register);
16039   %}
16040   ins_pipe(ialu_reg_reg);
16041 %}
16042 
16043 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16044 %{
16045   match(Set cr (OverflowAddL op1 op2));
16046   effect(DEF cr, USE_KILL op1, USE op2);
16047 
16048   format %{ "addq    $op1, $op2\t# overflow check long" %}
16049   ins_encode %{
16050     __ addq($op1$$Register, $op2$$constant);
16051   %}
16052   ins_pipe(ialu_reg_reg);
16053 %}
16054 
16055 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16056 %{
16057   match(Set cr (OverflowSubI op1 op2));
16058 
16059   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16060   ins_encode %{
16061     __ cmpl($op1$$Register, $op2$$Register);
16062   %}
16063   ins_pipe(ialu_reg_reg);
16064 %}
16065 
16066 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16067 %{
16068   match(Set cr (OverflowSubI op1 op2));
16069 
16070   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16071   ins_encode %{
16072     __ cmpl($op1$$Register, $op2$$constant);
16073   %}
16074   ins_pipe(ialu_reg_reg);
16075 %}
16076 
16077 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16078 %{
16079   match(Set cr (OverflowSubL op1 op2));
16080 
16081   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16082   ins_encode %{
16083     __ cmpq($op1$$Register, $op2$$Register);
16084   %}
16085   ins_pipe(ialu_reg_reg);
16086 %}
16087 
16088 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16089 %{
16090   match(Set cr (OverflowSubL op1 op2));
16091 
16092   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16093   ins_encode %{
16094     __ cmpq($op1$$Register, $op2$$constant);
16095   %}
16096   ins_pipe(ialu_reg_reg);
16097 %}
16098 
16099 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16100 %{
16101   match(Set cr (OverflowSubI zero op2));
16102   effect(DEF cr, USE_KILL op2);
16103 
16104   format %{ "negl    $op2\t# overflow check int" %}
16105   ins_encode %{
16106     __ negl($op2$$Register);
16107   %}
16108   ins_pipe(ialu_reg_reg);
16109 %}
16110 
16111 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16112 %{
16113   match(Set cr (OverflowSubL zero op2));
16114   effect(DEF cr, USE_KILL op2);
16115 
16116   format %{ "negq    $op2\t# overflow check long" %}
16117   ins_encode %{
16118     __ negq($op2$$Register);
16119   %}
16120   ins_pipe(ialu_reg_reg);
16121 %}
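
// The neg-based zero-minus-x forms work because NEG sets OF exactly when its
// operand is the minimum value (-2^31 or -2^63), the only inputs for which
// two's-complement negation overflows.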
16122 
16123 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16124 %{
16125   match(Set cr (OverflowMulI op1 op2));
16126   effect(DEF cr, USE_KILL op1, USE op2);
16127 
16128   format %{ "imull    $op1, $op2\t# overflow check int" %}
16129   ins_encode %{
16130     __ imull($op1$$Register, $op2$$Register);
16131   %}
16132   ins_pipe(ialu_reg_reg_alu0);
16133 %}
16134 
16135 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16136 %{
16137   match(Set cr (OverflowMulI op1 op2));
16138   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16139 
16140   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16141   ins_encode %{
16142     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16143   %}
16144   ins_pipe(ialu_reg_reg_alu0);
16145 %}
16146 
16147 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16148 %{
16149   match(Set cr (OverflowMulL op1 op2));
16150   effect(DEF cr, USE_KILL op1, USE op2);
16151 
16152   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16153   ins_encode %{
16154     __ imulq($op1$$Register, $op2$$Register);
16155   %}
16156   ins_pipe(ialu_reg_reg_alu0);
16157 %}
16158 
16159 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16160 %{
16161   match(Set cr (OverflowMulL op1 op2));
16162   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16163 
16164   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16165   ins_encode %{
16166     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16167   %}
16168   ins_pipe(ialu_reg_reg_alu0);
16169 %}
16170 
16171 
16172 //----------Control Flow Instructions------------------------------------------
16173 // Signed compare Instructions
16174 
16175 // XXX more variants!!
16176 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16177 %{
16178   match(Set cr (CmpI op1 op2));
16179   effect(DEF cr, USE op1, USE op2);
16180 
16181   format %{ "cmpl    $op1, $op2" %}
16182   ins_encode %{
16183     __ cmpl($op1$$Register, $op2$$Register);
16184   %}
16185   ins_pipe(ialu_cr_reg_reg);
16186 %}
16187 
16188 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16189 %{
16190   match(Set cr (CmpI op1 op2));
16191 
16192   format %{ "cmpl    $op1, $op2" %}
16193   ins_encode %{
16194     __ cmpl($op1$$Register, $op2$$constant);
16195   %}
16196   ins_pipe(ialu_cr_reg_imm);
16197 %}
16198 
16199 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16200 %{
16201   match(Set cr (CmpI op1 (LoadI op2)));
16202 
16203   ins_cost(500); // XXX
16204   format %{ "cmpl    $op1, $op2" %}
16205   ins_encode %{
16206     __ cmpl($op1$$Register, $op2$$Address);
16207   %}
16208   ins_pipe(ialu_cr_reg_mem);
16209 %}
16210 
16211 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16212 %{
16213   match(Set cr (CmpI src zero));
16214 
16215   format %{ "testl   $src, $src" %}
16216   ins_encode %{
16217     __ testl($src$$Register, $src$$Register);
16218   %}
16219   ins_pipe(ialu_cr_reg_imm);
16220 %}
16221 
16222 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16223 %{
16224   match(Set cr (CmpI (AndI src con) zero));
16225 
16226   format %{ "testl   $src, $con" %}
16227   ins_encode %{
16228     __ testl($src$$Register, $con$$constant);
16229   %}
16230   ins_pipe(ialu_cr_reg_imm);
16231 %}
16232 
16233 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16234 %{
16235   match(Set cr (CmpI (AndI src1 src2) zero));
16236 
16237   format %{ "testl   $src1, $src2" %}
16238   ins_encode %{
16239     __ testl($src1$$Register, $src2$$Register);
16240   %}
16241   ins_pipe(ialu_cr_reg_imm);
16242 %}
16243 
16244 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16245 %{
16246   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16247 
16248   format %{ "testl   $src, $mem" %}
16249   ins_encode %{
16250     __ testl($src$$Register, $mem$$Address);
16251   %}
16252   ins_pipe(ialu_cr_reg_mem);
16253 %}
16254 
16255 // Unsigned compare Instructions; really, same as signed except they
16256 // produce an rFlagsRegU instead of rFlagsReg.
16257 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16258 %{
16259   match(Set cr (CmpU op1 op2));
16260 
16261   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16262   ins_encode %{
16263     __ cmpl($op1$$Register, $op2$$Register);
16264   %}
16265   ins_pipe(ialu_cr_reg_reg);
16266 %}
16267 
16268 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16269 %{
16270   match(Set cr (CmpU op1 op2));
16271 
16272   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16273   ins_encode %{
16274     __ cmpl($op1$$Register, $op2$$constant);
16275   %}
16276   ins_pipe(ialu_cr_reg_imm);
16277 %}
16278 
16279 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16280 %{
16281   match(Set cr (CmpU op1 (LoadI op2)));
16282 
16283   ins_cost(500); // XXX
16284   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16285   ins_encode %{
16286     __ cmpl($op1$$Register, $op2$$Address);
16287   %}
16288   ins_pipe(ialu_cr_reg_mem);
16289 %}
16290 
16291 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16292 %{
16293   match(Set cr (CmpU src zero));
16294 
16295   format %{ "testl   $src, $src\t# unsigned" %}
16296   ins_encode %{
16297     __ testl($src$$Register, $src$$Register);
16298   %}
16299   ins_pipe(ialu_cr_reg_imm);
16300 %}
16301 
16302 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16303 %{
16304   match(Set cr (CmpP op1 op2));
16305 
16306   format %{ "cmpq    $op1, $op2\t# ptr" %}
16307   ins_encode %{
16308     __ cmpq($op1$$Register, $op2$$Register);
16309   %}
16310   ins_pipe(ialu_cr_reg_reg);
16311 %}
16312 
16313 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16314 %{
16315   match(Set cr (CmpP op1 (LoadP op2)));
16316   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16317 
16318   ins_cost(500); // XXX
16319   format %{ "cmpq    $op1, $op2\t# ptr" %}
16320   ins_encode %{
16321     __ cmpq($op1$$Register, $op2$$Address);
16322   %}
16323   ins_pipe(ialu_cr_reg_mem);
16324 %}
16325 
16326 // XXX this is generalized by compP_rReg_mem???
16327 // Compare raw pointer (used in out-of-heap check).
16328 // Only works because non-oop pointers must be raw pointers
16329 // and raw pointers have no anti-dependencies.
16330 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16331 %{
16332   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16333             n->in(2)->as_Load()->barrier_data() == 0);
16334   match(Set cr (CmpP op1 (LoadP op2)));
16335 
16336   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16337   ins_encode %{
16338     __ cmpq($op1$$Register, $op2$$Address);
16339   %}
16340   ins_pipe(ialu_cr_reg_mem);
16341 %}
16342 
16343 // This will generate a signed flags result. This should be OK since
16344 // any compare to a zero should be eq/neq.
16345 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16346 %{
16347   match(Set cr (CmpP src zero));
16348 
16349   format %{ "testq   $src, $src\t# ptr" %}
16350   ins_encode %{
16351     __ testq($src$$Register, $src$$Register);
16352   %}
16353   ins_pipe(ialu_cr_reg_imm);
16354 %}
16355 
16356 // This will generate a signed flags result. This should be OK since
16357 // any compare to a zero should be eq/neq.
16358 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16359 %{
16360   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16361             n->in(1)->as_Load()->barrier_data() == 0);
16362   match(Set cr (CmpP (LoadP op) zero));
16363 
16364   ins_cost(500); // XXX
16365   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16366   ins_encode %{
16367     __ testq($op$$Address, 0xFFFFFFFF);
16368   %}
16369   ins_pipe(ialu_cr_reg_imm);
16370 %}
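
// The encoding above only emits a 32-bit immediate: "testq r/m64, imm32"
// sign-extends it, so the 0xFFFFFFFF passed to testq() becomes the all-ones
// 64-bit mask shown in the format string.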
16371 
16372 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16373 %{
16374   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16375             n->in(1)->as_Load()->barrier_data() == 0);
16376   match(Set cr (CmpP (LoadP mem) zero));
16377 
16378   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16379   ins_encode %{
16380     __ cmpq(r12, $mem$$Address);
16381   %}
16382   ins_pipe(ialu_cr_reg_mem);
16383 %}
16384 
16385 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16386 %{
16387   match(Set cr (CmpN op1 op2));
16388 
16389   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16390   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16391   ins_pipe(ialu_cr_reg_reg);
16392 %}
16393 
16394 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16395 %{
16396   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16397   match(Set cr (CmpN src (LoadN mem)));
16398 
16399   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16400   ins_encode %{
16401     __ cmpl($src$$Register, $mem$$Address);
16402   %}
16403   ins_pipe(ialu_cr_reg_mem);
16404 %}
16405 
16406 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16407   match(Set cr (CmpN op1 op2));
16408 
16409   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16410   ins_encode %{
16411     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16412   %}
16413   ins_pipe(ialu_cr_reg_imm);
16414 %}
16415 
16416 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16417 %{
16418   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16419   match(Set cr (CmpN src (LoadN mem)));
16420 
16421   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16422   ins_encode %{
16423     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16424   %}
16425   ins_pipe(ialu_cr_reg_mem);
16426 %}
16427 
16428 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16429   match(Set cr (CmpN op1 op2));
16430 
16431   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16432   ins_encode %{
16433     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16434   %}
16435   ins_pipe(ialu_cr_reg_imm);
16436 %}
16437 
16438 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16439 %{
16440   predicate(!UseCompactObjectHeaders);
16441   match(Set cr (CmpN src (LoadNKlass mem)));
16442 
16443   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16444   ins_encode %{
16445     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16446   %}
16447   ins_pipe(ialu_cr_reg_mem);
16448 %}
16449 
16450 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16451   match(Set cr (CmpN src zero));
16452 
16453   format %{ "testl   $src, $src\t# compressed ptr" %}
16454   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16455   ins_pipe(ialu_cr_reg_imm);
16456 %}
16457 
16458 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16459 %{
16460   predicate(CompressedOops::base() != nullptr &&
16461             n->in(1)->as_Load()->barrier_data() == 0);
16462   match(Set cr (CmpN (LoadN mem) zero));
16463 
16464   ins_cost(500); // XXX
16465   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16466   ins_encode %{
16467     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16468   %}
16469   ins_pipe(ialu_cr_reg_mem);
16470 %}
16471 
16472 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16473 %{
16474   predicate(CompressedOops::base() == nullptr &&
16475             n->in(1)->as_Load()->barrier_data() == 0);
16476   match(Set cr (CmpN (LoadN mem) zero));
16477 
16478   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16479   ins_encode %{
16480     __ cmpl(r12, $mem$$Address);
16481   %}
16482   ins_pipe(ialu_cr_reg_mem);
16483 %}
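
// The *_reg0 variants lean on r12 permanently holding the compressed-oop
// heap base: when that base is zero, r12 is a known zero register of full
// width, so a null check reduces to a plain compare against r12 with no
// immediate at all.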
16484 
16485 // Yanked all unsigned pointer compare operations.
16486 // Pointer compares are done with CmpP which is already unsigned.
16487 
16488 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16489 %{
16490   match(Set cr (CmpL op1 op2));
16491 
16492   format %{ "cmpq    $op1, $op2" %}
16493   ins_encode %{
16494     __ cmpq($op1$$Register, $op2$$Register);
16495   %}
16496   ins_pipe(ialu_cr_reg_reg);
16497 %}
16498 
16499 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16500 %{
16501   match(Set cr (CmpL op1 op2));
16502 
16503   format %{ "cmpq    $op1, $op2" %}
16504   ins_encode %{
16505     __ cmpq($op1$$Register, $op2$$constant);
16506   %}
16507   ins_pipe(ialu_cr_reg_imm);
16508 %}
16509 
16510 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16511 %{
16512   match(Set cr (CmpL op1 (LoadL op2)));
16513 
16514   format %{ "cmpq    $op1, $op2" %}
16515   ins_encode %{
16516     __ cmpq($op1$$Register, $op2$$Address);
16517   %}
16518   ins_pipe(ialu_cr_reg_mem);
16519 %}
16520 
16521 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16522 %{
16523   match(Set cr (CmpL src zero));
16524 
16525   format %{ "testq   $src, $src" %}
16526   ins_encode %{
16527     __ testq($src$$Register, $src$$Register);
16528   %}
16529   ins_pipe(ialu_cr_reg_imm);
16530 %}
16531 
16532 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16533 %{
16534   match(Set cr (CmpL (AndL src con) zero));
16535 
16536   format %{ "testq   $src, $con\t# long" %}
16537   ins_encode %{
16538     __ testq($src$$Register, $con$$constant);
16539   %}
16540   ins_pipe(ialu_cr_reg_imm);
16541 %}
16542 
16543 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16544 %{
16545   match(Set cr (CmpL (AndL src1 src2) zero));
16546 
16547   format %{ "testq   $src1, $src2\t# long" %}
16548   ins_encode %{
16549     __ testq($src1$$Register, $src2$$Register);
16550   %}
16551   ins_pipe(ialu_cr_reg_imm);
16552 %}
16553 
16554 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16555 %{
16556   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16557 
16558   format %{ "testq   $src, $mem" %}
16559   ins_encode %{
16560     __ testq($src$$Register, $mem$$Address);
16561   %}
16562   ins_pipe(ialu_cr_reg_mem);
16563 %}
16564 
16565 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16566 %{
16567   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16568 
16569   format %{ "testq   $src, $mem" %}
16570   ins_encode %{
16571     __ testq($src$$Register, $mem$$Address);
16572   %}
16573   ins_pipe(ialu_cr_reg_mem);
16574 %}
16575 
16576 // Manifest a CmpU result in an integer register.  Very painful.
16577 // This is the test to avoid.
16578 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16579 %{
16580   match(Set dst (CmpU3 src1 src2));
16581   effect(KILL flags);
16582 
16583   ins_cost(275); // XXX
16584   format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
16585             "movl    $dst, -1\n\t"
16586             "jb,u    done\n\t"
16587             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16588     "done:" %}
16589   ins_encode %{
16590     Label done;
16591     __ cmpl($src1$$Register, $src2$$Register);
16592     __ movl($dst$$Register, -1);
16593     __ jccb(Assembler::below, done);
16594     __ setcc(Assembler::notZero, $dst$$Register);
16595     __ bind(done);
16596   %}
16597   ins_pipe(pipe_slow);
16598 %}
16599 
16600 // Manifest a CmpL result in an integer register.  Very painful.
16601 // This is the test to avoid.
16602 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16603 %{
16604   match(Set dst (CmpL3 src1 src2));
16605   effect(KILL flags);
16606 
16607   ins_cost(275); // XXX
16608   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16609             "movl    $dst, -1\n\t"
16610             "jl,s    done\n\t"
16611             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16612     "done:" %}
16613   ins_encode %{
16614     Label done;
16615     __ cmpq($src1$$Register, $src2$$Register);
16616     __ movl($dst$$Register, -1);
16617     __ jccb(Assembler::less, done);
16618     __ setcc(Assembler::notZero, $dst$$Register);
16619     __ bind(done);
16620   %}
16621   ins_pipe(pipe_slow);
16622 %}
16623 
16624 // Manifest a CmpUL result in an integer register.  Very painful.
16625 // This is the test to avoid.
16626 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16627 %{
16628   match(Set dst (CmpUL3 src1 src2));
16629   effect(KILL flags);
16630 
16631   ins_cost(275); // XXX
16632   format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
16633             "movl    $dst, -1\n\t"
16634             "jb,u    done\n\t"
16635             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16636     "done:" %}
16637   ins_encode %{
16638     Label done;
16639     __ cmpq($src1$$Register, $src2$$Register);
16640     __ movl($dst$$Register, -1);
16641     __ jccb(Assembler::below, done);
16642     __ setcc(Assembler::notZero, $dst$$Register);
16643     __ bind(done);
16644   %}
16645   ins_pipe(pipe_slow);
16646 %}
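
// All three Cmp*3 rules manifest -1/0/+1 with the same idiom: preload -1,
// branch past the setcc when "less" ("below" for the unsigned forms) holds,
// otherwise setne leaves 0 on equality and 1 on greater/above. These nodes
// typically originate from Integer.compare/Long.compare-style intrinsics and
// their unsigned counterparts.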
16647 
16648 // Unsigned long compare Instructions; really, same as signed long except they
16649 // produce an rFlagsRegU instead of rFlagsReg.
16650 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16651 %{
16652   match(Set cr (CmpUL op1 op2));
16653 
16654   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16655   ins_encode %{
16656     __ cmpq($op1$$Register, $op2$$Register);
16657   %}
16658   ins_pipe(ialu_cr_reg_reg);
16659 %}
16660 
16661 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16662 %{
16663   match(Set cr (CmpUL op1 op2));
16664 
16665   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16666   ins_encode %{
16667     __ cmpq($op1$$Register, $op2$$constant);
16668   %}
16669   ins_pipe(ialu_cr_reg_imm);
16670 %}
16671 
16672 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16673 %{
16674   match(Set cr (CmpUL op1 (LoadL op2)));
16675 
16676   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16677   ins_encode %{
16678     __ cmpq($op1$$Register, $op2$$Address);
16679   %}
16680   ins_pipe(ialu_cr_reg_mem);
16681 %}
16682 
16683 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16684 %{
16685   match(Set cr (CmpUL src zero));
16686 
16687   format %{ "testq   $src, $src\t# unsigned" %}
16688   ins_encode %{
16689     __ testq($src$$Register, $src$$Register);
16690   %}
16691   ins_pipe(ialu_cr_reg_imm);
16692 %}
16693 
16694 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16695 %{
16696   match(Set cr (CmpI (LoadB mem) imm));
16697 
16698   ins_cost(125);
16699   format %{ "cmpb    $mem, $imm" %}
16700   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16701   ins_pipe(ialu_cr_reg_mem);
16702 %}
16703 
16704 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16705 %{
16706   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16707 
16708   ins_cost(125);
16709   format %{ "testb   $mem, $imm\t# ubyte" %}
16710   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16711   ins_pipe(ialu_cr_reg_mem);
16712 %}
16713 
16714 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16715 %{
16716   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16717 
16718   ins_cost(125);
16719   format %{ "testb   $mem, $imm\t# byte" %}
16720   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16721   ins_pipe(ialu_cr_reg_mem);
16722 %}
16723 
16724 //----------Max and Min--------------------------------------------------------
16725 // Min Instructions
16726 
16727 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16728 %{
16729   predicate(!UseAPX);
16730   effect(USE_DEF dst, USE src, USE cr);
16731 
16732   format %{ "cmovlgt $dst, $src\t# min" %}
16733   ins_encode %{
16734     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16735   %}
16736   ins_pipe(pipe_cmov_reg);
16737 %}
16738 
16739 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16740 %{
16741   predicate(UseAPX);
16742   effect(DEF dst, USE src1, USE src2, USE cr);
16743 
16744   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16745   ins_encode %{
16746     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16747   %}
16748   ins_pipe(pipe_cmov_reg);
16749 %}
16750 
16751 instruct minI_rReg(rRegI dst, rRegI src)
16752 %{
16753   predicate(!UseAPX);
16754   match(Set dst (MinI dst src));
16755 
16756   ins_cost(200);
16757   expand %{
16758     rFlagsReg cr;
16759     compI_rReg(cr, dst, src);
16760     cmovI_reg_g(dst, src, cr);
16761   %}
16762 %}
16763 
16764 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16765 %{
16766   predicate(UseAPX);
16767   match(Set dst (MinI src1 src2));
16768   effect(DEF dst, USE src1, USE src2);
16769   flag(PD::Flag_ndd_demotable_opr1);
16770 
16771   ins_cost(200);
16772   expand %{
16773     rFlagsReg cr;
16774     compI_rReg(cr, src1, src2);
16775     cmovI_reg_g_ndd(dst, src1, src2, cr);
16776   %}
16777 %}
16778 
16779 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16780 %{
16781   predicate(!UseAPX);
16782   effect(USE_DEF dst, USE src, USE cr);
16783 
16784   format %{ "cmovllt $dst, $src\t# max" %}
16785   ins_encode %{
16786     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16787   %}
16788   ins_pipe(pipe_cmov_reg);
16789 %}
16790 
16791 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16792 %{
16793   predicate(UseAPX);
16794   effect(DEF dst, USE src1, USE src2, USE cr);
16795 
16796   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16797   ins_encode %{
16798     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16799   %}
16800   ins_pipe(pipe_cmov_reg);
16801 %}
16802 
16803 instruct maxI_rReg(rRegI dst, rRegI src)
16804 %{
16805   predicate(!UseAPX);
16806   match(Set dst (MaxI dst src));
16807 
16808   ins_cost(200);
16809   expand %{
16810     rFlagsReg cr;
16811     compI_rReg(cr, dst, src);
16812     cmovI_reg_l(dst, src, cr);
16813   %}
16814 %}
16815 
16816 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16817 %{
16818   predicate(UseAPX);
16819   match(Set dst (MaxI src1 src2));
16820   effect(DEF dst, USE src1, USE src2);
16821   flag(PD::Flag_ndd_demotable_opr1);
16822 
16823   ins_cost(200);
16824   expand %{
16825     rFlagsReg cr;
16826     compI_rReg(cr, src1, src2);
16827     cmovI_reg_l_ndd(dst, src1, src2, cr);
16828   %}
16829 %}
16830 
16831 // ============================================================================
16832 // Branch Instructions
16833 
16834 // Jump Direct - Label defines a relative address from JMP+1
16835 instruct jmpDir(label labl)
16836 %{
16837   match(Goto);
16838   effect(USE labl);
16839 
16840   ins_cost(300);
16841   format %{ "jmp     $labl" %}
16842   size(5);
16843   ins_encode %{
16844     Label* L = $labl$$label;
16845     __ jmp(*L, false); // Always long jump
16846   %}
16847   ins_pipe(pipe_jmp);
16848 %}
16849 
16850 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16851 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16852 %{
16853   match(If cop cr);
16854   effect(USE labl);
16855 
16856   ins_cost(300);
16857   format %{ "j$cop     $labl" %}
16858   size(6);
16859   ins_encode %{
16860     Label* L = $labl$$label;
16861     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16862   %}
16863   ins_pipe(pipe_jcc);
16864 %}
16865 
16866 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16867 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16868 %{
16869   match(CountedLoopEnd cop cr);
16870   effect(USE labl);
16871 
16872   ins_cost(300);
16873   format %{ "j$cop     $labl\t# loop end" %}
16874   size(6);
16875   ins_encode %{
16876     Label* L = $labl$$label;
16877     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16878   %}
16879   ins_pipe(pipe_jcc);
16880 %}
16881 
16882 // Jump Direct Conditional - using unsigned comparison
16883 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16884   match(If cop cmp);
16885   effect(USE labl);
16886 
16887   ins_cost(300);
16888   format %{ "j$cop,u   $labl" %}
16889   size(6);
16890   ins_encode %{
16891     Label* L = $labl$$label;
16892     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16893   %}
16894   ins_pipe(pipe_jcc);
16895 %}
16896 
16897 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16898   match(If cop cmp);
16899   effect(USE labl);
16900 
16901   ins_cost(200);
16902   format %{ "j$cop,u   $labl" %}
16903   size(6);
16904   ins_encode %{
16905     Label* L = $labl$$label;
16906     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16907   %}
16908   ins_pipe(pipe_jcc);
16909 %}
16910 
16911 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16912   match(If cop cmp);
16913   effect(USE labl);
16914 
16915   ins_cost(200);
16916   format %{ $$template
16917     if ($cop$$cmpcode == Assembler::notEqual) {
16918       $$emit$$"jp,u    $labl\n\t"
16919       $$emit$$"j$cop,u   $labl"
16920     } else {
16921       $$emit$$"jp,u    done\n\t"
16922       $$emit$$"j$cop,u   $labl\n\t"
16923       $$emit$$"done:"
16924     }
16925   %}
16926   ins_encode %{
16927     Label* l = $labl$$label;
16928     if ($cop$$cmpcode == Assembler::notEqual) {
16929       __ jcc(Assembler::parity, *l, false);
16930       __ jcc(Assembler::notEqual, *l, false);
16931     } else if ($cop$$cmpcode == Assembler::equal) {
16932       Label done;
16933       __ jccb(Assembler::parity, done);
16934       __ jcc(Assembler::equal, *l, false);
16935       __ bind(done);
16936     } else {
16937        ShouldNotReachHere();
16938     }
16939   %}
16940   ins_pipe(pipe_jcc);
16941 %}
16942 
16943 // Jump Direct Conditional - using signed and unsigned comparison
16944 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16945   match(If cop cmp);
16946   effect(USE labl);
16947 
16948   ins_cost(200);
16949   format %{ "j$cop,su   $labl" %}
16950   size(6);
16951   ins_encode %{
16952     Label* L = $labl$$label;
16953     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16954   %}
16955   ins_pipe(pipe_jcc);
16956 %}
16957 
16958 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set the hidden
// one-element internal cache on a hit (the cache is checked with exposed
// code in gen_subtype_check()).  Return NZ for a miss or zero for a hit.
// The encoding ALSO sets flags.
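//
// As an illustrative example (not emitted verbatim), a test such as
// "obj instanceof SomeInterface" reaches this scan only after the inlined
// one-element secondary-super cache check in gen_subtype_check() misses.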
16964 
16965 instruct partialSubtypeCheck(rdi_RegP result,
16966                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16967                              rFlagsReg cr)
16968 %{
16969   match(Set result (PartialSubtypeCheck sub super));
16970   predicate(!UseSecondarySupersTable);
16971   effect(KILL rcx, KILL cr);
16972 
16973   ins_cost(1100);  // slightly larger than the next version
16974   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16975             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16976             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16977             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16978             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16979             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
16981     "miss:\t" %}
16982 
16983   ins_encode %{
16984     Label miss;
16985     // NB: Callers may assume that, when $result is a valid register,
16986     // check_klass_subtype_slow_path_linear sets it to a nonzero
16987     // value.
16988     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16989                                             $rcx$$Register, $result$$Register,
16990                                             nullptr, &miss,
16991                                             /*set_cond_codes:*/ true);
16992     __ xorptr($result$$Register, $result$$Register);
16993     __ bind(miss);
16994   %}
16995 
16996   ins_pipe(pipe_slow);
16997 %}
16998 
16999 // ============================================================================
// Two versions of the hashtable-based partialSubtypeCheck, both used when
// we need to search for a superclass in the secondary supers array.
// The first is used when we don't know _a priori_ the class being
// searched for. The second, far more common, is used when we do know:
// this covers instanceof, checkcast, and any case where C2 can determine
// the superclass by constant propagation.
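//
// For example, in "x instanceof Runnable" the superclass is a compile-time
// constant, so the constant flavor below applies; the variable flavor is
// needed only when the superklass arrives in a register (illustratively,
// Class.isInstance on a non-constant Class).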
17006 
17007 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17008                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17009                                        rFlagsReg cr)
17010 %{
17011   match(Set result (PartialSubtypeCheck sub super));
17012   predicate(UseSecondarySupersTable);
17013   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17014 
17015   ins_cost(1000);
17016   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17017 
17018   ins_encode %{
17019     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
17021   %}
17022 
17023   ins_pipe(pipe_slow);
17024 %}
17025 
17026 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17027                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17028                                        rFlagsReg cr)
17029 %{
17030   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17031   predicate(UseSecondarySupersTable);
17032   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17033 
  ins_cost(700);  // smaller than the variable-superklass version above
17035   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17036 
17037   ins_encode %{
17038     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17039     if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
17043     } else {
17044       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17045     }
17046   %}
17047 
17048   ins_pipe(pipe_slow);
17049 %}
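// (When InlineSecondarySupersTest is off, the check above is delegated to a
// per-hash-slot stub via StubRoutines::lookup_secondary_supers_table_stub,
// trading an extra call for less generated code at each check site.)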
17050 
17051 // ============================================================================
17052 // Branch Instructions -- short offset versions
17053 //
17054 // These instructions are used to replace jumps of a long offset (the default
17055 // match) with jumps of a shorter offset.  These instructions are all tagged
17056 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17057 // match rules in general matching.  Instead, the ADLC generates a conversion
17058 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether
// the short form can be used via the is_short_branch_offset() predicate in
// the machine-specific code section of the file.
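//
// As a size sketch: the long conditional jump above is 6 bytes
// (0F 8x + rel32, cf. size(6)), while the short form is only 2 bytes
// (7x + rel8, cf. size(2)), so the conversion applies only when the target
// is within a signed 8-bit displacement of the end of the instruction.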
17062 
17063 // Jump Direct - Label defines a relative address from JMP+1
17064 instruct jmpDir_short(label labl) %{
17065   match(Goto);
17066   effect(USE labl);
17067 
17068   ins_cost(300);
17069   format %{ "jmp,s   $labl" %}
17070   size(2);
17071   ins_encode %{
17072     Label* L = $labl$$label;
17073     __ jmpb(*L);
17074   %}
17075   ins_pipe(pipe_jmp);
17076   ins_short_branch(1);
17077 %}
17078 
17079 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17080 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17081   match(If cop cr);
17082   effect(USE labl);
17083 
17084   ins_cost(300);
17085   format %{ "j$cop,s   $labl" %}
17086   size(2);
17087   ins_encode %{
17088     Label* L = $labl$$label;
17089     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17090   %}
17091   ins_pipe(pipe_jcc);
17092   ins_short_branch(1);
17093 %}
17094 
17095 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17096 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17097   match(CountedLoopEnd cop cr);
17098   effect(USE labl);
17099 
17100   ins_cost(300);
17101   format %{ "j$cop,s   $labl\t# loop end" %}
17102   size(2);
17103   ins_encode %{
17104     Label* L = $labl$$label;
17105     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17106   %}
17107   ins_pipe(pipe_jcc);
17108   ins_short_branch(1);
17109 %}
17110 
17111 // Jump Direct Conditional - using unsigned comparison
17112 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17113   match(If cop cmp);
17114   effect(USE labl);
17115 
17116   ins_cost(300);
17117   format %{ "j$cop,us  $labl" %}
17118   size(2);
17119   ins_encode %{
17120     Label* L = $labl$$label;
17121     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17122   %}
17123   ins_pipe(pipe_jcc);
17124   ins_short_branch(1);
17125 %}
17126 
17127 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17128   match(If cop cmp);
17129   effect(USE labl);
17130 
17131   ins_cost(300);
17132   format %{ "j$cop,us  $labl" %}
17133   size(2);
17134   ins_encode %{
17135     Label* L = $labl$$label;
17136     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17137   %}
17138   ins_pipe(pipe_jcc);
17139   ins_short_branch(1);
17140 %}
17141 
17142 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17143   match(If cop cmp);
17144   effect(USE labl);
17145 
17146   ins_cost(300);
17147   format %{ $$template
17148     if ($cop$$cmpcode == Assembler::notEqual) {
17149       $$emit$$"jp,u,s  $labl\n\t"
17150       $$emit$$"j$cop,u,s  $labl"
17151     } else {
17152       $$emit$$"jp,u,s  done\n\t"
17153       $$emit$$"j$cop,u,s  $labl\n\t"
17154       $$emit$$"done:"
17155     }
17156   %}
17157   size(4);
17158   ins_encode %{
17159     Label* l = $labl$$label;
17160     if ($cop$$cmpcode == Assembler::notEqual) {
17161       __ jccb(Assembler::parity, *l);
17162       __ jccb(Assembler::notEqual, *l);
17163     } else if ($cop$$cmpcode == Assembler::equal) {
17164       Label done;
17165       __ jccb(Assembler::parity, done);
17166       __ jccb(Assembler::equal, *l);
17167       __ bind(done);
17168     } else {
17169        ShouldNotReachHere();
17170     }
17171   %}
17172   ins_pipe(pipe_jcc);
17173   ins_short_branch(1);
17174 %}
17175 
17176 // Jump Direct Conditional - using signed and unsigned comparison
17177 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17178   match(If cop cmp);
17179   effect(USE labl);
17180 
17181   ins_cost(300);
17182   format %{ "j$cop,sus  $labl" %}
17183   size(2);
17184   ins_encode %{
17185     Label* L = $labl$$label;
17186     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17187   %}
17188   ins_pipe(pipe_jcc);
17189   ins_short_branch(1);
17190 %}
17191 
17192 // ============================================================================
17193 // inlined locking and unlocking
17194 
17195 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17196   match(Set cr (FastLock object box));
17197   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17198   ins_cost(300);
17199   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17200   ins_encode %{
17201     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17202   %}
17203   ins_pipe(pipe_slow);
17204 %}
17205 
17206 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17207   match(Set cr (FastUnlock object rax_reg));
17208   effect(TEMP tmp, USE_KILL rax_reg);
17209   ins_cost(300);
17210   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17211   ins_encode %{
17212     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17213   %}
17214   ins_pipe(pipe_slow);
17215 %}
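// The condition-flags result of FastLock/FastUnlock feeds a branch: the
// equal condition (ZF set) means the inlined fast path succeeded, anything
// else falls through to the runtime monitorenter/monitorexit slow path.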
17216 
17217 
17218 // ============================================================================
17219 // Safepoint Instructions
17220 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17221 %{
17222   match(SafePoint poll);
17223   effect(KILL cr, USE poll);
17224 
17225   format %{ "testl   rax, [$poll]\t"
17226             "# Safepoint: poll for GC" %}
17227   ins_cost(125);
17228   ins_encode %{
17229     __ relocate(relocInfo::poll_type);
17230     address pre_pc = __ pc();
17231     __ testl(rax, Address($poll$$Register, 0));
17232     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17233   %}
17234   ins_pipe(ialu_reg_mem);
17235 %}
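// The poll register holds the thread-local polling page address. The load
// above is harmless while the page is readable; when a safepoint or
// handshake is requested the page is protected, the test faults, and the
// signal handler redirects the thread to the safepoint handling code.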
17236 
17237 instruct mask_all_evexL(kReg dst, rRegL src) %{
17238   match(Set dst (MaskAll src));
17239   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17240   ins_encode %{
17241     int mask_len = Matcher::vector_length(this);
17242     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17243   %}
17244   ins_pipe( pipe_slow );
17245 %}
17246 
17247 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17248   predicate(Matcher::vector_length(n) > 32);
17249   match(Set dst (MaskAll src));
17250   effect(TEMP tmp);
17251   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17252   ins_encode %{
17253     int mask_len = Matcher::vector_length(this);
17254     __ movslq($tmp$$Register, $src$$Register);
17255     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17256   %}
17257   ins_pipe( pipe_slow );
17258 %}
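// (The GT32 flavor first sign-extends the 32-bit source with movslq because
// a mask of more than 32 lanes consumes all 64 bits of the source register.)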
17259 
17260 // ============================================================================
17261 // Procedure Call/Return Instructions
17262 // Call Java Static Instruction
17263 // Note: If this code changes, the corresponding ret_addr_offset() and
17264 //       compute_padding() functions will have to be adjusted.
17265 instruct CallStaticJavaDirect(method meth) %{
17266   match(CallStaticJava);
17267   effect(USE meth);
17268 
17269   ins_cost(300);
17270   format %{ "call,static " %}
17271   opcode(0xE8); /* E8 cd */
17272   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17273   ins_pipe(pipe_slow);
17274   ins_alignment(4);
17275 %}
17276 
17277 // Call Java Dynamic Instruction
17278 // Note: If this code changes, the corresponding ret_addr_offset() and
17279 //       compute_padding() functions will have to be adjusted.
17280 instruct CallDynamicJavaDirect(method meth)
17281 %{
17282   match(CallDynamicJava);
17283   effect(USE meth);
17284 
17285   ins_cost(300);
17286   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17287             "call,dynamic " %}
17288   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17289   ins_pipe(pipe_slow);
17290   ins_alignment(4);
17291 %}
17292 
17293 // Call Runtime Instruction
17294 instruct CallRuntimeDirect(method meth)
17295 %{
17296   match(CallRuntime);
17297   effect(USE meth);
17298 
17299   ins_cost(300);
17300   format %{ "call,runtime " %}
17301   ins_encode(clear_avx, Java_To_Runtime(meth));
17302   ins_pipe(pipe_slow);
17303 %}
17304 
17305 // Call runtime without safepoint
17306 instruct CallLeafDirect(method meth)
17307 %{
17308   match(CallLeaf);
17309   effect(USE meth);
17310 
17311   ins_cost(300);
17312   format %{ "call_leaf,runtime " %}
17313   ins_encode(clear_avx, Java_To_Runtime(meth));
17314   ins_pipe(pipe_slow);
17315 %}
17316 
17317 // Call runtime without safepoint and with vector arguments
17318 instruct CallLeafDirectVector(method meth)
17319 %{
17320   match(CallLeafVector);
17321   effect(USE meth);
17322 
17323   ins_cost(300);
17324   format %{ "call_leaf,vector " %}
17325   ins_encode(Java_To_Runtime(meth));
17326   ins_pipe(pipe_slow);
17327 %}
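// (Note: unlike the surrounding call rules, no clear_avx here -- the vector
// arguments must remain live in the upper register bits across the call.)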
17328 
17329 // Call runtime without safepoint
17330 instruct CallLeafNoFPDirect(method meth)
17331 %{
17332   match(CallLeafNoFP);
17333   effect(USE meth);
17334 
17335   ins_cost(300);
17336   format %{ "call_leaf_nofp,runtime " %}
17337   ins_encode(clear_avx, Java_To_Runtime(meth));
17338   ins_pipe(pipe_slow);
17339 %}
17340 
17341 // Return Instruction
17342 // Remove the return address & jump to it.
17343 // Notice: We always emit a nop after a ret to make sure there is room
17344 // for safepoint patching
17345 instruct Ret()
17346 %{
17347   match(Return);
17348 
17349   format %{ "ret" %}
17350   ins_encode %{
17351     __ ret(0);
17352   %}
17353   ins_pipe(pipe_jmp);
17354 %}
17355 
17356 // Tail Call; Jump from runtime stub to Java code.
17357 // Also known as an 'interprocedural jump'.
17358 // Target of jump will eventually return to caller.
17359 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, and it has reset rbp to the caller's state.
17362 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17363 %{
17364   match(TailCall jump_target method_ptr);
17365 
17366   ins_cost(300);
17367   format %{ "jmp     $jump_target\t# rbx holds method" %}
17368   ins_encode %{
17369     __ jmp($jump_target$$Register);
17370   %}
17371   ins_pipe(pipe_jmp);
17372 %}
17373 
17374 // Tail Jump; remove the return address; jump to target.
17375 // TailCall above leaves the return address around.
17376 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17377 %{
17378   match(TailJump jump_target ex_oop);
17379 
17380   ins_cost(300);
17381   format %{ "popq    rdx\t# pop return address\n\t"
17382             "jmp     $jump_target" %}
17383   ins_encode %{
17384     __ popq(as_Register(RDX_enc));
17385     __ jmp($jump_target$$Register);
17386   %}
17387   ins_pipe(pipe_jmp);
17388 %}
17389 
17390 // Forward exception.
17391 instruct ForwardExceptionjmp()
17392 %{
17393   match(ForwardException);
17394 
17395   format %{ "jmp     forward_exception_stub" %}
17396   ins_encode %{
17397     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17398   %}
17399   ins_pipe(pipe_jmp);
17400 %}
17401 
17402 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
17404 // just prior to jumping to this handler.  No code emitted.
17405 instruct CreateException(rax_RegP ex_oop)
17406 %{
17407   match(Set ex_oop (CreateEx));
17408 
17409   size(0);
17410   // use the following format syntax
17411   format %{ "# exception oop is in rax; no code emitted" %}
17412   ins_encode();
17413   ins_pipe(empty);
17414 %}
17415 
17416 // Rethrow exception:
17417 // The exception oop will come in the first argument position.
17418 // Then JUMP (not call) to the rethrow stub code.
17419 instruct RethrowException()
17420 %{
17421   match(Rethrow);
17422 
17423   // use the following format syntax
17424   format %{ "jmp     rethrow_stub" %}
17425   ins_encode %{
17426     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17427   %}
17428   ins_pipe(pipe_jmp);
17429 %}
17430 
17431 // ============================================================================
17432 // This name is KNOWN by the ADLC and cannot be changed.
17433 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
17435 instruct tlsLoadP(r15_RegP dst) %{
17436   match(Set dst (ThreadLocal));
17437   effect(DEF dst);
17438 
17439   size(0);
17440   format %{ "# TLS is in R15" %}
17441   ins_encode( /*empty encoding*/ );
17442   ins_pipe(ialu_reg_reg);
17443 %}
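
// ============================================================================
// Scalar floating-point arithmetic.  Each operation below comes in two
// flavors: a destructive two-operand SSE form (UseAVX == 0) that updates
// dst in place, and a non-destructive three-operand AVX form (UseAVX > 0)
// that encodes dst separately, sparing a register copy when the first input
// stays live.  A sketch of the difference for AddF:
//   addss  dst, src          // SSE:  dst += src
//   vaddss dst, src1, src2   // AVX:  dst = src1 + src2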
17444 
17445 instruct addF_reg(regF dst, regF src) %{
17446   predicate(UseAVX == 0);
17447   match(Set dst (AddF dst src));
17448 
17449   format %{ "addss   $dst, $src" %}
17450   ins_cost(150);
17451   ins_encode %{
17452     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17453   %}
17454   ins_pipe(pipe_slow);
17455 %}
17456 
17457 instruct addF_mem(regF dst, memory src) %{
17458   predicate(UseAVX == 0);
17459   match(Set dst (AddF dst (LoadF src)));
17460 
17461   format %{ "addss   $dst, $src" %}
17462   ins_cost(150);
17463   ins_encode %{
17464     __ addss($dst$$XMMRegister, $src$$Address);
17465   %}
17466   ins_pipe(pipe_slow);
17467 %}
17468 
17469 instruct addF_imm(regF dst, immF con) %{
17470   predicate(UseAVX == 0);
17471   match(Set dst (AddF dst con));
17472   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17473   ins_cost(150);
17474   ins_encode %{
17475     __ addss($dst$$XMMRegister, $constantaddress($con));
17476   %}
17477   ins_pipe(pipe_slow);
17478 %}
17479 
17480 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17481   predicate(UseAVX > 0);
17482   match(Set dst (AddF src1 src2));
17483 
17484   format %{ "vaddss  $dst, $src1, $src2" %}
17485   ins_cost(150);
17486   ins_encode %{
17487     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17488   %}
17489   ins_pipe(pipe_slow);
17490 %}
17491 
17492 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17493   predicate(UseAVX > 0);
17494   match(Set dst (AddF src1 (LoadF src2)));
17495 
17496   format %{ "vaddss  $dst, $src1, $src2" %}
17497   ins_cost(150);
17498   ins_encode %{
17499     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17500   %}
17501   ins_pipe(pipe_slow);
17502 %}
17503 
17504 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17505   predicate(UseAVX > 0);
17506   match(Set dst (AddF src con));
17507 
17508   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17509   ins_cost(150);
17510   ins_encode %{
17511     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17512   %}
17513   ins_pipe(pipe_slow);
17514 %}
17515 
17516 instruct addD_reg(regD dst, regD src) %{
17517   predicate(UseAVX == 0);
17518   match(Set dst (AddD dst src));
17519 
17520   format %{ "addsd   $dst, $src" %}
17521   ins_cost(150);
17522   ins_encode %{
17523     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17524   %}
17525   ins_pipe(pipe_slow);
17526 %}
17527 
17528 instruct addD_mem(regD dst, memory src) %{
17529   predicate(UseAVX == 0);
17530   match(Set dst (AddD dst (LoadD src)));
17531 
17532   format %{ "addsd   $dst, $src" %}
17533   ins_cost(150);
17534   ins_encode %{
17535     __ addsd($dst$$XMMRegister, $src$$Address);
17536   %}
17537   ins_pipe(pipe_slow);
17538 %}
17539 
17540 instruct addD_imm(regD dst, immD con) %{
17541   predicate(UseAVX == 0);
17542   match(Set dst (AddD dst con));
17543   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17544   ins_cost(150);
17545   ins_encode %{
17546     __ addsd($dst$$XMMRegister, $constantaddress($con));
17547   %}
17548   ins_pipe(pipe_slow);
17549 %}
17550 
17551 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17552   predicate(UseAVX > 0);
17553   match(Set dst (AddD src1 src2));
17554 
17555   format %{ "vaddsd  $dst, $src1, $src2" %}
17556   ins_cost(150);
17557   ins_encode %{
17558     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17559   %}
17560   ins_pipe(pipe_slow);
17561 %}
17562 
17563 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17564   predicate(UseAVX > 0);
17565   match(Set dst (AddD src1 (LoadD src2)));
17566 
17567   format %{ "vaddsd  $dst, $src1, $src2" %}
17568   ins_cost(150);
17569   ins_encode %{
17570     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17571   %}
17572   ins_pipe(pipe_slow);
17573 %}
17574 
17575 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17576   predicate(UseAVX > 0);
17577   match(Set dst (AddD src con));
17578 
17579   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17580   ins_cost(150);
17581   ins_encode %{
17582     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17583   %}
17584   ins_pipe(pipe_slow);
17585 %}
17586 
17587 instruct subF_reg(regF dst, regF src) %{
17588   predicate(UseAVX == 0);
17589   match(Set dst (SubF dst src));
17590 
17591   format %{ "subss   $dst, $src" %}
17592   ins_cost(150);
17593   ins_encode %{
17594     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17595   %}
17596   ins_pipe(pipe_slow);
17597 %}
17598 
17599 instruct subF_mem(regF dst, memory src) %{
17600   predicate(UseAVX == 0);
17601   match(Set dst (SubF dst (LoadF src)));
17602 
17603   format %{ "subss   $dst, $src" %}
17604   ins_cost(150);
17605   ins_encode %{
17606     __ subss($dst$$XMMRegister, $src$$Address);
17607   %}
17608   ins_pipe(pipe_slow);
17609 %}
17610 
17611 instruct subF_imm(regF dst, immF con) %{
17612   predicate(UseAVX == 0);
17613   match(Set dst (SubF dst con));
17614   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17615   ins_cost(150);
17616   ins_encode %{
17617     __ subss($dst$$XMMRegister, $constantaddress($con));
17618   %}
17619   ins_pipe(pipe_slow);
17620 %}
17621 
17622 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17623   predicate(UseAVX > 0);
17624   match(Set dst (SubF src1 src2));
17625 
17626   format %{ "vsubss  $dst, $src1, $src2" %}
17627   ins_cost(150);
17628   ins_encode %{
17629     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17630   %}
17631   ins_pipe(pipe_slow);
17632 %}
17633 
17634 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17635   predicate(UseAVX > 0);
17636   match(Set dst (SubF src1 (LoadF src2)));
17637 
17638   format %{ "vsubss  $dst, $src1, $src2" %}
17639   ins_cost(150);
17640   ins_encode %{
17641     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17642   %}
17643   ins_pipe(pipe_slow);
17644 %}
17645 
17646 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17647   predicate(UseAVX > 0);
17648   match(Set dst (SubF src con));
17649 
17650   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17651   ins_cost(150);
17652   ins_encode %{
17653     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17654   %}
17655   ins_pipe(pipe_slow);
17656 %}
17657 
17658 instruct subD_reg(regD dst, regD src) %{
17659   predicate(UseAVX == 0);
17660   match(Set dst (SubD dst src));
17661 
17662   format %{ "subsd   $dst, $src" %}
17663   ins_cost(150);
17664   ins_encode %{
17665     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17666   %}
17667   ins_pipe(pipe_slow);
17668 %}
17669 
17670 instruct subD_mem(regD dst, memory src) %{
17671   predicate(UseAVX == 0);
17672   match(Set dst (SubD dst (LoadD src)));
17673 
17674   format %{ "subsd   $dst, $src" %}
17675   ins_cost(150);
17676   ins_encode %{
17677     __ subsd($dst$$XMMRegister, $src$$Address);
17678   %}
17679   ins_pipe(pipe_slow);
17680 %}
17681 
17682 instruct subD_imm(regD dst, immD con) %{
17683   predicate(UseAVX == 0);
17684   match(Set dst (SubD dst con));
17685   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17686   ins_cost(150);
17687   ins_encode %{
17688     __ subsd($dst$$XMMRegister, $constantaddress($con));
17689   %}
17690   ins_pipe(pipe_slow);
17691 %}
17692 
17693 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17694   predicate(UseAVX > 0);
17695   match(Set dst (SubD src1 src2));
17696 
17697   format %{ "vsubsd  $dst, $src1, $src2" %}
17698   ins_cost(150);
17699   ins_encode %{
17700     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17701   %}
17702   ins_pipe(pipe_slow);
17703 %}
17704 
17705 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17706   predicate(UseAVX > 0);
17707   match(Set dst (SubD src1 (LoadD src2)));
17708 
17709   format %{ "vsubsd  $dst, $src1, $src2" %}
17710   ins_cost(150);
17711   ins_encode %{
17712     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17713   %}
17714   ins_pipe(pipe_slow);
17715 %}
17716 
17717 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17718   predicate(UseAVX > 0);
17719   match(Set dst (SubD src con));
17720 
17721   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17722   ins_cost(150);
17723   ins_encode %{
17724     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17725   %}
17726   ins_pipe(pipe_slow);
17727 %}
17728 
17729 instruct mulF_reg(regF dst, regF src) %{
17730   predicate(UseAVX == 0);
17731   match(Set dst (MulF dst src));
17732 
17733   format %{ "mulss   $dst, $src" %}
17734   ins_cost(150);
17735   ins_encode %{
17736     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17737   %}
17738   ins_pipe(pipe_slow);
17739 %}
17740 
17741 instruct mulF_mem(regF dst, memory src) %{
17742   predicate(UseAVX == 0);
17743   match(Set dst (MulF dst (LoadF src)));
17744 
17745   format %{ "mulss   $dst, $src" %}
17746   ins_cost(150);
17747   ins_encode %{
17748     __ mulss($dst$$XMMRegister, $src$$Address);
17749   %}
17750   ins_pipe(pipe_slow);
17751 %}
17752 
17753 instruct mulF_imm(regF dst, immF con) %{
17754   predicate(UseAVX == 0);
17755   match(Set dst (MulF dst con));
17756   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17757   ins_cost(150);
17758   ins_encode %{
17759     __ mulss($dst$$XMMRegister, $constantaddress($con));
17760   %}
17761   ins_pipe(pipe_slow);
17762 %}
17763 
17764 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17765   predicate(UseAVX > 0);
17766   match(Set dst (MulF src1 src2));
17767 
17768   format %{ "vmulss  $dst, $src1, $src2" %}
17769   ins_cost(150);
17770   ins_encode %{
17771     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17772   %}
17773   ins_pipe(pipe_slow);
17774 %}
17775 
17776 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17777   predicate(UseAVX > 0);
17778   match(Set dst (MulF src1 (LoadF src2)));
17779 
17780   format %{ "vmulss  $dst, $src1, $src2" %}
17781   ins_cost(150);
17782   ins_encode %{
17783     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17784   %}
17785   ins_pipe(pipe_slow);
17786 %}
17787 
17788 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17789   predicate(UseAVX > 0);
17790   match(Set dst (MulF src con));
17791 
17792   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17793   ins_cost(150);
17794   ins_encode %{
17795     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17796   %}
17797   ins_pipe(pipe_slow);
17798 %}
17799 
17800 instruct mulD_reg(regD dst, regD src) %{
17801   predicate(UseAVX == 0);
17802   match(Set dst (MulD dst src));
17803 
17804   format %{ "mulsd   $dst, $src" %}
17805   ins_cost(150);
17806   ins_encode %{
17807     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17808   %}
17809   ins_pipe(pipe_slow);
17810 %}
17811 
17812 instruct mulD_mem(regD dst, memory src) %{
17813   predicate(UseAVX == 0);
17814   match(Set dst (MulD dst (LoadD src)));
17815 
17816   format %{ "mulsd   $dst, $src" %}
17817   ins_cost(150);
17818   ins_encode %{
17819     __ mulsd($dst$$XMMRegister, $src$$Address);
17820   %}
17821   ins_pipe(pipe_slow);
17822 %}
17823 
17824 instruct mulD_imm(regD dst, immD con) %{
17825   predicate(UseAVX == 0);
17826   match(Set dst (MulD dst con));
17827   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17828   ins_cost(150);
17829   ins_encode %{
17830     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17831   %}
17832   ins_pipe(pipe_slow);
17833 %}
17834 
17835 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17836   predicate(UseAVX > 0);
17837   match(Set dst (MulD src1 src2));
17838 
17839   format %{ "vmulsd  $dst, $src1, $src2" %}
17840   ins_cost(150);
17841   ins_encode %{
17842     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17843   %}
17844   ins_pipe(pipe_slow);
17845 %}
17846 
17847 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17848   predicate(UseAVX > 0);
17849   match(Set dst (MulD src1 (LoadD src2)));
17850 
17851   format %{ "vmulsd  $dst, $src1, $src2" %}
17852   ins_cost(150);
17853   ins_encode %{
17854     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17855   %}
17856   ins_pipe(pipe_slow);
17857 %}
17858 
17859 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17860   predicate(UseAVX > 0);
17861   match(Set dst (MulD src con));
17862 
17863   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17864   ins_cost(150);
17865   ins_encode %{
17866     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17867   %}
17868   ins_pipe(pipe_slow);
17869 %}
17870 
17871 instruct divF_reg(regF dst, regF src) %{
17872   predicate(UseAVX == 0);
17873   match(Set dst (DivF dst src));
17874 
17875   format %{ "divss   $dst, $src" %}
17876   ins_cost(150);
17877   ins_encode %{
17878     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17879   %}
17880   ins_pipe(pipe_slow);
17881 %}
17882 
17883 instruct divF_mem(regF dst, memory src) %{
17884   predicate(UseAVX == 0);
17885   match(Set dst (DivF dst (LoadF src)));
17886 
17887   format %{ "divss   $dst, $src" %}
17888   ins_cost(150);
17889   ins_encode %{
17890     __ divss($dst$$XMMRegister, $src$$Address);
17891   %}
17892   ins_pipe(pipe_slow);
17893 %}
17894 
17895 instruct divF_imm(regF dst, immF con) %{
17896   predicate(UseAVX == 0);
17897   match(Set dst (DivF dst con));
17898   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17899   ins_cost(150);
17900   ins_encode %{
17901     __ divss($dst$$XMMRegister, $constantaddress($con));
17902   %}
17903   ins_pipe(pipe_slow);
17904 %}
17905 
17906 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17907   predicate(UseAVX > 0);
17908   match(Set dst (DivF src1 src2));
17909 
17910   format %{ "vdivss  $dst, $src1, $src2" %}
17911   ins_cost(150);
17912   ins_encode %{
17913     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17914   %}
17915   ins_pipe(pipe_slow);
17916 %}
17917 
17918 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17919   predicate(UseAVX > 0);
17920   match(Set dst (DivF src1 (LoadF src2)));
17921 
17922   format %{ "vdivss  $dst, $src1, $src2" %}
17923   ins_cost(150);
17924   ins_encode %{
17925     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17926   %}
17927   ins_pipe(pipe_slow);
17928 %}
17929 
17930 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17931   predicate(UseAVX > 0);
17932   match(Set dst (DivF src con));
17933 
17934   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17935   ins_cost(150);
17936   ins_encode %{
17937     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17938   %}
17939   ins_pipe(pipe_slow);
17940 %}
17941 
17942 instruct divD_reg(regD dst, regD src) %{
17943   predicate(UseAVX == 0);
17944   match(Set dst (DivD dst src));
17945 
17946   format %{ "divsd   $dst, $src" %}
17947   ins_cost(150);
17948   ins_encode %{
17949     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17950   %}
17951   ins_pipe(pipe_slow);
17952 %}
17953 
17954 instruct divD_mem(regD dst, memory src) %{
17955   predicate(UseAVX == 0);
17956   match(Set dst (DivD dst (LoadD src)));
17957 
17958   format %{ "divsd   $dst, $src" %}
17959   ins_cost(150);
17960   ins_encode %{
17961     __ divsd($dst$$XMMRegister, $src$$Address);
17962   %}
17963   ins_pipe(pipe_slow);
17964 %}
17965 
17966 instruct divD_imm(regD dst, immD con) %{
17967   predicate(UseAVX == 0);
17968   match(Set dst (DivD dst con));
17969   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17970   ins_cost(150);
17971   ins_encode %{
17972     __ divsd($dst$$XMMRegister, $constantaddress($con));
17973   %}
17974   ins_pipe(pipe_slow);
17975 %}
17976 
17977 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17978   predicate(UseAVX > 0);
17979   match(Set dst (DivD src1 src2));
17980 
17981   format %{ "vdivsd  $dst, $src1, $src2" %}
17982   ins_cost(150);
17983   ins_encode %{
17984     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17985   %}
17986   ins_pipe(pipe_slow);
17987 %}
17988 
17989 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17990   predicate(UseAVX > 0);
17991   match(Set dst (DivD src1 (LoadD src2)));
17992 
17993   format %{ "vdivsd  $dst, $src1, $src2" %}
17994   ins_cost(150);
17995   ins_encode %{
17996     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17997   %}
17998   ins_pipe(pipe_slow);
17999 %}
18000 
18001 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18002   predicate(UseAVX > 0);
18003   match(Set dst (DivD src con));
18004 
18005   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18006   ins_cost(150);
18007   ins_encode %{
18008     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18009   %}
18010   ins_pipe(pipe_slow);
18011 %}
18012 
18013 instruct absF_reg(regF dst) %{
18014   predicate(UseAVX == 0);
18015   match(Set dst (AbsF dst));
18016   ins_cost(150);
18017   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
18018   ins_encode %{
18019     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18020   %}
18021   ins_pipe(pipe_slow);
18022 %}
18023 
18024 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18025   predicate(UseAVX > 0);
18026   match(Set dst (AbsF src));
18027   ins_cost(150);
18028   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18029   ins_encode %{
18030     int vlen_enc = Assembler::AVX_128bit;
18031     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18032               ExternalAddress(float_signmask()), vlen_enc);
18033   %}
18034   ins_pipe(pipe_slow);
18035 %}
18036 
18037 instruct absD_reg(regD dst) %{
18038   predicate(UseAVX == 0);
18039   match(Set dst (AbsD dst));
18040   ins_cost(150);
18041   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
18042             "# abs double by sign masking" %}
18043   ins_encode %{
18044     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18045   %}
18046   ins_pipe(pipe_slow);
18047 %}
18048 
18049 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18050   predicate(UseAVX > 0);
18051   match(Set dst (AbsD src));
18052   ins_cost(150);
18053   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
18054             "# abs double by sign masking" %}
18055   ins_encode %{
18056     int vlen_enc = Assembler::AVX_128bit;
18057     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18058               ExternalAddress(double_signmask()), vlen_enc);
18059   %}
18060   ins_pipe(pipe_slow);
18061 %}
18062 
18063 instruct negF_reg(regF dst) %{
18064   predicate(UseAVX == 0);
18065   match(Set dst (NegF dst));
18066   ins_cost(150);
18067   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
18068   ins_encode %{
18069     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18070   %}
18071   ins_pipe(pipe_slow);
18072 %}
18073 
18074 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18075   predicate(UseAVX > 0);
18076   match(Set dst (NegF src));
18077   ins_cost(150);
18078   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18079   ins_encode %{
18080     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18081                  ExternalAddress(float_signflip()));
18082   %}
18083   ins_pipe(pipe_slow);
18084 %}
18085 
18086 instruct negD_reg(regD dst) %{
18087   predicate(UseAVX == 0);
18088   match(Set dst (NegD dst));
18089   ins_cost(150);
18090   format %{ "xorpd   $dst, [0x8000000000000000]\t"
18091             "# neg double by sign flipping" %}
18092   ins_encode %{
18093     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18094   %}
18095   ins_pipe(pipe_slow);
18096 %}
18097 
18098 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18099   predicate(UseAVX > 0);
18100   match(Set dst (NegD src));
18101   ins_cost(150);
18102   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
18103             "# neg double by sign flipping" %}
18104   ins_encode %{
18105     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18106                  ExternalAddress(double_signflip()));
18107   %}
18108   ins_pipe(pipe_slow);
18109 %}
18110 
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance: it writes only the low 32 bits of dst, so a stale dst
// would otherwise carry a false dependency. Therefore only the instruct rule
// where the input is pre-loaded into the dst register is defined below.
18113 instruct sqrtF_reg(regF dst) %{
18114   match(Set dst (SqrtF dst));
18115   format %{ "sqrtss  $dst, $dst" %}
18116   ins_encode %{
18117     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18118   %}
18119   ins_pipe(pipe_slow);
18120 %}
18121 
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance: it writes only the low 64 bits of dst, so a stale dst
// would otherwise carry a false dependency. Therefore only the instruct rule
// where the input is pre-loaded into the dst register is defined below.
18124 instruct sqrtD_reg(regD dst) %{
18125   match(Set dst (SqrtD dst));
18126   format %{ "sqrtsd  $dst, $dst" %}
18127   ins_encode %{
18128     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18129   %}
18130   ins_pipe(pipe_slow);
18131 %}
18132 
18133 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18134   effect(TEMP tmp);
18135   match(Set dst (ConvF2HF src));
18136   ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
18138   ins_encode %{
18139     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18140   %}
18141   ins_pipe( pipe_slow );
18142 %}
18143 
18144 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18145   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18146   effect(TEMP ktmp, TEMP rtmp);
18147   match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
18149   ins_encode %{
18150     __ movl($rtmp$$Register, 0x1);
18151     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18152     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18153   %}
18154   ins_pipe( pipe_slow );
18155 %}
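// (The 0x1 write mask loaded into $ktmp above restricts the evcvtps2ph store
// to the single converted half-float, so only two bytes of $mem are written.)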
18156 
18157 instruct vconvF2HF(vec dst, vec src) %{
18158   match(Set dst (VectorCastF2HF src));
18159   format %{ "vector_conv_F2HF $dst $src" %}
18160   ins_encode %{
18161     int vlen_enc = vector_length_encoding(this, $src);
18162     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18163   %}
18164   ins_pipe( pipe_slow );
18165 %}
18166 
18167 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18168   predicate(n->as_StoreVector()->memory_size() >= 16);
18169   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18170   format %{ "vcvtps2ph $mem,$src" %}
18171   ins_encode %{
18172     int vlen_enc = vector_length_encoding(this, $src);
18173     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18174   %}
18175   ins_pipe( pipe_slow );
18176 %}
18177 
18178 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18179   match(Set dst (ConvHF2F src));
18180   format %{ "vcvtph2ps $dst,$src" %}
18181   ins_encode %{
18182     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18183   %}
18184   ins_pipe( pipe_slow );
18185 %}
18186 
18187 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18188   match(Set dst (VectorCastHF2F (LoadVector mem)));
18189   format %{ "vcvtph2ps $dst,$mem" %}
18190   ins_encode %{
18191     int vlen_enc = vector_length_encoding(this);
18192     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18193   %}
18194   ins_pipe( pipe_slow );
18195 %}
18196 
18197 instruct vconvHF2F(vec dst, vec src) %{
18198   match(Set dst (VectorCastHF2F src));
18199   ins_cost(125);
18200   format %{ "vector_conv_HF2F $dst,$src" %}
18201   ins_encode %{
18202     int vlen_enc = vector_length_encoding(this);
18203     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18204   %}
18205   ins_pipe( pipe_slow );
18206 %}
18207 
18208 // ---------------------------------------- VectorReinterpret ------------------------------------
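// Reinterpret is a pure no-op whenever source and destination occupy the
// same number of bytes, so those rules emit no code. The expand flavors
// zero the bytes beyond the source by AND-ing with a constant mask; the
// shrink flavors simply copy the low part of the wider register.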
18209 instruct reinterpret_mask(kReg dst) %{
18210   predicate(n->bottom_type()->isa_vectmask() &&
18211             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18212   match(Set dst (VectorReinterpret dst));
18213   ins_cost(125);
18214   format %{ "vector_reinterpret $dst\t!" %}
18215   ins_encode %{
18216     // empty
18217   %}
18218   ins_pipe( pipe_slow );
18219 %}
18220 
18221 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18222   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18223             n->bottom_type()->isa_vectmask() &&
18224             n->in(1)->bottom_type()->isa_vectmask() &&
18225             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18227   match(Set dst (VectorReinterpret src));
18228   effect(TEMP xtmp);
18229   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18230   ins_encode %{
18231      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18232      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18237   %}
18238   ins_pipe( pipe_slow );
18239 %}
18240 
18241 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18242   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18243             n->bottom_type()->isa_vectmask() &&
18244             n->in(1)->bottom_type()->isa_vectmask() &&
18245             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18246              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18248   match(Set dst (VectorReinterpret src));
18249   effect(TEMP xtmp);
18250   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18251   ins_encode %{
18252      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18253      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18258   %}
18259   ins_pipe( pipe_slow );
18260 %}
18261 
18262 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18263   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18264             n->bottom_type()->isa_vectmask() &&
18265             n->in(1)->bottom_type()->isa_vectmask() &&
18266             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18267              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18269   match(Set dst (VectorReinterpret src));
18270   effect(TEMP xtmp);
18271   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18272   ins_encode %{
18273      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18274      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18279   %}
18280   ins_pipe( pipe_slow );
18281 %}
18282 
18283 instruct reinterpret(vec dst) %{
18284   predicate(!n->bottom_type()->isa_vectmask() &&
18285             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18286   match(Set dst (VectorReinterpret dst));
18287   ins_cost(125);
18288   format %{ "vector_reinterpret $dst\t!" %}
18289   ins_encode %{
18290     // empty
18291   %}
18292   ins_pipe( pipe_slow );
18293 %}
18294 
18295 instruct reinterpret_expand(vec dst, vec src) %{
18296   predicate(UseAVX == 0 &&
18297             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18298   match(Set dst (VectorReinterpret src));
18299   ins_cost(125);
18300   effect(TEMP dst);
18301   format %{ "vector_reinterpret_expand $dst,$src" %}
18302   ins_encode %{
18303     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18304     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18305 
18306     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18307     if (src_vlen_in_bytes == 4) {
18308       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18309     } else {
18310       assert(src_vlen_in_bytes == 8, "");
18311       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18312     }
18313     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18314   %}
18315   ins_pipe( pipe_slow );
18316 %}
18317 
18318 instruct vreinterpret_expand4(legVec dst, vec src) %{
18319   predicate(UseAVX > 0 &&
18320             !n->bottom_type()->isa_vectmask() &&
18321             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18322             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18323   match(Set dst (VectorReinterpret src));
18324   ins_cost(125);
18325   format %{ "vector_reinterpret_expand $dst,$src" %}
18326   ins_encode %{
18327     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18328   %}
18329   ins_pipe( pipe_slow );
18330 %}
18331 
18332 
18333 instruct vreinterpret_expand(legVec dst, vec src) %{
18334   predicate(UseAVX > 0 &&
18335             !n->bottom_type()->isa_vectmask() &&
18336             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18337             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18338   match(Set dst (VectorReinterpret src));
18339   ins_cost(125);
18340   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18341   ins_encode %{
18342     switch (Matcher::vector_length_in_bytes(this, $src)) {
18343       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18344       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18345       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18346       default: ShouldNotReachHere();
18347     }
18348   %}
18349   ins_pipe( pipe_slow );
18350 %}
18351 
18352 instruct reinterpret_shrink(vec dst, legVec src) %{
18353   predicate(!n->bottom_type()->isa_vectmask() &&
18354             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18355   match(Set dst (VectorReinterpret src));
18356   ins_cost(125);
18357   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18358   ins_encode %{
18359     switch (Matcher::vector_length_in_bytes(this)) {
18360       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18361       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18362       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18363       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18364       default: ShouldNotReachHere();
18365     }
18366   %}
18367   ins_pipe( pipe_slow );
18368 %}
18369 
18370 // ----------------------------------------------------------------------------------------------------
18371 
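// Rounding with an explicit mode. The $rmode immediate is passed straight
// through to roundsd/vroundpd/vrndscalepd; assuming HotSpot's usual
// RoundDoubleMode encoding, 0 rounds to nearest even (rint), 1 toward
// negative infinity (floor), and 2 toward positive infinity (ceil),
// matching the x86 rounding-control immediate.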
18372 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18373   match(Set dst (RoundDoubleMode src rmode));
18374   format %{ "roundsd $dst,$src" %}
18375   ins_cost(150);
18376   ins_encode %{
18377     assert(UseSSE >= 4, "required");
18378     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18379       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18380     }
18381     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18382   %}
18383   ins_pipe(pipe_slow);
18384 %}
18385 
18386 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18387   match(Set dst (RoundDoubleMode con rmode));
18388   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18389   ins_cost(150);
18390   ins_encode %{
18391     assert(UseSSE >= 4, "required");
18392     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18393   %}
18394   ins_pipe(pipe_slow);
18395 %}
18396 
18397 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18398   predicate(Matcher::vector_length(n) < 8);
18399   match(Set dst (RoundDoubleModeV src rmode));
18400   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18401   ins_encode %{
18402     assert(UseAVX > 0, "required");
18403     int vlen_enc = vector_length_encoding(this);
18404     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18405   %}
18406   ins_pipe( pipe_slow );
18407 %}
18408 
18409 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18410   predicate(Matcher::vector_length(n) == 8);
18411   match(Set dst (RoundDoubleModeV src rmode));
18412   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18413   ins_encode %{
18414     assert(UseAVX > 2, "required");
18415     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18416   %}
18417   ins_pipe( pipe_slow );
18418 %}
18419 
18420 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18421   predicate(Matcher::vector_length(n) < 8);
18422   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18423   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18424   ins_encode %{
18425     assert(UseAVX > 0, "required");
18426     int vlen_enc = vector_length_encoding(this);
18427     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18428   %}
18429   ins_pipe( pipe_slow );
18430 %}
18431 
18432 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18433   predicate(Matcher::vector_length(n) == 8);
18434   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18435   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18436   ins_encode %{
18437     assert(UseAVX > 2, "required");
18438     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18439   %}
18440   ins_pipe( pipe_slow );
18441 %}
18442 
18443 instruct onspinwait() %{
18444   match(OnSpinWait);
18445   ins_cost(200);
18446 
18447   format %{
18448     $$template
18449     $$emit$$"pause\t! membar_onspinwait"
18450   %}
18451   ins_encode %{
18452     __ pause();
18453   %}
18454   ins_pipe(pipe_slow);
18455 %}
18456 
18457 // a * b + c
18458 instruct fmaD_reg(regD a, regD b, regD c) %{
18459   match(Set c (FmaD  c (Binary a b)));
18460   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18461   ins_cost(150);
18462   ins_encode %{
18463     assert(UseFMA, "Needs FMA instructions support.");
18464     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18465   %}
18466   ins_pipe( pipe_slow );
18467 %}
18468 
18469 // a * b + c
18470 instruct fmaF_reg(regF a, regF b, regF c) %{
18471   match(Set c (FmaF  c (Binary a b)));
18472   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18473   ins_cost(150);
18474   ins_encode %{
18475     assert(UseFMA, "Needs FMA instructions support.");
18476     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18477   %}
18478   ins_pipe( pipe_slow );
18479 %}
18480 
18481 // ====================VECTOR INSTRUCTIONS=====================================
18482 
18483 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18484 instruct MoveVec2Leg(legVec dst, vec src) %{
18485   match(Set dst src);
18486   format %{ "" %}
18487   ins_encode %{
18488     ShouldNotReachHere();
18489   %}
18490   ins_pipe( fpu_reg_reg );
18491 %}
18492 
18493 instruct MoveLeg2Vec(vec dst, legVec src) %{
18494   match(Set dst src);
18495   format %{ "" %}
18496   ins_encode %{
18497     ShouldNotReachHere();
18498   %}
18499   ins_pipe( fpu_reg_reg );
18500 %}
18501 
18502 // ============================================================================
18503 
18504 // Load vectors generic operand pattern
18505 instruct loadV(vec dst, memory mem) %{
18506   match(Set dst (LoadVector mem));
18507   ins_cost(125);
18508   format %{ "load_vector $dst,$mem" %}
18509   ins_encode %{
18510     BasicType bt = Matcher::vector_element_basic_type(this);
18511     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18512   %}
18513   ins_pipe( pipe_slow );
18514 %}
18515 
18516 // Store vectors generic operand pattern.
18517 instruct storeV(memory mem, vec src) %{
18518   match(Set mem (StoreVector mem src));
18519   ins_cost(145);
18520   format %{ "store_vector $mem,$src\n\t" %}
18521   ins_encode %{
18522     switch (Matcher::vector_length_in_bytes(this, $src)) {
18523       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18524       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18525       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18526       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18527       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18528       default: ShouldNotReachHere();
18529     }
18530   %}
18531   ins_pipe( pipe_slow );
18532 %}
18533 
18534 // ---------------------------------------- Gather ------------------------------------
18535 
18536 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
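      // Conceptually, a gather performs, for each lane i:
      //   dst[i] = *(base + idx[i] * sizeof(elem))
      // where base is the resolved $mem address (a sketch of the semantics; the
      // masked variants below only load lanes whose mask bit is set).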
18537 
18538 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18539   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18540             Matcher::vector_length_in_bytes(n) <= 32);
18541   match(Set dst (LoadVectorGather mem idx));
18542   effect(TEMP dst, TEMP tmp, TEMP mask);
18543   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18544   ins_encode %{
18545     int vlen_enc = vector_length_encoding(this);
18546     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18547     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18548     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18549     __ lea($tmp$$Register, $mem$$Address);
18550     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18551   %}
18552   ins_pipe( pipe_slow );
18553 %}
18554 
18555 
18556 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18557   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18558             !is_subword_type(Matcher::vector_element_basic_type(n)));
18559   match(Set dst (LoadVectorGather mem idx));
18560   effect(TEMP dst, TEMP tmp, TEMP ktmp);
18561   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18562   ins_encode %{
18563     int vlen_enc = vector_length_encoding(this);
18564     BasicType elem_bt = Matcher::vector_element_basic_type(this);
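          // kxnorwl of a mask register with itself yields all ones (~(k ^ k)),
          // i.e. an all-true predicate, so every lane is gathered.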
18565     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18566     __ lea($tmp$$Register, $mem$$Address);
18567     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18568   %}
18569   ins_pipe( pipe_slow );
18570 %}
18571 
18572 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18573   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18574             !is_subword_type(Matcher::vector_element_basic_type(n)));
18575   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18576   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18577   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18578   ins_encode %{
18579     assert(UseAVX > 2, "sanity");
18580     int vlen_enc = vector_length_encoding(this);
18581     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18582     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18583     // Note: Since the gather instruction partially updates the opmask register
18584     // used for predication, the mask operand is first moved to a temporary.
18585     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18586     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18587     __ lea($tmp$$Register, $mem$$Address);
18588     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18589   %}
18590   ins_pipe( pipe_slow );
18591 %}
18592 
18593 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18594   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18595   match(Set dst (LoadVectorGather mem idx_base));
18596   effect(TEMP tmp, TEMP rtmp);
18597   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18598   ins_encode %{
18599     int vlen_enc = vector_length_encoding(this);
18600     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18601     __ lea($tmp$$Register, $mem$$Address);
18602     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18603   %}
18604   ins_pipe( pipe_slow );
18605 %}
18606 
18607 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18608                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18609   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18610   match(Set dst (LoadVectorGather mem idx_base));
18611   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18612   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18613   ins_encode %{
18614     int vlen_enc = vector_length_encoding(this);
18615     int vector_len = Matcher::vector_length(this);
18616     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18617     __ lea($tmp$$Register, $mem$$Address);
18618     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18619     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18620                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18621   %}
18622   ins_pipe( pipe_slow );
18623 %}
18624 
18625 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18626   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18627   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18628   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18629   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18630   ins_encode %{
18631     int vlen_enc = vector_length_encoding(this);
18632     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18633     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18634     __ lea($tmp$$Register, $mem$$Address);
18635     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18636     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18637   %}
18638   ins_pipe( pipe_slow );
18639 %}
18640 
18641 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18642                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18643   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18644   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18645   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18646   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18647   ins_encode %{
18648     int vlen_enc = vector_length_encoding(this);
18649     int vector_len = Matcher::vector_length(this);
18650     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18651     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18652     __ lea($tmp$$Register, $mem$$Address);
18653     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18654     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18655     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18656                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18657   %}
18658   ins_pipe( pipe_slow );
18659 %}
18660 
18661 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18662   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18663   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18664   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18665   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18666   ins_encode %{
18667     int vlen_enc = vector_length_encoding(this);
18668     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18669     __ lea($tmp$$Register, $mem$$Address);
18670     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
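          // vpmovmskb produces one mask bit per byte. For T_SHORT each lane
          // contributes two identical bits, so pext with 0x55555555 keeps every
          // other bit, leaving one mask bit per short lane.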
18671     if (elem_bt == T_SHORT) {
18672       __ movl($mask_idx$$Register, 0x55555555);
18673       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18674     }
18675     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18676     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18677   %}
18678   ins_pipe( pipe_slow );
18679 %}
18680 
18681 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18682                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18683   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18684   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18685   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18686   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18687   ins_encode %{
18688     int vlen_enc = vector_length_encoding(this);
18689     int vector_len = Matcher::vector_length(this);
18690     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18691     __ lea($tmp$$Register, $mem$$Address);
18692     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18693     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18694     if (elem_bt == T_SHORT) {
18695       __ movl($mask_idx$$Register, 0x55555555);
18696       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18697     }
18698     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18699     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18700                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18701   %}
18702   ins_pipe( pipe_slow );
18703 %}
18704 
18705 // ====================Scatter=======================================
18706 
18707 // Scatter INT, LONG, FLOAT, DOUBLE
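      // Conceptually, a scatter performs, for each lane i:
      //   *(base + idx[i] * sizeof(elem)) = src[i]
      // (a sketch of the semantics; the masked variant below only stores lanes
      // whose mask bit is set).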
18708 
18709 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18710   predicate(UseAVX > 2);
18711   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18712   effect(TEMP tmp, TEMP ktmp);
18713   format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18714   ins_encode %{
18715     int vlen_enc = vector_length_encoding(this, $src);
18716     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18717 
18718     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18719     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18720 
18721     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18722     __ lea($tmp$$Register, $mem$$Address);
18723     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18724   %}
18725   ins_pipe( pipe_slow );
18726 %}
18727 
18728 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18729   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18730   effect(TEMP tmp, TEMP ktmp);
18731   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18732   ins_encode %{
18733     int vlen_enc = vector_length_encoding(this, $src);
18734     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18735     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18736     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18737     // Note: Since the scatter instruction partially updates the opmask register
18738     // used for predication, the mask operand is first moved to a temporary.
18739     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18740     __ lea($tmp$$Register, $mem$$Address);
18741     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18742   %}
18743   ins_pipe( pipe_slow );
18744 %}
18745 
18746 // ====================REPLICATE=======================================
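      // Replicate broadcasts a scalar into every lane of the destination:
      //   for each lane i: dst[i] = src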
18747 
18748 // Replicate byte scalar to be vector
18749 instruct vReplB_reg(vec dst, rRegI src) %{
18750   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18751   match(Set dst (Replicate src));
18752   format %{ "replicateB $dst,$src" %}
18753   ins_encode %{
18754     uint vlen = Matcher::vector_length(this);
18755     if (UseAVX >= 2) {
18756       int vlen_enc = vector_length_encoding(this);
18757       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18758         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18759         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18760       } else {
18761         __ movdl($dst$$XMMRegister, $src$$Register);
18762         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18763       }
18764     } else {
18765       assert(UseAVX < 2, "");
18766       __ movdl($dst$$XMMRegister, $src$$Register);
18767       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18768       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18769       if (vlen >= 16) {
18770         assert(vlen == 16, "");
18771         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18772       }
18773     }
18774   %}
18775   ins_pipe( pipe_slow );
18776 %}
18777 
18778 instruct ReplB_mem(vec dst, memory mem) %{
18779   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18780   match(Set dst (Replicate (LoadB mem)));
18781   format %{ "replicateB $dst,$mem" %}
18782   ins_encode %{
18783     int vlen_enc = vector_length_encoding(this);
18784     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18785   %}
18786   ins_pipe( pipe_slow );
18787 %}
18788 
18789 // ====================ReplicateS=======================================
18790 
18791 instruct vReplS_reg(vec dst, rRegI src) %{
18792   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18793   match(Set dst (Replicate src));
18794   format %{ "replicateS $dst,$src" %}
18795   ins_encode %{
18796     uint vlen = Matcher::vector_length(this);
18797     int vlen_enc = vector_length_encoding(this);
18798     if (UseAVX >= 2) {
18799       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18800         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18801         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18802       } else {
18803         __ movdl($dst$$XMMRegister, $src$$Register);
18804         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18805       }
18806     } else {
18807       assert(UseAVX < 2, "");
18808       __ movdl($dst$$XMMRegister, $src$$Register);
18809       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18810       if (vlen >= 8) {
18811         assert(vlen == 8, "");
18812         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18813       }
18814     }
18815   %}
18816   ins_pipe( pipe_slow );
18817 %}
18818 
18819 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18820   match(Set dst (Replicate con));
18821   effect(TEMP rtmp);
18822   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18823   ins_encode %{
18824     int vlen_enc = vector_length_encoding(this);
18825     BasicType bt = Matcher::vector_element_basic_type(this);
18826     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18827     __ movl($rtmp$$Register, $con$$constant);
18828     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18829   %}
18830   ins_pipe( pipe_slow );
18831 %}
18832 
18833 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18834   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18835   match(Set dst (Replicate src));
18836   effect(TEMP rtmp);
18837   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18838   ins_encode %{
18839     int vlen_enc = vector_length_encoding(this);
18840     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18841     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18842   %}
18843   ins_pipe( pipe_slow );
18844 %}
18845 
18846 instruct ReplS_mem(vec dst, memory mem) %{
18847   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18848   match(Set dst (Replicate (LoadS mem)));
18849   format %{ "replicateS $dst,$mem" %}
18850   ins_encode %{
18851     int vlen_enc = vector_length_encoding(this);
18852     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18853   %}
18854   ins_pipe( pipe_slow );
18855 %}
18856 
18857 // ====================ReplicateI=======================================
18858 
18859 instruct ReplI_reg(vec dst, rRegI src) %{
18860   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18861   match(Set dst (Replicate src));
18862   format %{ "replicateI $dst,$src" %}
18863   ins_encode %{
18864     uint vlen = Matcher::vector_length(this);
18865     int vlen_enc = vector_length_encoding(this);
18866     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18867       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18868     } else if (VM_Version::supports_avx2()) {
18869       __ movdl($dst$$XMMRegister, $src$$Register);
18870       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18871     } else {
18872       __ movdl($dst$$XMMRegister, $src$$Register);
18873       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18874     }
18875   %}
18876   ins_pipe( pipe_slow );
18877 %}
18878 
18879 instruct ReplI_mem(vec dst, memory mem) %{
18880   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18881   match(Set dst (Replicate (LoadI mem)));
18882   format %{ "replicateI $dst,$mem" %}
18883   ins_encode %{
18884     int vlen_enc = vector_length_encoding(this);
18885     if (VM_Version::supports_avx2()) {
18886       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18887     } else if (VM_Version::supports_avx()) {
18888       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18889     } else {
18890       __ movdl($dst$$XMMRegister, $mem$$Address);
18891       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18892     }
18893   %}
18894   ins_pipe( pipe_slow );
18895 %}
18896 
18897 instruct ReplI_imm(vec dst, immI con) %{
18898   predicate(Matcher::is_non_long_integral_vector(n));
18899   match(Set dst (Replicate con));
18900   format %{ "replicateI $dst,$con" %}
18901   ins_encode %{
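          // The replication count below sizes the constant-table entry to the
          // smallest pattern the available hardware can splat: 4 bytes with AVX
          // (broadcast), 8 bytes with SSE3 (movddup), otherwise a full 16-byte
          // vector (an assumption about load_constant_vector's strategy).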
18902     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18903                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18904                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18905     BasicType bt = Matcher::vector_element_basic_type(this);
18906     int vlen = Matcher::vector_length_in_bytes(this);
18907     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18908   %}
18909   ins_pipe( pipe_slow );
18910 %}
18911 
18912 // Replicate scalar zero to be vector
18913 instruct ReplI_zero(vec dst, immI_0 zero) %{
18914   predicate(Matcher::is_non_long_integral_vector(n));
18915   match(Set dst (Replicate zero));
18916   format %{ "replicateI $dst,$zero" %}
18917   ins_encode %{
18918     int vlen_enc = vector_length_encoding(this);
18919     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18920       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18921     } else {
18922       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18923     }
18924   %}
18925   ins_pipe( fpu_reg_reg );
18926 %}
18927 
18928 instruct ReplI_M1(vec dst, immI_M1 con) %{
18929   predicate(Matcher::is_non_long_integral_vector(n));
18930   match(Set dst (Replicate con));
18931   format %{ "vallones $dst" %}
18932   ins_encode %{
18933     int vector_len = vector_length_encoding(this);
18934     __ vallones($dst$$XMMRegister, vector_len);
18935   %}
18936   ins_pipe( pipe_slow );
18937 %}
18938 
18939 // ====================ReplicateL=======================================
18940 
18941 // Replicate long (8 byte) scalar to be vector
18942 instruct ReplL_reg(vec dst, rRegL src) %{
18943   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18944   match(Set dst (Replicate src));
18945   format %{ "replicateL $dst,$src" %}
18946   ins_encode %{
18947     int vlen = Matcher::vector_length(this);
18948     int vlen_enc = vector_length_encoding(this);
18949     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18950       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18951     } else if (VM_Version::supports_avx2()) {
18952       __ movdq($dst$$XMMRegister, $src$$Register);
18953       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18954     } else {
18955       __ movdq($dst$$XMMRegister, $src$$Register);
18956       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18957     }
18958   %}
18959   ins_pipe( pipe_slow );
18960 %}
18961 
18962 instruct ReplL_mem(vec dst, memory mem) %{
18963   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18964   match(Set dst (Replicate (LoadL mem)));
18965   format %{ "replicateL $dst,$mem" %}
18966   ins_encode %{
18967     int vlen_enc = vector_length_encoding(this);
18968     if (VM_Version::supports_avx2()) {
18969       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18970     } else if (VM_Version::supports_sse3()) {
18971       __ movddup($dst$$XMMRegister, $mem$$Address);
18972     } else {
18973       __ movq($dst$$XMMRegister, $mem$$Address);
18974       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18975     }
18976   %}
18977   ins_pipe( pipe_slow );
18978 %}
18979 
18980 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18981 instruct ReplL_imm(vec dst, immL con) %{
18982   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18983   match(Set dst (Replicate con));
18984   format %{ "replicateL $dst,$con" %}
18985   ins_encode %{
18986     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18987     int vlen = Matcher::vector_length_in_bytes(this);
18988     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18989   %}
18990   ins_pipe( pipe_slow );
18991 %}
18992 
18993 instruct ReplL_zero(vec dst, immL0 zero) %{
18994   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18995   match(Set dst (Replicate zero));
18996   format %{ "replicateL $dst,$zero" %}
18997   ins_encode %{
18998     int vlen_enc = vector_length_encoding(this);
18999     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19000       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19001     } else {
19002       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19003     }
19004   %}
19005   ins_pipe( fpu_reg_reg );
19006 %}
19007 
19008 instruct ReplL_M1(vec dst, immL_M1 con) %{
19009   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19010   match(Set dst (Replicate con));
19011   format %{ "vallones $dst" %}
19012   ins_encode %{
19013     int vector_len = vector_length_encoding(this);
19014     __ vallones($dst$$XMMRegister, vector_len);
19015   %}
19016   ins_pipe( pipe_slow );
19017 %}
19018 
19019 // ====================ReplicateF=======================================
19020 
19021 instruct vReplF_reg(vec dst, vlRegF src) %{
19022   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19023   match(Set dst (Replicate src));
19024   format %{ "replicateF $dst,$src" %}
19025   ins_encode %{
19026     uint vlen = Matcher::vector_length(this);
19027     int vlen_enc = vector_length_encoding(this);
19028     if (vlen <= 4) {
19029       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19030     } else if (VM_Version::supports_avx2()) {
19031       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19032     } else {
19033       assert(vlen == 8, "sanity");
19034       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19035       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19036     }
19037   %}
19038   ins_pipe( pipe_slow );
19039 %}
19040 
19041 instruct ReplF_reg(vec dst, vlRegF src) %{
19042   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19043   match(Set dst (Replicate src));
19044   format %{ "replicateF $dst,$src" %}
19045   ins_encode %{
19046     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19047   %}
19048   ins_pipe( pipe_slow );
19049 %}
19050 
19051 instruct ReplF_mem(vec dst, memory mem) %{
19052   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19053   match(Set dst (Replicate (LoadF mem)));
19054   format %{ "replicateF $dst,$mem" %}
19055   ins_encode %{
19056     int vlen_enc = vector_length_encoding(this);
19057     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19058   %}
19059   ins_pipe( pipe_slow );
19060 %}
19061 
19062 // Replicate float scalar immediate to be vector by loading from const table.
19063 instruct ReplF_imm(vec dst, immF con) %{
19064   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19065   match(Set dst (Replicate con));
19066   format %{ "replicateF $dst,$con" %}
19067   ins_encode %{
19068     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19069                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19070     int vlen = Matcher::vector_length_in_bytes(this);
19071     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19072   %}
19073   ins_pipe( pipe_slow );
19074 %}
19075 
19076 instruct ReplF_zero(vec dst, immF0 zero) %{
19077   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19078   match(Set dst (Replicate zero));
19079   format %{ "replicateF $dst,$zero" %}
19080   ins_encode %{
19081     int vlen_enc = vector_length_encoding(this);
19082     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19083       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19084     } else {
19085       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19086     }
19087   %}
19088   ins_pipe( fpu_reg_reg );
19089 %}
19090 
19091 // ====================ReplicateD=======================================
19092 
19093 // Replicate double (8 bytes) scalar to be vector
19094 instruct vReplD_reg(vec dst, vlRegD src) %{
19095   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19096   match(Set dst (Replicate src));
19097   format %{ "replicateD $dst,$src" %}
19098   ins_encode %{
19099     uint vlen = Matcher::vector_length(this);
19100     int vlen_enc = vector_length_encoding(this);
19101     if (vlen <= 2) {
19102       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19103     } else if (VM_Version::supports_avx2()) {
19104       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19105     } else {
19106       assert(vlen == 4, "sanity");
19107       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19108       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19109     }
19110   %}
19111   ins_pipe( pipe_slow );
19112 %}
19113 
19114 instruct ReplD_reg(vec dst, vlRegD src) %{
19115   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19116   match(Set dst (Replicate src));
19117   format %{ "replicateD $dst,$src" %}
19118   ins_encode %{
19119     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19120   %}
19121   ins_pipe( pipe_slow );
19122 %}
19123 
19124 instruct ReplD_mem(vec dst, memory mem) %{
19125   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19126   match(Set dst (Replicate (LoadD mem)));
19127   format %{ "replicateD $dst,$mem" %}
19128   ins_encode %{
19129     if (Matcher::vector_length(this) >= 4) {
19130       int vlen_enc = vector_length_encoding(this);
19131       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19132     } else {
19133       __ movddup($dst$$XMMRegister, $mem$$Address);
19134     }
19135   %}
19136   ins_pipe( pipe_slow );
19137 %}
19138 
19139 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19140 instruct ReplD_imm(vec dst, immD con) %{
19141   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19142   match(Set dst (Replicate con));
19143   format %{ "replicateD $dst,$con" %}
19144   ins_encode %{
19145     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19146     int vlen = Matcher::vector_length_in_bytes(this);
19147     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19148   %}
19149   ins_pipe( pipe_slow );
19150 %}
19151 
19152 instruct ReplD_zero(vec dst, immD0 zero) %{
19153   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19154   match(Set dst (Replicate zero));
19155   format %{ "replicateD $dst,$zero" %}
19156   ins_encode %{
19157     int vlen_enc = vector_length_encoding(this);
19158     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19159       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19160     } else {
19161       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19162     }
19163   %}
19164   ins_pipe( fpu_reg_reg );
19165 %}
19166 
19167 // ====================VECTOR INSERT=======================================
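      // VectorInsert produces the source vector with a single lane replaced:
      //   dst = src; dst[idx] = val
      // For vectors wider than 128 bits, the 128-bit lane containing idx is
      // extracted, patched, and reinserted.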
19168 
19169 instruct insert(vec dst, rRegI val, immU8 idx) %{
19170   predicate(Matcher::vector_length_in_bytes(n) < 32);
19171   match(Set dst (VectorInsert (Binary dst val) idx));
19172   format %{ "vector_insert $dst,$val,$idx" %}
19173   ins_encode %{
19174     assert(UseSSE >= 4, "required");
19175     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19176 
19177     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19178 
19179     assert(is_integral_type(elem_bt), "");
19180     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19181 
19182     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19183   %}
19184   ins_pipe( pipe_slow );
19185 %}
19186 
19187 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19188   predicate(Matcher::vector_length_in_bytes(n) == 32);
19189   match(Set dst (VectorInsert (Binary src val) idx));
19190   effect(TEMP vtmp);
19191   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19192   ins_encode %{
19193     int vlen_enc = Assembler::AVX_256bit;
19194     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19195     int elem_per_lane = 16/type2aelembytes(elem_bt);
19196     int log2epr = log2(elem_per_lane);
19197 
19198     assert(is_integral_type(elem_bt), "sanity");
19199     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19200 
19201     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19202     uint y_idx = ($idx$$constant >> log2epr) & 1;
19203     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19204     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19205     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19206   %}
19207   ins_pipe( pipe_slow );
19208 %}
19209 
19210 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19211   predicate(Matcher::vector_length_in_bytes(n) == 64);
19212   match(Set dst (VectorInsert (Binary src val) idx));
19213   effect(TEMP vtmp);
19214   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19215   ins_encode %{
19216     assert(UseAVX > 2, "sanity");
19217 
19218     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19219     int elem_per_lane = 16/type2aelembytes(elem_bt);
19220     int log2epr = log2(elem_per_lane);
19221 
19222     assert(is_integral_type(elem_bt), "");
19223     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19224 
19225     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19226     uint y_idx = ($idx$$constant >> log2epr) & 3;
19227     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19228     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19229     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19230   %}
19231   ins_pipe( pipe_slow );
19232 %}
19233 
19234 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19235   predicate(Matcher::vector_length(n) == 2);
19236   match(Set dst (VectorInsert (Binary dst val) idx));
19237   format %{ "vector_insert $dst,$val,$idx" %}
19238   ins_encode %{
19239     assert(UseSSE >= 4, "required");
19240     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19241     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19242 
19243     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19244   %}
19245   ins_pipe( pipe_slow );
19246 %}
19247 
19248 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19249   predicate(Matcher::vector_length(n) == 4);
19250   match(Set dst (VectorInsert (Binary src val) idx));
19251   effect(TEMP vtmp);
19252   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19253   ins_encode %{
19254     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19255     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19256 
19257     uint x_idx = $idx$$constant & right_n_bits(1);
19258     uint y_idx = ($idx$$constant >> 1) & 1;
19259     int vlen_enc = Assembler::AVX_256bit;
19260     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19261     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19262     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19263   %}
19264   ins_pipe( pipe_slow );
19265 %}
19266 
19267 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19268   predicate(Matcher::vector_length(n) == 8);
19269   match(Set dst (VectorInsert (Binary src val) idx));
19270   effect(TEMP vtmp);
19271   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19272   ins_encode %{
19273     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19274     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19275 
19276     uint x_idx = $idx$$constant & right_n_bits(1);
19277     uint y_idx = ($idx$$constant >> 1) & 3;
19278     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19279     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19280     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19281   %}
19282   ins_pipe( pipe_slow );
19283 %}
19284 
19285 instruct insertF(vec dst, regF val, immU8 idx) %{
19286   predicate(Matcher::vector_length(n) < 8);
19287   match(Set dst (VectorInsert (Binary dst val) idx));
19288   format %{ "vector_insert $dst,$val,$idx" %}
19289   ins_encode %{
19290     assert(UseSSE >= 4, "sanity");
19291 
19292     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19293     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19294 
19295     uint x_idx = $idx$$constant & right_n_bits(2);
19296     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19297   %}
19298   ins_pipe( pipe_slow );
19299 %}
19300 
19301 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19302   predicate(Matcher::vector_length(n) >= 8);
19303   match(Set dst (VectorInsert (Binary src val) idx));
19304   effect(TEMP vtmp);
19305   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19306   ins_encode %{
19307     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19308     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19309 
19310     int vlen = Matcher::vector_length(this);
19311     uint x_idx = $idx$$constant & right_n_bits(2);
19312     if (vlen == 8) {
19313       uint y_idx = ($idx$$constant >> 2) & 1;
19314       int vlen_enc = Assembler::AVX_256bit;
19315       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19316       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19317       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19318     } else {
19319       assert(vlen == 16, "sanity");
19320       uint y_idx = ($idx$$constant >> 2) & 3;
19321       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19322       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19323       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19324     }
19325   %}
19326   ins_pipe( pipe_slow );
19327 %}
19328 
19329 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19330   predicate(Matcher::vector_length(n) == 2);
19331   match(Set dst (VectorInsert (Binary dst val) idx));
19332   effect(TEMP tmp);
19333   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19334   ins_encode %{
19335     assert(UseSSE >= 4, "sanity");
19336     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19337     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19338 
19339     __ movq($tmp$$Register, $val$$XMMRegister);
19340     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19341   %}
19342   ins_pipe( pipe_slow );
19343 %}
19344 
19345 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19346   predicate(Matcher::vector_length(n) == 4);
19347   match(Set dst (VectorInsert (Binary src val) idx));
19348   effect(TEMP vtmp, TEMP tmp);
19349   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19350   ins_encode %{
19351     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19352     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19353 
19354     uint x_idx = $idx$$constant & right_n_bits(1);
19355     uint y_idx = ($idx$$constant >> 1) & 1;
19356     int vlen_enc = Assembler::AVX_256bit;
19357     __ movq($tmp$$Register, $val$$XMMRegister);
19358     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19359     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19360     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19361   %}
19362   ins_pipe( pipe_slow );
19363 %}
19364 
19365 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19366   predicate(Matcher::vector_length(n) == 8);
19367   match(Set dst (VectorInsert (Binary src val) idx));
19368   effect(TEMP tmp, TEMP vtmp);
19369   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19370   ins_encode %{
19371     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19372     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19373 
19374     uint x_idx = $idx$$constant & right_n_bits(1);
19375     uint y_idx = ($idx$$constant >> 1) & 3;
19376     __ movq($tmp$$Register, $val$$XMMRegister);
19377     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19378     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19379     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19380   %}
19381   ins_pipe( pipe_slow );
19382 %}
19383 
19384 // ====================REDUCTION ARITHMETIC=======================================
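      // A reduction folds all lanes of a vector into a scalar, combined with a
      // scalar input: dst = src1 OP src2[0] OP src2[1] ... OP src2[vlen-1]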
19385 
19386 // =======================Int Reduction==========================================
19387 
19388 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19389   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19390   match(Set dst (AddReductionVI src1 src2));
19391   match(Set dst (MulReductionVI src1 src2));
19392   match(Set dst (AndReductionV  src1 src2));
19393   match(Set dst ( OrReductionV  src1 src2));
19394   match(Set dst (XorReductionV  src1 src2));
19395   match(Set dst (MinReductionV  src1 src2));
19396   match(Set dst (MaxReductionV  src1 src2));
19397   match(Set dst (UMinReductionV  src1 src2));
19398   match(Set dst (UMaxReductionV  src1 src2));
19399   effect(TEMP vtmp1, TEMP vtmp2);
19400   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19401   ins_encode %{
19402     int opcode = this->ideal_Opcode();
19403     int vlen = Matcher::vector_length(this, $src2);
19404     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19405   %}
19406   ins_pipe( pipe_slow );
19407 %}
19408 
19409 // =======================Long Reduction==========================================
19410 
19411 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19412   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19413   match(Set dst (AddReductionVL src1 src2));
19414   match(Set dst (MulReductionVL src1 src2));
19415   match(Set dst (AndReductionV  src1 src2));
19416   match(Set dst ( OrReductionV  src1 src2));
19417   match(Set dst (XorReductionV  src1 src2));
19418   match(Set dst (MinReductionV  src1 src2));
19419   match(Set dst (MaxReductionV  src1 src2));
19420   match(Set dst (UMinReductionV  src1 src2));
19421   match(Set dst (UMaxReductionV  src1 src2));
19422   effect(TEMP vtmp1, TEMP vtmp2);
19423   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19424   ins_encode %{
19425     int opcode = this->ideal_Opcode();
19426     int vlen = Matcher::vector_length(this, $src2);
19427     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19428   %}
19429   ins_pipe( pipe_slow );
19430 %}
19431 
19432 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19433   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19434   match(Set dst (AddReductionVL src1 src2));
19435   match(Set dst (MulReductionVL src1 src2));
19436   match(Set dst (AndReductionV  src1 src2));
19437   match(Set dst ( OrReductionV  src1 src2));
19438   match(Set dst (XorReductionV  src1 src2));
19439   match(Set dst (MinReductionV  src1 src2));
19440   match(Set dst (MaxReductionV  src1 src2));
19441   match(Set dst (UMinReductionV  src1 src2));
19442   match(Set dst (UMaxReductionV  src1 src2));
19443   effect(TEMP vtmp1, TEMP vtmp2);
19444   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19445   ins_encode %{
19446     int opcode = this->ideal_Opcode();
19447     int vlen = Matcher::vector_length(this, $src2);
19448     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19449   %}
19450   ins_pipe( pipe_slow );
19451 %}
19452 
19453 // =======================Float Reduction==========================================
19454 
19455 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19456   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19457   match(Set dst (AddReductionVF dst src));
19458   match(Set dst (MulReductionVF dst src));
19459   effect(TEMP dst, TEMP vtmp);
19460   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19461   ins_encode %{
19462     int opcode = this->ideal_Opcode();
19463     int vlen = Matcher::vector_length(this, $src);
19464     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19465   %}
19466   ins_pipe( pipe_slow );
19467 %}
19468 
19469 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19470   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19471   match(Set dst (AddReductionVF dst src));
19472   match(Set dst (MulReductionVF dst src));
19473   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19474   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19475   ins_encode %{
19476     int opcode = this->ideal_Opcode();
19477     int vlen = Matcher::vector_length(this, $src);
19478     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19479   %}
19480   ins_pipe( pipe_slow );
19481 %}
19482 
19483 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19484   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19485   match(Set dst (AddReductionVF dst src));
19486   match(Set dst (MulReductionVF dst src));
19487   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19488   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19489   ins_encode %{
19490     int opcode = this->ideal_Opcode();
19491     int vlen = Matcher::vector_length(this, $src);
19492     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19493   %}
19494   ins_pipe( pipe_slow );
19495 %}
19496 
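      // The rules above keep the strict lane order required for auto-vectorized
      // code: FP add/mul are not associative, so e.g. for four lanes the result
      // must be (((dst op s[0]) op s[1]) op s[2]) op s[3]. The unordered variants
      // below may combine the lanes in any association order.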
19497 
19498 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19499   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19500   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19501   // src1 contains reduction identity
19502   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19503   match(Set dst (AddReductionVF src1 src2));
19504   match(Set dst (MulReductionVF src1 src2));
19505   effect(TEMP dst);
19506   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19507   ins_encode %{
19508     int opcode = this->ideal_Opcode();
19509     int vlen = Matcher::vector_length(this, $src2);
19510     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19511   %}
19512   ins_pipe( pipe_slow );
19513 %}
19514 
19515 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19516   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19517   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19518   // src1 contains reduction identity
19519   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19520   match(Set dst (AddReductionVF src1 src2));
19521   match(Set dst (MulReductionVF src1 src2));
19522   effect(TEMP dst, TEMP vtmp);
19523   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19524   ins_encode %{
19525     int opcode = this->ideal_Opcode();
19526     int vlen = Matcher::vector_length(this, $src2);
19527     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19528   %}
19529   ins_pipe( pipe_slow );
19530 %}
19531 
19532 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19533   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19534   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19535   // src1 contains reduction identity
19536   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19537   match(Set dst (AddReductionVF src1 src2));
19538   match(Set dst (MulReductionVF src1 src2));
19539   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19540   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19541   ins_encode %{
19542     int opcode = this->ideal_Opcode();
19543     int vlen = Matcher::vector_length(this, $src2);
19544     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19545   %}
19546   ins_pipe( pipe_slow );
19547 %}
19548 
19549 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19550   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19551   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19552   // src1 contains reduction identity
19553   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19554   match(Set dst (AddReductionVF src1 src2));
19555   match(Set dst (MulReductionVF src1 src2));
19556   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19557   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19558   ins_encode %{
19559     int opcode = this->ideal_Opcode();
19560     int vlen = Matcher::vector_length(this, $src2);
19561     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19562   %}
19563   ins_pipe( pipe_slow );
19564 %}
19565 
19566 // =======================Double Reduction==========================================
19567 
19568 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19569   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19570   match(Set dst (AddReductionVD dst src));
19571   match(Set dst (MulReductionVD dst src));
19572   effect(TEMP dst, TEMP vtmp);
19573   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19574   ins_encode %{
19575     int opcode = this->ideal_Opcode();
19576     int vlen = Matcher::vector_length(this, $src);
19577     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19578 %}
19579   ins_pipe( pipe_slow );
19580 %}
19581 
19582 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19583   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19584   match(Set dst (AddReductionVD dst src));
19585   match(Set dst (MulReductionVD dst src));
19586   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19587   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19588   ins_encode %{
19589     int opcode = this->ideal_Opcode();
19590     int vlen = Matcher::vector_length(this, $src);
19591     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19592   %}
19593   ins_pipe( pipe_slow );
19594 %}
19595 
19596 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19597   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19598   match(Set dst (AddReductionVD dst src));
19599   match(Set dst (MulReductionVD dst src));
19600   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19601   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19602   ins_encode %{
19603     int opcode = this->ideal_Opcode();
19604     int vlen = Matcher::vector_length(this, $src);
19605     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19606   %}
19607   ins_pipe( pipe_slow );
19608 %}
19609 
19610 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19611   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19612   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19613   // src1 contains reduction identity
19614   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19615   match(Set dst (AddReductionVD src1 src2));
19616   match(Set dst (MulReductionVD src1 src2));
19617   effect(TEMP dst);
19618   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19619   ins_encode %{
19620     int opcode = this->ideal_Opcode();
19621     int vlen = Matcher::vector_length(this, $src2);
19622     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19623 %}
19624   ins_pipe( pipe_slow );
19625 %}
19626 
19627 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19628   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19629   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19630   // src1 contains reduction identity
19631   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19632   match(Set dst (AddReductionVD src1 src2));
19633   match(Set dst (MulReductionVD src1 src2));
19634   effect(TEMP dst, TEMP vtmp);
19635   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19636   ins_encode %{
19637     int opcode = this->ideal_Opcode();
19638     int vlen = Matcher::vector_length(this, $src2);
19639     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19640   %}
19641   ins_pipe( pipe_slow );
19642 %}
19643 
19644 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19645   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19646   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19647   // src1 contains reduction identity
19648   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19649   match(Set dst (AddReductionVD src1 src2));
19650   match(Set dst (MulReductionVD src1 src2));
19651   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19652   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19653   ins_encode %{
19654     int opcode = this->ideal_Opcode();
19655     int vlen = Matcher::vector_length(this, $src2);
19656     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19657   %}
19658   ins_pipe( pipe_slow );
19659 %}
19660 
19661 // =======================Byte Reduction==========================================
19662 
19663 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19664   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19665   match(Set dst (AddReductionVI src1 src2));
19666   match(Set dst (AndReductionV  src1 src2));
19667   match(Set dst ( OrReductionV  src1 src2));
19668   match(Set dst (XorReductionV  src1 src2));
19669   match(Set dst (MinReductionV  src1 src2));
19670   match(Set dst (MaxReductionV  src1 src2));
19671   match(Set dst (UMinReductionV  src1 src2));
19672   match(Set dst (UMaxReductionV  src1 src2));
19673   effect(TEMP vtmp1, TEMP vtmp2);
19674   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19675   ins_encode %{
19676     int opcode = this->ideal_Opcode();
19677     int vlen = Matcher::vector_length(this, $src2);
19678     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19679   %}
19680   ins_pipe( pipe_slow );
19681 %}
19682 
19683 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19684   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19685   match(Set dst (AddReductionVI src1 src2));
19686   match(Set dst (AndReductionV  src1 src2));
19687   match(Set dst ( OrReductionV  src1 src2));
19688   match(Set dst (XorReductionV  src1 src2));
19689   match(Set dst (MinReductionV  src1 src2));
19690   match(Set dst (MaxReductionV  src1 src2));
19691   match(Set dst (UMinReductionV  src1 src2));
19692   match(Set dst (UMaxReductionV  src1 src2));
19693   effect(TEMP vtmp1, TEMP vtmp2);
19694   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19695   ins_encode %{
19696     int opcode = this->ideal_Opcode();
19697     int vlen = Matcher::vector_length(this, $src2);
19698     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19699   %}
19700   ins_pipe( pipe_slow );
19701 %}
19702 
19703 // =======================Short Reduction==========================================
19704 
19705 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19706   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19707   match(Set dst (AddReductionVI src1 src2));
19708   match(Set dst (MulReductionVI src1 src2));
19709   match(Set dst (AndReductionV  src1 src2));
19710   match(Set dst ( OrReductionV  src1 src2));
19711   match(Set dst (XorReductionV  src1 src2));
19712   match(Set dst (MinReductionV  src1 src2));
19713   match(Set dst (MaxReductionV  src1 src2));
19714   match(Set dst (UMinReductionV  src1 src2));
19715   match(Set dst (UMaxReductionV  src1 src2));
19716   effect(TEMP vtmp1, TEMP vtmp2);
19717   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19718   ins_encode %{
19719     int opcode = this->ideal_Opcode();
19720     int vlen = Matcher::vector_length(this, $src2);
19721     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19722   %}
19723   ins_pipe( pipe_slow );
19724 %}
19725 
19726 // =======================Mul Reduction==========================================
19727 
19728 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19729   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19730             Matcher::vector_length(n->in(2)) <= 32); // src2
19731   match(Set dst (MulReductionVI src1 src2));
19732   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19733   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19734   ins_encode %{
19735     int opcode = this->ideal_Opcode();
19736     int vlen = Matcher::vector_length(this, $src2);
19737     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19738   %}
19739   ins_pipe( pipe_slow );
19740 %}
19741 
19742 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19743   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19744             Matcher::vector_length(n->in(2)) == 64); // src2
19745   match(Set dst (MulReductionVI src1 src2));
19746   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19747   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19748   ins_encode %{
19749     int opcode = this->ideal_Opcode();
19750     int vlen = Matcher::vector_length(this, $src2);
19751     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19752   %}
19753   ins_pipe( pipe_slow );
19754 %}
19755 
19756 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
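// The immF src1 rules only apply when the supplied identity is the neutral
// element of the operation (+Inf for min, -Inf for max, per the predicate),
// since e.g. min(+Inf, x) == x lets the identity be ignored entirely; the
// accumulating (dst src) shapes are handled by the *_av rules below.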
19758 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19759                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19760   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19761             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19762              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19763             Matcher::vector_length(n->in(2)) == 2);
19764   match(Set dst (MinReductionV src1 src2));
19765   match(Set dst (MaxReductionV src1 src2));
19766   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19767   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19768   ins_encode %{
19769     assert(UseAVX > 0, "sanity");
19770 
19771     int opcode = this->ideal_Opcode();
19772     int vlen = Matcher::vector_length(this, $src2);
19773     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19774                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19775   %}
19776   ins_pipe( pipe_slow );
19777 %}
19778 
19779 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19780                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19781   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19782             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19783              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19784             Matcher::vector_length(n->in(2)) >= 4);
19785   match(Set dst (MinReductionV src1 src2));
19786   match(Set dst (MaxReductionV src1 src2));
19787   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19788   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19789   ins_encode %{
19790     assert(UseAVX > 0, "sanity");
19791 
19792     int opcode = this->ideal_Opcode();
19793     int vlen = Matcher::vector_length(this, $src2);
19794     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19795                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19796   %}
19797   ins_pipe( pipe_slow );
19798 %}
19799 
19800 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19801                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19802   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19803             Matcher::vector_length(n->in(2)) == 2);
19804   match(Set dst (MinReductionV dst src));
19805   match(Set dst (MaxReductionV dst src));
19806   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19807   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19808   ins_encode %{
19809     assert(UseAVX > 0, "sanity");
19810 
19811     int opcode = this->ideal_Opcode();
19812     int vlen = Matcher::vector_length(this, $src);
19813     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19814                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19815   %}
19816   ins_pipe( pipe_slow );
19817 %}
19818 
19819 
19820 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19821                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19822   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19823             Matcher::vector_length(n->in(2)) >= 4);
19824   match(Set dst (MinReductionV dst src));
19825   match(Set dst (MaxReductionV dst src));
19826   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19827   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19828   ins_encode %{
19829     assert(UseAVX > 0, "sanity");
19830 
19831     int opcode = this->ideal_Opcode();
19832     int vlen = Matcher::vector_length(this, $src);
19833     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19834                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19835   %}
19836   ins_pipe( pipe_slow );
19837 %}
19838 
19839 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19840   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19841             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19842              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19843             Matcher::vector_length(n->in(2)) == 2);
19844   match(Set dst (MinReductionV src1 src2));
19845   match(Set dst (MaxReductionV src1 src2));
19846   effect(TEMP dst, TEMP xtmp1);
19847   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19848   ins_encode %{
19849     int opcode = this->ideal_Opcode();
19850     int vlen = Matcher::vector_length(this, $src2);
19851     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19852                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19853   %}
19854   ins_pipe( pipe_slow );
19855 %}
19856 
19857 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19858   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19859             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19860              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19861             Matcher::vector_length(n->in(2)) >= 4);
19862   match(Set dst (MinReductionV src1 src2));
19863   match(Set dst (MaxReductionV src1 src2));
19864   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19865   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19866   ins_encode %{
19867     int opcode = this->ideal_Opcode();
19868     int vlen = Matcher::vector_length(this, $src2);
19869     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19870                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19871   %}
19872   ins_pipe( pipe_slow );
19873 %}
19874 
19875 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19876   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19877             Matcher::vector_length(n->in(2)) == 2);
19878   match(Set dst (MinReductionV dst src));
19879   match(Set dst (MaxReductionV dst src));
19880   effect(TEMP dst, TEMP xtmp1);
19881   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19882   ins_encode %{
19883     int opcode = this->ideal_Opcode();
19884     int vlen = Matcher::vector_length(this, $src);
19885     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19886                          $xtmp1$$XMMRegister);
19887   %}
19888   ins_pipe( pipe_slow );
19889 %}
19890 
19891 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19892   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19893             Matcher::vector_length(n->in(2)) >= 4);
19894   match(Set dst (MinReductionV dst src));
19895   match(Set dst (MaxReductionV dst src));
19896   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19897   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19898   ins_encode %{
19899     int opcode = this->ideal_Opcode();
19900     int vlen = Matcher::vector_length(this, $src);
19901     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19902                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19903   %}
19904   ins_pipe( pipe_slow );
19905 %}
19906 
//--------------------Min/Max Double Reduction --------------------
19908 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19909                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19910   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19911             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19912              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19913             Matcher::vector_length(n->in(2)) == 2);
19914   match(Set dst (MinReductionV src1 src2));
19915   match(Set dst (MaxReductionV src1 src2));
19916   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19917   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19918   ins_encode %{
19919     assert(UseAVX > 0, "sanity");
19920 
19921     int opcode = this->ideal_Opcode();
19922     int vlen = Matcher::vector_length(this, $src2);
19923     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19924                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19925   %}
19926   ins_pipe( pipe_slow );
19927 %}
19928 
19929 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19930                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19931   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19932             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19933              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19934             Matcher::vector_length(n->in(2)) >= 4);
19935   match(Set dst (MinReductionV src1 src2));
19936   match(Set dst (MaxReductionV src1 src2));
19937   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19938   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19939   ins_encode %{
19940     assert(UseAVX > 0, "sanity");
19941 
19942     int opcode = this->ideal_Opcode();
19943     int vlen = Matcher::vector_length(this, $src2);
19944     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19945                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19946   %}
19947   ins_pipe( pipe_slow );
19948 %}
19949 
19950 
19951 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19952                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19953   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19954             Matcher::vector_length(n->in(2)) == 2);
19955   match(Set dst (MinReductionV dst src));
19956   match(Set dst (MaxReductionV dst src));
19957   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19958   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19959   ins_encode %{
19960     assert(UseAVX > 0, "sanity");
19961 
19962     int opcode = this->ideal_Opcode();
19963     int vlen = Matcher::vector_length(this, $src);
19964     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19965                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19966   %}
19967   ins_pipe( pipe_slow );
19968 %}
19969 
19970 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19971                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19972   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19973             Matcher::vector_length(n->in(2)) >= 4);
19974   match(Set dst (MinReductionV dst src));
19975   match(Set dst (MaxReductionV dst src));
19976   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19977   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19978   ins_encode %{
19979     assert(UseAVX > 0, "sanity");
19980 
19981     int opcode = this->ideal_Opcode();
19982     int vlen = Matcher::vector_length(this, $src);
19983     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19984                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19985   %}
19986   ins_pipe( pipe_slow );
19987 %}
19988 
19989 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19990   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19991             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19992              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19993             Matcher::vector_length(n->in(2)) == 2);
19994   match(Set dst (MinReductionV src1 src2));
19995   match(Set dst (MaxReductionV src1 src2));
19996   effect(TEMP dst, TEMP xtmp1);
19997   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19998   ins_encode %{
19999     int opcode = this->ideal_Opcode();
20000     int vlen = Matcher::vector_length(this, $src2);
20001     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20002                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20003   %}
20004   ins_pipe( pipe_slow );
20005 %}
20006 
20007 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20008   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20009             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20010              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20011             Matcher::vector_length(n->in(2)) >= 4);
20012   match(Set dst (MinReductionV src1 src2));
20013   match(Set dst (MaxReductionV src1 src2));
20014   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20015   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20016   ins_encode %{
20017     int opcode = this->ideal_Opcode();
20018     int vlen = Matcher::vector_length(this, $src2);
20019     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20020                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20021   %}
20022   ins_pipe( pipe_slow );
20023 %}
20024 
20025 
20026 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20027   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20028             Matcher::vector_length(n->in(2)) == 2);
20029   match(Set dst (MinReductionV dst src));
20030   match(Set dst (MaxReductionV dst src));
20031   effect(TEMP dst, TEMP xtmp1);
20032   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20033   ins_encode %{
20034     int opcode = this->ideal_Opcode();
20035     int vlen = Matcher::vector_length(this, $src);
20036     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20037                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20038   %}
20039   ins_pipe( pipe_slow );
20040 %}
20041 
20042 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20043   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20044             Matcher::vector_length(n->in(2)) >= 4);
20045   match(Set dst (MinReductionV dst src));
20046   match(Set dst (MaxReductionV dst src));
20047   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20048   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20049   ins_encode %{
20050     int opcode = this->ideal_Opcode();
20051     int vlen = Matcher::vector_length(this, $src);
20052     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20053                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20054   %}
20055   ins_pipe( pipe_slow );
20056 %}
20057 
20058 // ====================VECTOR ARITHMETIC=======================================
20059 
20060 // --------------------------------- ADD --------------------------------------
20061 
20062 // Bytes vector add
20063 instruct vaddB(vec dst, vec src) %{
20064   predicate(UseAVX == 0);
20065   match(Set dst (AddVB dst src));
20066   format %{ "paddb   $dst,$src\t! add packedB" %}
20067   ins_encode %{
20068     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20069   %}
20070   ins_pipe( pipe_slow );
20071 %}
20072 
20073 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20074   predicate(UseAVX > 0);
20075   match(Set dst (AddVB src1 src2));
20076   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20077   ins_encode %{
20078     int vlen_enc = vector_length_encoding(this);
20079     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20080   %}
20081   ins_pipe( pipe_slow );
20082 %}
20083 
20084 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20085   predicate((UseAVX > 0) &&
20086             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20087   match(Set dst (AddVB src (LoadVector mem)));
20088   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
20089   ins_encode %{
20090     int vlen_enc = vector_length_encoding(this);
20091     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20092   %}
20093   ins_pipe( pipe_slow );
20094 %}
20095 
20096 // Shorts/Chars vector add
20097 instruct vaddS(vec dst, vec src) %{
20098   predicate(UseAVX == 0);
20099   match(Set dst (AddVS dst src));
20100   format %{ "paddw   $dst,$src\t! add packedS" %}
20101   ins_encode %{
20102     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20103   %}
20104   ins_pipe( pipe_slow );
20105 %}
20106 
20107 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20108   predicate(UseAVX > 0);
20109   match(Set dst (AddVS src1 src2));
20110   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
20111   ins_encode %{
20112     int vlen_enc = vector_length_encoding(this);
20113     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20114   %}
20115   ins_pipe( pipe_slow );
20116 %}
20117 
20118 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20119   predicate((UseAVX > 0) &&
20120             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20121   match(Set dst (AddVS src (LoadVector mem)));
20122   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
20123   ins_encode %{
20124     int vlen_enc = vector_length_encoding(this);
20125     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20126   %}
20127   ins_pipe( pipe_slow );
20128 %}
20129 
20130 // Integers vector add
20131 instruct vaddI(vec dst, vec src) %{
20132   predicate(UseAVX == 0);
20133   match(Set dst (AddVI dst src));
20134   format %{ "paddd   $dst,$src\t! add packedI" %}
20135   ins_encode %{
20136     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20137   %}
20138   ins_pipe( pipe_slow );
20139 %}
20140 
20141 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20142   predicate(UseAVX > 0);
20143   match(Set dst (AddVI src1 src2));
20144   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
20145   ins_encode %{
20146     int vlen_enc = vector_length_encoding(this);
20147     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20148   %}
20149   ins_pipe( pipe_slow );
20150 %}
20151 
20152 
20153 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20154   predicate((UseAVX > 0) &&
20155             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20156   match(Set dst (AddVI src (LoadVector mem)));
20157   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
20158   ins_encode %{
20159     int vlen_enc = vector_length_encoding(this);
20160     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20161   %}
20162   ins_pipe( pipe_slow );
20163 %}
20164 
20165 // Longs vector add
20166 instruct vaddL(vec dst, vec src) %{
20167   predicate(UseAVX == 0);
20168   match(Set dst (AddVL dst src));
20169   format %{ "paddq   $dst,$src\t! add packedL" %}
20170   ins_encode %{
20171     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20172   %}
20173   ins_pipe( pipe_slow );
20174 %}
20175 
20176 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20177   predicate(UseAVX > 0);
20178   match(Set dst (AddVL src1 src2));
20179   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20180   ins_encode %{
20181     int vlen_enc = vector_length_encoding(this);
20182     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20183   %}
20184   ins_pipe( pipe_slow );
20185 %}
20186 
20187 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20188   predicate((UseAVX > 0) &&
20189             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20190   match(Set dst (AddVL src (LoadVector mem)));
20191   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20192   ins_encode %{
20193     int vlen_enc = vector_length_encoding(this);
20194     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20195   %}
20196   ins_pipe( pipe_slow );
20197 %}
20198 
20199 // Floats vector add
20200 instruct vaddF(vec dst, vec src) %{
20201   predicate(UseAVX == 0);
20202   match(Set dst (AddVF dst src));
20203   format %{ "addps   $dst,$src\t! add packedF" %}
20204   ins_encode %{
20205     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20206   %}
20207   ins_pipe( pipe_slow );
20208 %}
20209 
20210 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20211   predicate(UseAVX > 0);
20212   match(Set dst (AddVF src1 src2));
20213   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20214   ins_encode %{
20215     int vlen_enc = vector_length_encoding(this);
20216     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20217   %}
20218   ins_pipe( pipe_slow );
20219 %}
20220 
20221 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20222   predicate((UseAVX > 0) &&
20223             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20224   match(Set dst (AddVF src (LoadVector mem)));
20225   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20226   ins_encode %{
20227     int vlen_enc = vector_length_encoding(this);
20228     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20229   %}
20230   ins_pipe( pipe_slow );
20231 %}
20232 
20233 // Doubles vector add
20234 instruct vaddD(vec dst, vec src) %{
20235   predicate(UseAVX == 0);
20236   match(Set dst (AddVD dst src));
20237   format %{ "addpd   $dst,$src\t! add packedD" %}
20238   ins_encode %{
20239     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20240   %}
20241   ins_pipe( pipe_slow );
20242 %}
20243 
20244 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20245   predicate(UseAVX > 0);
20246   match(Set dst (AddVD src1 src2));
20247   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20248   ins_encode %{
20249     int vlen_enc = vector_length_encoding(this);
20250     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20251   %}
20252   ins_pipe( pipe_slow );
20253 %}
20254 
20255 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20256   predicate((UseAVX > 0) &&
20257             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20258   match(Set dst (AddVD src (LoadVector mem)));
20259   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20260   ins_encode %{
20261     int vlen_enc = vector_length_encoding(this);
20262     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20263   %}
20264   ins_pipe( pipe_slow );
20265 %}
20266 
20267 // --------------------------------- SUB --------------------------------------
20268 
20269 // Bytes vector sub
20270 instruct vsubB(vec dst, vec src) %{
20271   predicate(UseAVX == 0);
20272   match(Set dst (SubVB dst src));
20273   format %{ "psubb   $dst,$src\t! sub packedB" %}
20274   ins_encode %{
20275     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20276   %}
20277   ins_pipe( pipe_slow );
20278 %}
20279 
20280 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20281   predicate(UseAVX > 0);
20282   match(Set dst (SubVB src1 src2));
20283   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20284   ins_encode %{
20285     int vlen_enc = vector_length_encoding(this);
20286     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20287   %}
20288   ins_pipe( pipe_slow );
20289 %}
20290 
20291 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20292   predicate((UseAVX > 0) &&
20293             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20294   match(Set dst (SubVB src (LoadVector mem)));
20295   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20296   ins_encode %{
20297     int vlen_enc = vector_length_encoding(this);
20298     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20299   %}
20300   ins_pipe( pipe_slow );
20301 %}
20302 
20303 // Shorts/Chars vector sub
20304 instruct vsubS(vec dst, vec src) %{
20305   predicate(UseAVX == 0);
20306   match(Set dst (SubVS dst src));
20307   format %{ "psubw   $dst,$src\t! sub packedS" %}
20308   ins_encode %{
20309     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20310   %}
20311   ins_pipe( pipe_slow );
20312 %}
20313 
20314 
20315 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20316   predicate(UseAVX > 0);
20317   match(Set dst (SubVS src1 src2));
20318   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20319   ins_encode %{
20320     int vlen_enc = vector_length_encoding(this);
20321     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20322   %}
20323   ins_pipe( pipe_slow );
20324 %}
20325 
20326 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20327   predicate((UseAVX > 0) &&
20328             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20329   match(Set dst (SubVS src (LoadVector mem)));
20330   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20331   ins_encode %{
20332     int vlen_enc = vector_length_encoding(this);
20333     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20334   %}
20335   ins_pipe( pipe_slow );
20336 %}
20337 
20338 // Integers vector sub
20339 instruct vsubI(vec dst, vec src) %{
20340   predicate(UseAVX == 0);
20341   match(Set dst (SubVI dst src));
20342   format %{ "psubd   $dst,$src\t! sub packedI" %}
20343   ins_encode %{
20344     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20345   %}
20346   ins_pipe( pipe_slow );
20347 %}
20348 
20349 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20350   predicate(UseAVX > 0);
20351   match(Set dst (SubVI src1 src2));
20352   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20353   ins_encode %{
20354     int vlen_enc = vector_length_encoding(this);
20355     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20356   %}
20357   ins_pipe( pipe_slow );
20358 %}
20359 
20360 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20361   predicate((UseAVX > 0) &&
20362             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20363   match(Set dst (SubVI src (LoadVector mem)));
20364   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20365   ins_encode %{
20366     int vlen_enc = vector_length_encoding(this);
20367     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20368   %}
20369   ins_pipe( pipe_slow );
20370 %}
20371 
20372 // Longs vector sub
20373 instruct vsubL(vec dst, vec src) %{
20374   predicate(UseAVX == 0);
20375   match(Set dst (SubVL dst src));
20376   format %{ "psubq   $dst,$src\t! sub packedL" %}
20377   ins_encode %{
20378     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20379   %}
20380   ins_pipe( pipe_slow );
20381 %}
20382 
20383 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20384   predicate(UseAVX > 0);
20385   match(Set dst (SubVL src1 src2));
20386   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20387   ins_encode %{
20388     int vlen_enc = vector_length_encoding(this);
20389     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20390   %}
20391   ins_pipe( pipe_slow );
20392 %}
20393 
20394 
20395 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20396   predicate((UseAVX > 0) &&
20397             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20398   match(Set dst (SubVL src (LoadVector mem)));
20399   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20400   ins_encode %{
20401     int vlen_enc = vector_length_encoding(this);
20402     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20403   %}
20404   ins_pipe( pipe_slow );
20405 %}
20406 
20407 // Floats vector sub
20408 instruct vsubF(vec dst, vec src) %{
20409   predicate(UseAVX == 0);
20410   match(Set dst (SubVF dst src));
20411   format %{ "subps   $dst,$src\t! sub packedF" %}
20412   ins_encode %{
20413     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20414   %}
20415   ins_pipe( pipe_slow );
20416 %}
20417 
20418 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20419   predicate(UseAVX > 0);
20420   match(Set dst (SubVF src1 src2));
20421   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20422   ins_encode %{
20423     int vlen_enc = vector_length_encoding(this);
20424     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20425   %}
20426   ins_pipe( pipe_slow );
20427 %}
20428 
20429 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20430   predicate((UseAVX > 0) &&
20431             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20432   match(Set dst (SubVF src (LoadVector mem)));
20433   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20434   ins_encode %{
20435     int vlen_enc = vector_length_encoding(this);
20436     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20437   %}
20438   ins_pipe( pipe_slow );
20439 %}
20440 
20441 // Doubles vector sub
20442 instruct vsubD(vec dst, vec src) %{
20443   predicate(UseAVX == 0);
20444   match(Set dst (SubVD dst src));
20445   format %{ "subpd   $dst,$src\t! sub packedD" %}
20446   ins_encode %{
20447     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20448   %}
20449   ins_pipe( pipe_slow );
20450 %}
20451 
20452 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20453   predicate(UseAVX > 0);
20454   match(Set dst (SubVD src1 src2));
20455   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20456   ins_encode %{
20457     int vlen_enc = vector_length_encoding(this);
20458     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20459   %}
20460   ins_pipe( pipe_slow );
20461 %}
20462 
20463 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20464   predicate((UseAVX > 0) &&
20465             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20466   match(Set dst (SubVD src (LoadVector mem)));
20467   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20468   ins_encode %{
20469     int vlen_enc = vector_length_encoding(this);
20470     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20471   %}
20472   ins_pipe( pipe_slow );
20473 %}
20474 
20475 // --------------------------------- MUL --------------------------------------
20476 
20477 // Byte vector mul
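// There is no byte-lane multiply instruction in SSE/AVX, so these rules
// compute the products in 16-bit lanes: the 8-byte form sign-extends to
// words and multiplies once, while the wider forms multiply the odd and
// even byte columns separately in word lanes and recombine, keeping only
// the low 8 bits of each 16-bit product.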
20478 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20479   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20480   match(Set dst (MulVB src1 src2));
20481   effect(TEMP dst, TEMP xtmp);
20482   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20483   ins_encode %{
20484     assert(UseSSE > 3, "required");
20485     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20486     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20487     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20488     __ psllw($dst$$XMMRegister, 8);
20489     __ psrlw($dst$$XMMRegister, 8);
20490     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20491   %}
20492   ins_pipe( pipe_slow );
20493 %}
20494 
20495 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20496   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20497   match(Set dst (MulVB src1 src2));
20498   effect(TEMP dst, TEMP xtmp);
20499   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20500   ins_encode %{
20501     assert(UseSSE > 3, "required");
20502     // Odd-index elements
20503     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20504     __ psrlw($dst$$XMMRegister, 8);
20505     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20506     __ psrlw($xtmp$$XMMRegister, 8);
20507     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20508     __ psllw($dst$$XMMRegister, 8);
20509     // Even-index elements
20510     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20511     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20512     __ psllw($xtmp$$XMMRegister, 8);
20513     __ psrlw($xtmp$$XMMRegister, 8);
20514     // Combine
20515     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20516   %}
20517   ins_pipe( pipe_slow );
20518 %}
20519 
20520 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20521   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20522   match(Set dst (MulVB src1 src2));
20523   effect(TEMP xtmp1, TEMP xtmp2);
20524   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20525   ins_encode %{
20526     int vlen_enc = vector_length_encoding(this);
20527     // Odd-index elements
20528     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20529     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20530     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20531     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20532     // Even-index elements
20533     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20534     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20535     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20536     // Combine
20537     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20538   %}
20539   ins_pipe( pipe_slow );
20540 %}
20541 
20542 // Shorts/Chars vector mul
20543 instruct vmulS(vec dst, vec src) %{
20544   predicate(UseAVX == 0);
20545   match(Set dst (MulVS dst src));
20546   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20547   ins_encode %{
20548     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20549   %}
20550   ins_pipe( pipe_slow );
20551 %}
20552 
20553 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20554   predicate(UseAVX > 0);
20555   match(Set dst (MulVS src1 src2));
20556   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20557   ins_encode %{
20558     int vlen_enc = vector_length_encoding(this);
20559     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20560   %}
20561   ins_pipe( pipe_slow );
20562 %}
20563 
20564 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20565   predicate((UseAVX > 0) &&
20566             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20567   match(Set dst (MulVS src (LoadVector mem)));
20568   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20569   ins_encode %{
20570     int vlen_enc = vector_length_encoding(this);
20571     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20572   %}
20573   ins_pipe( pipe_slow );
20574 %}
20575 
20576 // Integers vector mul
20577 instruct vmulI(vec dst, vec src) %{
20578   predicate(UseAVX == 0);
20579   match(Set dst (MulVI dst src));
20580   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20581   ins_encode %{
20582     assert(UseSSE > 3, "required");
20583     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20584   %}
20585   ins_pipe( pipe_slow );
20586 %}
20587 
20588 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20589   predicate(UseAVX > 0);
20590   match(Set dst (MulVI src1 src2));
20591   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20592   ins_encode %{
20593     int vlen_enc = vector_length_encoding(this);
20594     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20595   %}
20596   ins_pipe( pipe_slow );
20597 %}
20598 
20599 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20600   predicate((UseAVX > 0) &&
20601             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20602   match(Set dst (MulVI src (LoadVector mem)));
20603   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20604   ins_encode %{
20605     int vlen_enc = vector_length_encoding(this);
20606     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20607   %}
20608   ins_pipe( pipe_slow );
20609 %}
20610 
20611 // Longs vector mul
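// A direct 64-bit element multiply (evpmullq) requires AVX512DQ, plus
// AVX512VL when the vector is shorter than 512 bits; without those features
// the product is decomposed into 32-bit multiplies below.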
20612 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20613   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20614              VM_Version::supports_avx512dq()) ||
20615             VM_Version::supports_avx512vldq());
20616   match(Set dst (MulVL src1 src2));
20617   ins_cost(500);
20618   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20619   ins_encode %{
20620     assert(UseAVX > 2, "required");
20621     int vlen_enc = vector_length_encoding(this);
20622     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20623   %}
20624   ins_pipe( pipe_slow );
20625 %}
20626 
20627 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20628   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20629              VM_Version::supports_avx512dq()) ||
20630             (Matcher::vector_length_in_bytes(n) > 8 &&
20631              VM_Version::supports_avx512vldq()));
20632   match(Set dst (MulVL src (LoadVector mem)));
20633   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20634   ins_cost(500);
20635   ins_encode %{
20636     assert(UseAVX > 2, "required");
20637     int vlen_enc = vector_length_encoding(this);
20638     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20639   %}
20640   ins_pipe( pipe_slow );
20641 %}
20642 
20643 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20644   predicate(UseAVX == 0);
20645   match(Set dst (MulVL src1 src2));
20646   ins_cost(500);
20647   effect(TEMP dst, TEMP xtmp);
20648   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20649   ins_encode %{
20650     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are needed
20652     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20653     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20654     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20655     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20656     __ psllq($dst$$XMMRegister, 32);
20657     // Get the lo-lo products
20658     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20659     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20660     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20661   %}
20662   ins_pipe( pipe_slow );
20663 %}
20664 
20665 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20666   predicate(UseAVX > 0 &&
20667             ((Matcher::vector_length_in_bytes(n) == 64 &&
20668               !VM_Version::supports_avx512dq()) ||
20669              (Matcher::vector_length_in_bytes(n) < 64 &&
20670               !VM_Version::supports_avx512vldq())));
20671   match(Set dst (MulVL src1 src2));
20672   effect(TEMP xtmp1, TEMP xtmp2);
20673   ins_cost(500);
20674   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20675   ins_encode %{
20676     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are needed
20678     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20679     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20680     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20681     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20682     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20683     // Get the lo-lo products
20684     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20685     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20686   %}
20687   ins_pipe( pipe_slow );
20688 %}
20689 
20690 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20691   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20692   match(Set dst (MulVL src1 src2));
20693   ins_cost(100);
20694   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20695   ins_encode %{
20696     int vlen_enc = vector_length_encoding(this);
20697     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20698   %}
20699   ins_pipe( pipe_slow );
20700 %}
20701 
20702 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20703   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20704   match(Set dst (MulVL src1 src2));
20705   ins_cost(100);
20706   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20707   ins_encode %{
20708     int vlen_enc = vector_length_encoding(this);
20709     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20710   %}
20711   ins_pipe( pipe_slow );
20712 %}
20713 
20714 // Floats vector mul
20715 instruct vmulF(vec dst, vec src) %{
20716   predicate(UseAVX == 0);
20717   match(Set dst (MulVF dst src));
20718   format %{ "mulps   $dst,$src\t! mul packedF" %}
20719   ins_encode %{
20720     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20721   %}
20722   ins_pipe( pipe_slow );
20723 %}
20724 
20725 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20726   predicate(UseAVX > 0);
20727   match(Set dst (MulVF src1 src2));
20728   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20729   ins_encode %{
20730     int vlen_enc = vector_length_encoding(this);
20731     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20732   %}
20733   ins_pipe( pipe_slow );
20734 %}
20735 
20736 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20737   predicate((UseAVX > 0) &&
20738             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20739   match(Set dst (MulVF src (LoadVector mem)));
20740   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20741   ins_encode %{
20742     int vlen_enc = vector_length_encoding(this);
20743     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20744   %}
20745   ins_pipe( pipe_slow );
20746 %}
20747 
20748 // Doubles vector mul
20749 instruct vmulD(vec dst, vec src) %{
20750   predicate(UseAVX == 0);
20751   match(Set dst (MulVD dst src));
20752   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20753   ins_encode %{
20754     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20755   %}
20756   ins_pipe( pipe_slow );
20757 %}
20758 
20759 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20760   predicate(UseAVX > 0);
20761   match(Set dst (MulVD src1 src2));
20762   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20763   ins_encode %{
20764     int vlen_enc = vector_length_encoding(this);
20765     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20766   %}
20767   ins_pipe( pipe_slow );
20768 %}
20769 
20770 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20771   predicate((UseAVX > 0) &&
20772             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20773   match(Set dst (MulVD src (LoadVector mem)));
20774   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20775   ins_encode %{
20776     int vlen_enc = vector_length_encoding(this);
20777     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20778   %}
20779   ins_pipe( pipe_slow );
20780 %}
20781 
20782 // --------------------------------- DIV --------------------------------------
20783 
20784 // Floats vector div
20785 instruct vdivF(vec dst, vec src) %{
20786   predicate(UseAVX == 0);
20787   match(Set dst (DivVF dst src));
20788   format %{ "divps   $dst,$src\t! div packedF" %}
20789   ins_encode %{
20790     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20791   %}
20792   ins_pipe( pipe_slow );
20793 %}
20794 
20795 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20796   predicate(UseAVX > 0);
20797   match(Set dst (DivVF src1 src2));
20798   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20799   ins_encode %{
20800     int vlen_enc = vector_length_encoding(this);
20801     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20802   %}
20803   ins_pipe( pipe_slow );
20804 %}
20805 
20806 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20807   predicate((UseAVX > 0) &&
20808             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20809   match(Set dst (DivVF src (LoadVector mem)));
20810   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20811   ins_encode %{
20812     int vlen_enc = vector_length_encoding(this);
20813     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20814   %}
20815   ins_pipe( pipe_slow );
20816 %}
20817 
20818 // Doubles vector div
20819 instruct vdivD(vec dst, vec src) %{
20820   predicate(UseAVX == 0);
20821   match(Set dst (DivVD dst src));
20822   format %{ "divpd   $dst,$src\t! div packedD" %}
20823   ins_encode %{
20824     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20825   %}
20826   ins_pipe( pipe_slow );
20827 %}
20828 
20829 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20830   predicate(UseAVX > 0);
20831   match(Set dst (DivVD src1 src2));
20832   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20833   ins_encode %{
20834     int vlen_enc = vector_length_encoding(this);
20835     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20836   %}
20837   ins_pipe( pipe_slow );
20838 %}
20839 
20840 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20841   predicate((UseAVX > 0) &&
20842             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20843   match(Set dst (DivVD src (LoadVector mem)));
20844   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20845   ins_encode %{
20846     int vlen_enc = vector_length_encoding(this);
20847     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20848   %}
20849   ins_pipe( pipe_slow );
20850 %}
20851 
20852 // ------------------------------ MinMax ---------------------------------------
20853 
20854 // Byte, Short, Int vector Min/Max
20855 instruct minmax_reg_sse(vec dst, vec src) %{
20856   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20857             UseAVX == 0);
20858   match(Set dst (MinV dst src));
20859   match(Set dst (MaxV dst src));
20860   format %{ "vector_minmax  $dst,$src\t!  " %}
20861   ins_encode %{
20862     assert(UseSSE >= 4, "required");
20863 
20864     int opcode = this->ideal_Opcode();
20865     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20866     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20867   %}
20868   ins_pipe( pipe_slow );
20869 %}
20870 
20871 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20872   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20873             UseAVX > 0);
20874   match(Set dst (MinV src1 src2));
20875   match(Set dst (MaxV src1 src2));
20876   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20877   ins_encode %{
20878     int opcode = this->ideal_Opcode();
20879     int vlen_enc = vector_length_encoding(this);
20880     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20881 
20882     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20883   %}
20884   ins_pipe( pipe_slow );
20885 %}
20886 
20887 // Long vector Min/Max
20888 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20889   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20890             UseAVX == 0);
20891   match(Set dst (MinV dst src));
20892   match(Set dst (MaxV src dst));
20893   effect(TEMP dst, TEMP tmp);
20894   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20895   ins_encode %{
20896     assert(UseSSE >= 4, "required");
20897 
20898     int opcode = this->ideal_Opcode();
20899     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20900     assert(elem_bt == T_LONG, "sanity");
20901 
20902     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20903   %}
20904   ins_pipe( pipe_slow );
20905 %}
20906 
20907 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20908   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20909             UseAVX > 0 && !VM_Version::supports_avx512vl());
20910   match(Set dst (MinV src1 src2));
20911   match(Set dst (MaxV src1 src2));
20912   effect(TEMP dst);
20913   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20914   ins_encode %{
20915     int vlen_enc = vector_length_encoding(this);
20916     int opcode = this->ideal_Opcode();
20917     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20918     assert(elem_bt == T_LONG, "sanity");
20919 
20920     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20921   %}
20922   ins_pipe( pipe_slow );
20923 %}
20924 
20925 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20926   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20927             Matcher::vector_element_basic_type(n) == T_LONG);
20928   match(Set dst (MinV src1 src2));
20929   match(Set dst (MaxV src1 src2));
20930   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20931   ins_encode %{
20932     assert(UseAVX > 2, "required");
20933 
20934     int vlen_enc = vector_length_encoding(this);
20935     int opcode = this->ideal_Opcode();
20936     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20937     assert(elem_bt == T_LONG, "sanity");
20938 
20939     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20940   %}
20941   ins_pipe( pipe_slow );
20942 %}
20943 
20944 // Float/Double vector Min/Max
20945 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20946   predicate(VM_Version::supports_avx10_2() &&
20947             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20948   match(Set dst (MinV a b));
20949   match(Set dst (MaxV a b));
20950   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20951   ins_encode %{
20952     int vlen_enc = vector_length_encoding(this);
20953     int opcode = this->ideal_Opcode();
20954     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20955     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20956   %}
20957   ins_pipe( pipe_slow );
20958 %}
20959 
20960 // Float/Double vector Min/Max
20961 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20962   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20963             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20964             UseAVX > 0);
20965   match(Set dst (MinV a b));
20966   match(Set dst (MaxV a b));
20967   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20968   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20969   ins_encode %{
20970     assert(UseAVX > 0, "required");
20971 
20972     int opcode = this->ideal_Opcode();
20973     int vlen_enc = vector_length_encoding(this);
20974     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20975 
20976     __ vminmax_fp(opcode, elem_bt,
20977                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20978                   $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20979   %}
20980   ins_pipe( pipe_slow );
20981 %}
20982 
20983 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20984   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20985             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20986   match(Set dst (MinV a b));
20987   match(Set dst (MaxV a b));
20988   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20989   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20990   ins_encode %{
20991     assert(UseAVX > 2, "required");
20992 
20993     int opcode = this->ideal_Opcode();
20994     int vlen_enc = vector_length_encoding(this);
20995     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20996 
20997     __ evminmax_fp(opcode, elem_bt,
20998                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20999                    $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
21000   %}
21001   ins_pipe( pipe_slow );
21002 %}
21003 
21004 // ------------------------------ Unsigned vector Min/Max ----------------------
21005 
21006 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21007   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21008   match(Set dst (UMinV a b));
21009   match(Set dst (UMaxV a b));
21010   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21011   ins_encode %{
21012     int opcode = this->ideal_Opcode();
21013     int vlen_enc = vector_length_encoding(this);
21014     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21015     assert(is_integral_type(elem_bt), "");
21016     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21017   %}
21018   ins_pipe( pipe_slow );
21019 %}
21020 
21021 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21022   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21023   match(Set dst (UMinV a (LoadVector b)));
21024   match(Set dst (UMaxV a (LoadVector b)));
21025   format %{ "vector_uminmax $dst,$a,$b\t!" %}
21026   ins_encode %{
21027     int opcode = this->ideal_Opcode();
21028     int vlen_enc = vector_length_encoding(this);
21029     BasicType elem_bt = Matcher::vector_element_basic_type(this);
21030     assert(is_integral_type(elem_bt), "");
21031     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21032   %}
21033   ins_pipe( pipe_slow );
21034 %}
21035 
21036 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21037   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21038   match(Set dst (UMinV a b));
21039   match(Set dst (UMaxV a b));
21040   effect(TEMP xtmp1, TEMP xtmp2);
21041   format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21042   ins_encode %{
21043     int opcode = this->ideal_Opcode();
21044     int vlen_enc = vector_length_encoding(this);
21045     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21046   %}
21047   ins_pipe( pipe_slow );
21048 %}
21049 
21050 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21051   match(Set dst (UMinV (Binary dst src2) mask));
21052   match(Set dst (UMaxV (Binary dst src2) mask));
21053   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21054   ins_encode %{
21055     int vlen_enc = vector_length_encoding(this);
21056     BasicType bt = Matcher::vector_element_basic_type(this);
21057     int opc = this->ideal_Opcode();
21058     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21059                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21060   %}
21061   ins_pipe( pipe_slow );
21062 %}
21063 
21064 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21065   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21066   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21067   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21068   ins_encode %{
21069     int vlen_enc = vector_length_encoding(this);
21070     BasicType bt = Matcher::vector_element_basic_type(this);
21071     int opc = this->ideal_Opcode();
21072     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21073                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21074   %}
21075   ins_pipe( pipe_slow );
21076 %}
21077 
21078 // --------------------------------- Signum/CopySign ---------------------------
21079 
21080 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21081   match(Set dst (SignumF dst (Binary zero one)));
21082   effect(KILL cr);
21083   format %{ "signumF $dst, $dst" %}
21084   ins_encode %{
21085     int opcode = this->ideal_Opcode();
21086     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21087   %}
21088   ins_pipe( pipe_slow );
21089 %}
21090 
21091 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21092   match(Set dst (SignumD dst (Binary zero one)));
21093   effect(KILL cr);
21094   format %{ "signumD $dst, $dst" %}
21095   ins_encode %{
21096     int opcode = this->ideal_Opcode();
21097     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21098   %}
21099   ins_pipe( pipe_slow );
21100 %}
21101 
21102 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21103   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21104   match(Set dst (SignumVF src (Binary zero one)));
21105   match(Set dst (SignumVD src (Binary zero one)));
21106   effect(TEMP dst, TEMP xtmp1);
21107   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21108   ins_encode %{
21109     int opcode = this->ideal_Opcode();
21110     int vec_enc = vector_length_encoding(this);
21111     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21112                          $xtmp1$$XMMRegister, vec_enc);
21113   %}
21114   ins_pipe( pipe_slow );
21115 %}
21116 
21117 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21118   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21119   match(Set dst (SignumVF src (Binary zero one)));
21120   match(Set dst (SignumVD src (Binary zero one)));
21121   effect(TEMP dst, TEMP ktmp1);
21122   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21123   ins_encode %{
21124     int opcode = this->ideal_Opcode();
21125     int vec_enc = vector_length_encoding(this);
21126     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21127                           $ktmp1$$KRegister, vec_enc);
21128   %}
21129   ins_pipe( pipe_slow );
21130 %}
21131 
21132 // ---------------------------------------
21133 // For copySign use 0xE4 as writemask for vpternlog
21134 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21135 // C (xmm2) is set to 0x7FFFFFFF
21136 // Wherever xmm2 is 0, we want to pick from B (sign)
21137 // Wherever xmm2 is 1, we want to pick from A (src)
21138 //
21139 // A B C Result
21140 // 0 0 0 0
21141 // 0 0 1 0
21142 // 0 1 0 1
21143 // 0 1 1 0
21144 // 1 0 0 0
21145 // 1 0 1 1
21146 // 1 1 0 1
21147 // 1 1 1 1
21148 //
21149 // Result going from high bit to low bit is 0b11100100 = 0xe4
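//
// A minimal sketch of that derivation (plain C, illustrative only, not part
// of the generated code): the immediate is the truth table above read as a
// byte, with bit index A*4 + B*2 + C and the selection rule "result = C ? A : B".
//
//   unsigned imm8 = 0;
//   for (int a = 0; a < 2; a++)
//     for (int b = 0; b < 2; b++)
//       for (int c = 0; c < 2; c++)
//         imm8 |= (unsigned)(c ? a : b) << (a*4 + b*2 + c);
//   // imm8 == 0xE4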
21150 // ---------------------------------------
21151 
21152 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21153   match(Set dst (CopySignF dst src));
21154   effect(TEMP tmp1, TEMP tmp2);
21155   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21156   ins_encode %{
21157     __ movl($tmp2$$Register, 0x7FFFFFFF);
21158     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21159     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21160   %}
21161   ins_pipe( pipe_slow );
21162 %}
21163 
21164 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21165   match(Set dst (CopySignD dst (Binary src zero)));
21166   ins_cost(100);
21167   effect(TEMP tmp1, TEMP tmp2);
21168   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21169   ins_encode %{
21170     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21171     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21172     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21173   %}
21174   ins_pipe( pipe_slow );
21175 %}
21176 
21177 //----------------------------- CompressBits/ExpandBits ------------------------
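//
// CompressBits/ExpandBits map to BMI2 pext/pdep below. As an illustrative
// sketch of the semantics (not generated code): pext packs the src bits
// selected by mask into the low bits of the result; pdep is the inverse,
// scattering the low bits of src to the mask's set-bit positions.
//
//   pext(src = 0b10110010, mask = 0b11001100) == 0b1000
//     (src bits 2,3,6,7 are 0,0,0,1, packed LSB-first)
//   pdep(src = 0b0110, mask = 0b11001100) == 0b01001000
//     (src bits 0..3 = 0,1,1,0, scattered to mask positions 2,3,6,7)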
21178 
21179 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21180   predicate(n->bottom_type()->isa_int());
21181   match(Set dst (CompressBits src mask));
21182   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21183   ins_encode %{
21184     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21185   %}
21186   ins_pipe( pipe_slow );
21187 %}
21188 
21189 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21190   predicate(n->bottom_type()->isa_int());
21191   match(Set dst (ExpandBits src mask));
21192   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21193   ins_encode %{
21194     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21195   %}
21196   ins_pipe( pipe_slow );
21197 %}
21198 
21199 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21200   predicate(n->bottom_type()->isa_int());
21201   match(Set dst (CompressBits src (LoadI mask)));
21202   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21203   ins_encode %{
21204     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21205   %}
21206   ins_pipe( pipe_slow );
21207 %}
21208 
21209 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21210   predicate(n->bottom_type()->isa_int());
21211   match(Set dst (ExpandBits src (LoadI mask)));
21212   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21213   ins_encode %{
21214     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21215   %}
21216   ins_pipe( pipe_slow );
21217 %}
21218 
21219 // --------------------------------- Sqrt --------------------------------------
21220 
21221 instruct vsqrtF_reg(vec dst, vec src) %{
21222   match(Set dst (SqrtVF src));
21223   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21224   ins_encode %{
21225     assert(UseAVX > 0, "required");
21226     int vlen_enc = vector_length_encoding(this);
21227     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21228   %}
21229   ins_pipe( pipe_slow );
21230 %}
21231 
21232 instruct vsqrtF_mem(vec dst, memory mem) %{
21233   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21234   match(Set dst (SqrtVF (LoadVector mem)));
21235   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21236   ins_encode %{
21237     assert(UseAVX > 0, "required");
21238     int vlen_enc = vector_length_encoding(this);
21239     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21240   %}
21241   ins_pipe( pipe_slow );
21242 %}
21243 
21244 // Doubles vector sqrt
21245 instruct vsqrtD_reg(vec dst, vec src) %{
21246   match(Set dst (SqrtVD src));
21247   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21248   ins_encode %{
21249     assert(UseAVX > 0, "required");
21250     int vlen_enc = vector_length_encoding(this);
21251     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21252   %}
21253   ins_pipe( pipe_slow );
21254 %}
21255 
21256 instruct vsqrtD_mem(vec dst, memory mem) %{
21257   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21258   match(Set dst (SqrtVD (LoadVector mem)));
21259   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21260   ins_encode %{
21261     assert(UseAVX > 0, "required");
21262     int vlen_enc = vector_length_encoding(this);
21263     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21264   %}
21265   ins_pipe( pipe_slow );
21266 %}
21267 
21268 // ------------------------------ Shift ---------------------------------------
21269 
21270 // Left and right shift count vectors are the same on x86
21271 // (only lowest bits of xmm reg are used for count).
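// For example, a count loaded once via movdl serves psllw, psrlw and psraw
// alike, since each reads its count from the low bits of the xmm operand;
// that is why the single instruct below matches both LShiftCntV and
// RShiftCntV.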
21272 instruct vshiftcnt(vec dst, rRegI cnt) %{
21273   match(Set dst (LShiftCntV cnt));
21274   match(Set dst (RShiftCntV cnt));
21275   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21276   ins_encode %{
21277     __ movdl($dst$$XMMRegister, $cnt$$Register);
21278   %}
21279   ins_pipe( pipe_slow );
21280 %}
21281 
21282 // Byte vector shift
21283 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21284   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21285   match(Set dst ( LShiftVB src shift));
21286   match(Set dst ( RShiftVB src shift));
21287   match(Set dst (URShiftVB src shift));
21288   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21289   format %{"vector_byte_shift $dst,$src,$shift" %}
21290   ins_encode %{
21291     assert(UseSSE > 3, "required");
21292     int opcode = this->ideal_Opcode();
21293     bool sign = (opcode != Op_URShiftVB);
21294     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21295     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21296     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21297     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21298     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21299   %}
21300   ins_pipe( pipe_slow );
21301 %}
21302 
21303 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21304   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21305             UseAVX <= 1);
21306   match(Set dst ( LShiftVB src shift));
21307   match(Set dst ( RShiftVB src shift));
21308   match(Set dst (URShiftVB src shift));
21309   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21310   format %{"vector_byte_shift $dst,$src,$shift" %}
21311   ins_encode %{
21312     assert(UseSSE > 3, "required");
21313     int opcode = this->ideal_Opcode();
21314     bool sign = (opcode != Op_URShiftVB);
21315     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21316     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21317     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21318     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21319     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21320     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21321     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21322     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21323     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21324   %}
21325   ins_pipe( pipe_slow );
21326 %}
21327 
21328 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21329   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21330             UseAVX > 1);
21331   match(Set dst ( LShiftVB src shift));
21332   match(Set dst ( RShiftVB src shift));
21333   match(Set dst (URShiftVB src shift));
21334   effect(TEMP dst, TEMP tmp);
21335   format %{"vector_byte_shift $dst,$src,$shift" %}
21336   ins_encode %{
21337     int opcode = this->ideal_Opcode();
21338     bool sign = (opcode != Op_URShiftVB);
21339     int vlen_enc = Assembler::AVX_256bit;
21340     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21341     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21342     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21343     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21344     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21345   %}
21346   ins_pipe( pipe_slow );
21347 %}
21348 
21349 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21350   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21351   match(Set dst ( LShiftVB src shift));
21352   match(Set dst ( RShiftVB src shift));
21353   match(Set dst (URShiftVB src shift));
21354   effect(TEMP dst, TEMP tmp);
21355   format %{"vector_byte_shift $dst,$src,$shift" %}
21356   ins_encode %{
21357     assert(UseAVX > 1, "required");
21358     int opcode = this->ideal_Opcode();
21359     bool sign = (opcode != Op_URShiftVB);
21360     int vlen_enc = Assembler::AVX_256bit;
21361     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21362     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21363     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21364     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21365     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21366     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21367     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21368     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21369     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21370   %}
21371   ins_pipe( pipe_slow );
21372 %}
21373 
21374 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21375   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21376   match(Set dst ( LShiftVB src shift));
21377   match(Set dst ( RShiftVB src shift));
21378   match(Set dst (URShiftVB src shift));
21379   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21380   format %{"vector_byte_shift $dst,$src,$shift" %}
21381   ins_encode %{
21382     assert(UseAVX > 2, "required");
21383     int opcode = this->ideal_Opcode();
21384     bool sign = (opcode != Op_URShiftVB);
21385     int vlen_enc = Assembler::AVX_512bit;
21386     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21387     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21388     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21389     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21390     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21391     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21392     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21393     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21394     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21395     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21396     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21397     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21398   %}
21399   ins_pipe( pipe_slow );
21400 %}
21401 
21402 // A logical right shift of a short vector produces an incorrect Java result
21403 // for negative data, because Java converts a short value to an int with sign
21404 // extension before shifting. Char vectors are fine, since chars are unsigned.
21405 // See the worked example below.
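//
// A worked example (Java semantics, illustrative only):
//   short s = -4;         // 0xFFFC
//   (short)(s >>> 3)      // s widens to 0xFFFFFFFC; the int result 0x1FFFFFFF
//                         // narrows to (short)0xFFFF = -1
// whereas a 16-bit psrlw would compute 0xFFFC >> 3 = 0x1FFF, a different value.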
21406 // Shorts/Chars vector shift
21407 instruct vshiftS(vec dst, vec src, vec shift) %{
21408   predicate(!n->as_ShiftV()->is_var_shift());
21409   match(Set dst ( LShiftVS src shift));
21410   match(Set dst ( RShiftVS src shift));
21411   match(Set dst (URShiftVS src shift));
21412   effect(TEMP dst, USE src, USE shift);
21413   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21414   ins_encode %{
21415     int opcode = this->ideal_Opcode();
21416     if (UseAVX > 0) {
21417       int vlen_enc = vector_length_encoding(this);
21418       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21419     } else {
21420       int vlen = Matcher::vector_length(this);
21421       if (vlen == 2) {
21422         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21423         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21424       } else if (vlen == 4) {
21425         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21426         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21427       } else {
21428         assert(vlen == 8, "sanity");
21429         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21430         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21431       }
21432     }
21433   %}
21434   ins_pipe( pipe_slow );
21435 %}
21436 
21437 // Integers vector shift
21438 instruct vshiftI(vec dst, vec src, vec shift) %{
21439   predicate(!n->as_ShiftV()->is_var_shift());
21440   match(Set dst ( LShiftVI src shift));
21441   match(Set dst ( RShiftVI src shift));
21442   match(Set dst (URShiftVI src shift));
21443   effect(TEMP dst, USE src, USE shift);
21444   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21445   ins_encode %{
21446     int opcode = this->ideal_Opcode();
21447     if (UseAVX > 0) {
21448       int vlen_enc = vector_length_encoding(this);
21449       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21450     } else {
21451       int vlen = Matcher::vector_length(this);
21452       if (vlen == 2) {
21453         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21454         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21455       } else {
21456         assert(vlen == 4, "sanity");
21457         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21458         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21459       }
21460     }
21461   %}
21462   ins_pipe( pipe_slow );
21463 %}
21464 
21465 // Integers vector constant shift
21466 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21467   match(Set dst (LShiftVI src (LShiftCntV shift)));
21468   match(Set dst (RShiftVI src (RShiftCntV shift)));
21469   match(Set dst (URShiftVI src (RShiftCntV shift)));
21470   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21471   ins_encode %{
21472     int opcode = this->ideal_Opcode();
21473     if (UseAVX > 0) {
21474       int vector_len = vector_length_encoding(this);
21475       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21476     } else {
21477       int vlen = Matcher::vector_length(this);
21478       if (vlen == 2) {
21479         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21480         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21481       } else {
21482         assert(vlen == 4, "sanity");
21483         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21484         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21485       }
21486     }
21487   %}
21488   ins_pipe( pipe_slow );
21489 %}
21490 
21491 // Longs vector shift
21492 instruct vshiftL(vec dst, vec src, vec shift) %{
21493   predicate(!n->as_ShiftV()->is_var_shift());
21494   match(Set dst ( LShiftVL src shift));
21495   match(Set dst (URShiftVL src shift));
21496   effect(TEMP dst, USE src, USE shift);
21497   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21498   ins_encode %{
21499     int opcode = this->ideal_Opcode();
21500     if (UseAVX > 0) {
21501       int vlen_enc = vector_length_encoding(this);
21502       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21503     } else {
21504       assert(Matcher::vector_length(this) == 2, "");
21505       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21506       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21507     }
21508   %}
21509   ins_pipe( pipe_slow );
21510 %}
21511 
21512 // Longs vector constant shift
21513 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21514   match(Set dst (LShiftVL src (LShiftCntV shift)));
21515   match(Set dst (URShiftVL src (RShiftCntV shift)));
21516   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21517   ins_encode %{
21518     int opcode = this->ideal_Opcode();
21519     if (UseAVX > 0) {
21520       int vector_len = vector_length_encoding(this);
21521       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21522     } else {
21523       assert(Matcher::vector_length(this) == 2, "");
21524       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21525       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21526     }
21527   %}
21528   ins_pipe( pipe_slow );
21529 %}
21530 
21531 // ------------------- Arithmetic Right Shift ---------------------------------
21532 // Long vector arithmetic right shift
21533 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21534   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21535   match(Set dst (RShiftVL src shift));
21536   effect(TEMP dst, TEMP tmp);
21537   format %{ "vshiftq $dst,$src,$shift" %}
21538   ins_encode %{
21539     uint vlen = Matcher::vector_length(this);
21540     if (vlen == 2) {
21541       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21542       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21543       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21544       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21545       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21546       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21547     } else {
21548       assert(vlen == 4, "sanity");
21549       assert(UseAVX > 1, "required");
21550       int vlen_enc = Assembler::AVX_256bit;
21551       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21552       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21553       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21554       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21555       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21556     }
21557   %}
21558   ins_pipe( pipe_slow );
21559 %}
21560 
21561 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21562   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21563   match(Set dst (RShiftVL src shift));
21564   format %{ "vshiftq $dst,$src,$shift" %}
21565   ins_encode %{
21566     int vlen_enc = vector_length_encoding(this);
21567     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21568   %}
21569   ins_pipe( pipe_slow );
21570 %}
21571 
21572 // ------------------- Variable Shift -----------------------------
21573 // Byte variable shift
21574 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21575   predicate(Matcher::vector_length(n) <= 8 &&
21576             n->as_ShiftV()->is_var_shift() &&
21577             !VM_Version::supports_avx512bw());
21578   match(Set dst ( LShiftVB src shift));
21579   match(Set dst ( RShiftVB src shift));
21580   match(Set dst (URShiftVB src shift));
21581   effect(TEMP dst, TEMP vtmp);
21582   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21583   ins_encode %{
21584     assert(UseAVX >= 2, "required");
21585 
21586     int opcode = this->ideal_Opcode();
21587     int vlen_enc = Assembler::AVX_128bit;
21588     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21589     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21590   %}
21591   ins_pipe( pipe_slow );
21592 %}
21593 
21594 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21595   predicate(Matcher::vector_length(n) == 16 &&
21596             n->as_ShiftV()->is_var_shift() &&
21597             !VM_Version::supports_avx512bw());
21598   match(Set dst ( LShiftVB src shift));
21599   match(Set dst ( RShiftVB src shift));
21600   match(Set dst (URShiftVB src shift));
21601   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21602   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21603   ins_encode %{
21604     assert(UseAVX >= 2, "required");
21605 
21606     int opcode = this->ideal_Opcode();
21607     int vlen_enc = Assembler::AVX_128bit;
21608     // Shift lower half and get word result in dst
21609     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21610 
21611     // Shift upper half and get word result in vtmp1
21612     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21613     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21614     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21615 
21616     // Merge and down convert the two word results to byte in dst
21617     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21618   %}
21619   ins_pipe( pipe_slow );
21620 %}
21621 
21622 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21623   predicate(Matcher::vector_length(n) == 32 &&
21624             n->as_ShiftV()->is_var_shift() &&
21625             !VM_Version::supports_avx512bw());
21626   match(Set dst ( LShiftVB src shift));
21627   match(Set dst ( RShiftVB src shift));
21628   match(Set dst (URShiftVB src shift));
21629   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21630   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21631   ins_encode %{
21632     assert(UseAVX >= 2, "required");
21633 
21634     int opcode = this->ideal_Opcode();
21635     int vlen_enc = Assembler::AVX_128bit;
21636     // Process lower 128 bits and get result in dst
21637     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21638     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21639     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21640     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21641     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21642 
21643     // Process higher 128 bits and get result in vtmp3
21644     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21645     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21646     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21647     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21648     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21649     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21650     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21651 
21652     // Merge the two results in dst
21653     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21654   %}
21655   ins_pipe( pipe_slow );
21656 %}
21657 
21658 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21659   predicate(Matcher::vector_length(n) <= 32 &&
21660             n->as_ShiftV()->is_var_shift() &&
21661             VM_Version::supports_avx512bw());
21662   match(Set dst ( LShiftVB src shift));
21663   match(Set dst ( RShiftVB src shift));
21664   match(Set dst (URShiftVB src shift));
21665   effect(TEMP dst, TEMP vtmp);
21666   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21667   ins_encode %{
21668     assert(UseAVX > 2, "required");
21669 
21670     int opcode = this->ideal_Opcode();
21671     int vlen_enc = vector_length_encoding(this);
21672     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21673   %}
21674   ins_pipe( pipe_slow );
21675 %}
21676 
21677 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21678   predicate(Matcher::vector_length(n) == 64 &&
21679             n->as_ShiftV()->is_var_shift() &&
21680             VM_Version::supports_avx512bw());
21681   match(Set dst ( LShiftVB src shift));
21682   match(Set dst ( RShiftVB src shift));
21683   match(Set dst (URShiftVB src shift));
21684   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21685   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21686   ins_encode %{
21687     assert(UseAVX > 2, "required");
21688 
21689     int opcode = this->ideal_Opcode();
21690     int vlen_enc = Assembler::AVX_256bit;
21691     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21692     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21693     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21694     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21695     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21696   %}
21697   ins_pipe( pipe_slow );
21698 %}
21699 
21700 // Short variable shift
21701 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21702   predicate(Matcher::vector_length(n) <= 8 &&
21703             n->as_ShiftV()->is_var_shift() &&
21704             !VM_Version::supports_avx512bw());
21705   match(Set dst ( LShiftVS src shift));
21706   match(Set dst ( RShiftVS src shift));
21707   match(Set dst (URShiftVS src shift));
21708   effect(TEMP dst, TEMP vtmp);
21709   format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
21710   ins_encode %{
21711     assert(UseAVX >= 2, "required");
21712 
21713     int opcode = this->ideal_Opcode();
21714     bool sign = (opcode != Op_URShiftVS);
21715     int vlen_enc = Assembler::AVX_256bit;
21716     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21717     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21718     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21719     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21720     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21721     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21722   %}
21723   ins_pipe( pipe_slow );
21724 %}
21725 
21726 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21727   predicate(Matcher::vector_length(n) == 16 &&
21728             n->as_ShiftV()->is_var_shift() &&
21729             !VM_Version::supports_avx512bw());
21730   match(Set dst ( LShiftVS src shift));
21731   match(Set dst ( RShiftVS src shift));
21732   match(Set dst (URShiftVS src shift));
21733   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21734   format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
21735   ins_encode %{
21736     assert(UseAVX >= 2, "required");
21737 
21738     int opcode = this->ideal_Opcode();
21739     bool sign = (opcode != Op_URShiftVS);
21740     int vlen_enc = Assembler::AVX_256bit;
21741     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21742     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21743     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21744     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21745     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21746 
21747     // Shift upper half, with result in dst using vtmp1 as TEMP
21748     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21749     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21750     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21751     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21752     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21753     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21754 
21755     // Merge lower and upper half result into dst
21756     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21757     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21758   %}
21759   ins_pipe( pipe_slow );
21760 %}
21761 
21762 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21763   predicate(n->as_ShiftV()->is_var_shift() &&
21764             VM_Version::supports_avx512bw());
21765   match(Set dst ( LShiftVS src shift));
21766   match(Set dst ( RShiftVS src shift));
21767   match(Set dst (URShiftVS src shift));
21768   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21769   ins_encode %{
21770     assert(UseAVX > 2, "required");
21771 
21772     int opcode = this->ideal_Opcode();
21773     int vlen_enc = vector_length_encoding(this);
21774     if (!VM_Version::supports_avx512vl()) {
21775       vlen_enc = Assembler::AVX_512bit;
21776     }
21777     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21778   %}
21779   ins_pipe( pipe_slow );
21780 %}
21781 
21782 // Integer variable shift
21783 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21784   predicate(n->as_ShiftV()->is_var_shift());
21785   match(Set dst ( LShiftVI src shift));
21786   match(Set dst ( RShiftVI src shift));
21787   match(Set dst (URShiftVI src shift));
21788   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21789   ins_encode %{
21790     assert(UseAVX >= 2, "required");
21791 
21792     int opcode = this->ideal_Opcode();
21793     int vlen_enc = vector_length_encoding(this);
21794     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21795   %}
21796   ins_pipe( pipe_slow );
21797 %}
21798 
21799 // Long variable shift
21800 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21801   predicate(n->as_ShiftV()->is_var_shift());
21802   match(Set dst ( LShiftVL src shift));
21803   match(Set dst (URShiftVL src shift));
21804   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21805   ins_encode %{
21806     assert(UseAVX >= 2, "required");
21807 
21808     int opcode = this->ideal_Opcode();
21809     int vlen_enc = vector_length_encoding(this);
21810     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21811   %}
21812   ins_pipe( pipe_slow );
21813 %}
21814 
21815 // Long variable arithmetic right shift
21816 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21817   predicate(Matcher::vector_length(n) <= 4 &&
21818             n->as_ShiftV()->is_var_shift() &&
21819             UseAVX == 2);
21820   match(Set dst (RShiftVL src shift));
21821   effect(TEMP dst, TEMP vtmp);
21822   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21823   ins_encode %{
21824     int opcode = this->ideal_Opcode();
21825     int vlen_enc = vector_length_encoding(this);
21826     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21827                  $vtmp$$XMMRegister);
21828   %}
21829   ins_pipe( pipe_slow );
21830 %}
21831 
21832 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21833   predicate(n->as_ShiftV()->is_var_shift() &&
21834             UseAVX > 2);
21835   match(Set dst (RShiftVL src shift));
21836   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21837   ins_encode %{
21838     int opcode = this->ideal_Opcode();
21839     int vlen_enc = vector_length_encoding(this);
21840     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21841   %}
21842   ins_pipe( pipe_slow );
21843 %}
21844 
21845 // --------------------------------- AND --------------------------------------
21846 
21847 instruct vand(vec dst, vec src) %{
21848   predicate(UseAVX == 0);
21849   match(Set dst (AndV dst src));
21850   format %{ "pand    $dst,$src\t! and vectors" %}
21851   ins_encode %{
21852     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21853   %}
21854   ins_pipe( pipe_slow );
21855 %}
21856 
21857 instruct vand_reg(vec dst, vec src1, vec src2) %{
21858   predicate(UseAVX > 0);
21859   match(Set dst (AndV src1 src2));
21860   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21861   ins_encode %{
21862     int vlen_enc = vector_length_encoding(this);
21863     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21864   %}
21865   ins_pipe( pipe_slow );
21866 %}
21867 
21868 instruct vand_mem(vec dst, vec src, memory mem) %{
21869   predicate((UseAVX > 0) &&
21870             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21871   match(Set dst (AndV src (LoadVector mem)));
21872   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21873   ins_encode %{
21874     int vlen_enc = vector_length_encoding(this);
21875     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21876   %}
21877   ins_pipe( pipe_slow );
21878 %}
21879 
21880 // --------------------------------- OR ---------------------------------------
21881 
21882 instruct vor(vec dst, vec src) %{
21883   predicate(UseAVX == 0);
21884   match(Set dst (OrV dst src));
21885   format %{ "por     $dst,$src\t! or vectors" %}
21886   ins_encode %{
21887     __ por($dst$$XMMRegister, $src$$XMMRegister);
21888   %}
21889   ins_pipe( pipe_slow );
21890 %}
21891 
21892 instruct vor_reg(vec dst, vec src1, vec src2) %{
21893   predicate(UseAVX > 0);
21894   match(Set dst (OrV src1 src2));
21895   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21896   ins_encode %{
21897     int vlen_enc = vector_length_encoding(this);
21898     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21899   %}
21900   ins_pipe( pipe_slow );
21901 %}
21902 
21903 instruct vor_mem(vec dst, vec src, memory mem) %{
21904   predicate((UseAVX > 0) &&
21905             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21906   match(Set dst (OrV src (LoadVector mem)));
21907   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21908   ins_encode %{
21909     int vlen_enc = vector_length_encoding(this);
21910     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21911   %}
21912   ins_pipe( pipe_slow );
21913 %}
21914 
21915 // --------------------------------- XOR --------------------------------------
21916 
21917 instruct vxor(vec dst, vec src) %{
21918   predicate(UseAVX == 0);
21919   match(Set dst (XorV dst src));
21920   format %{ "pxor    $dst,$src\t! xor vectors" %}
21921   ins_encode %{
21922     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21923   %}
21924   ins_pipe( pipe_slow );
21925 %}
21926 
21927 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21928   predicate(UseAVX > 0);
21929   match(Set dst (XorV src1 src2));
21930   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21931   ins_encode %{
21932     int vlen_enc = vector_length_encoding(this);
21933     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21934   %}
21935   ins_pipe( pipe_slow );
21936 %}
21937 
21938 instruct vxor_mem(vec dst, vec src, memory mem) %{
21939   predicate((UseAVX > 0) &&
21940             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21941   match(Set dst (XorV src (LoadVector mem)));
21942   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21943   ins_encode %{
21944     int vlen_enc = vector_length_encoding(this);
21945     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21946   %}
21947   ins_pipe( pipe_slow );
21948 %}
21949 
21950 // --------------------------------- VectorCast --------------------------------------
21951 
21952 instruct vcastBtoX(vec dst, vec src) %{
21953   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21954   match(Set dst (VectorCastB2X src));
21955   format %{ "vector_cast_b2x $dst,$src\t!" %}
21956   ins_encode %{
21957     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21958     int vlen_enc = vector_length_encoding(this);
21959     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21960   %}
21961   ins_pipe( pipe_slow );
21962 %}
21963 
21964 instruct vcastBtoD(legVec dst, legVec src) %{
21965   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21966   match(Set dst (VectorCastB2X src));
21967   format %{ "vector_cast_b2x $dst,$src\t!" %}
21968   ins_encode %{
21969     int vlen_enc = vector_length_encoding(this);
21970     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21971   %}
21972   ins_pipe( pipe_slow );
21973 %}
21974 
21975 instruct castStoX(vec dst, vec src) %{
21976   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21977             Matcher::vector_length(n->in(1)) <= 8 && // src
21978             Matcher::vector_element_basic_type(n) == T_BYTE);
21979   match(Set dst (VectorCastS2X src));
21980   format %{ "vector_cast_s2x $dst,$src" %}
21981   ins_encode %{
21982     assert(UseAVX > 0, "required");
21983 
21984     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21985     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21986   %}
21987   ins_pipe( pipe_slow );
21988 %}
21989 
21990 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21991   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21992             Matcher::vector_length(n->in(1)) == 16 && // src
21993             Matcher::vector_element_basic_type(n) == T_BYTE);
21994   effect(TEMP dst, TEMP vtmp);
21995   match(Set dst (VectorCastS2X src));
21996   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21997   ins_encode %{
21998     assert(UseAVX > 0, "required");
21999 
22000     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22001     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22002     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22003     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22004   %}
22005   ins_pipe( pipe_slow );
22006 %}
22007 
22008 instruct vcastStoX_evex(vec dst, vec src) %{
22009   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22010             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22011   match(Set dst (VectorCastS2X src));
22012   format %{ "vector_cast_s2x $dst,$src\t!" %}
22013   ins_encode %{
22014     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22015     int src_vlen_enc = vector_length_encoding(this, $src);
22016     int vlen_enc = vector_length_encoding(this);
22017     switch (to_elem_bt) {
22018       case T_BYTE:
22019         if (!VM_Version::supports_avx512vl()) {
22020           src_vlen_enc = Assembler::AVX_512bit;
22021         }
22022         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22023         break;
22024       case T_INT:
22025         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22026         break;
22027       case T_FLOAT:
22028         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22029         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22030         break;
22031       case T_LONG:
22032         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22033         break;
22034       case T_DOUBLE: {
22035         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22036         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22037         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22038         break;
22039       }
22040       default:
22041         ShouldNotReachHere();
22042     }
22043   %}
22044   ins_pipe( pipe_slow );
22045 %}
22046 
22047 instruct castItoX(vec dst, vec src) %{
22048   predicate(UseAVX <= 2 &&
22049             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22050             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22051   match(Set dst (VectorCastI2X src));
22052   format %{ "vector_cast_i2x $dst,$src" %}
22053   ins_encode %{
22054     assert(UseAVX > 0, "required");
22055 
22056     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22057     int vlen_enc = vector_length_encoding(this, $src);
22058 
22059     if (to_elem_bt == T_BYTE) {
22060       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22061       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22062       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22063     } else {
22064       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22065       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22066       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22067     }
22068   %}
22069   ins_pipe( pipe_slow );
22070 %}
22071 
22072 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22073   predicate(UseAVX <= 2 &&
22074             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22075             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22076   match(Set dst (VectorCastI2X src));
22077   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22078   effect(TEMP dst, TEMP vtmp);
22079   ins_encode %{
22080     assert(UseAVX > 0, "required");
22081 
22082     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22083     int vlen_enc = vector_length_encoding(this, $src);
22084 
22085     if (to_elem_bt == T_BYTE) {
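            // Mask each int to its low byte, fold the upper 128-bit lane into
            // $dst, then pack dwords to words and words to bytes.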
22086       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22087       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22088       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22089       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22090     } else {
22091       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22092       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22093       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22094       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22095     }
22096   %}
22097   ins_pipe( pipe_slow );
22098 %}
22099 
22100 instruct vcastItoX_evex(vec dst, vec src) %{
22101   predicate(UseAVX > 2 ||
22102             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22103   match(Set dst (VectorCastI2X src));
22104   format %{ "vector_cast_i2x $dst,$src\t!" %}
22105   ins_encode %{
22106     assert(UseAVX > 0, "required");
22107 
22108     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22109     int src_vlen_enc = vector_length_encoding(this, $src);
22110     int dst_vlen_enc = vector_length_encoding(this);
22111     switch (dst_elem_bt) {
22112       case T_BYTE:
22113         if (!VM_Version::supports_avx512vl()) {
22114           src_vlen_enc = Assembler::AVX_512bit;
22115         }
22116         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22117         break;
22118       case T_SHORT:
22119         if (!VM_Version::supports_avx512vl()) {
22120           src_vlen_enc = Assembler::AVX_512bit;
22121         }
22122         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22123         break;
22124       case T_FLOAT:
22125         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22126         break;
22127       case T_LONG:
22128         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22129         break;
22130       case T_DOUBLE:
22131         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22132         break;
22133       default:
22134         ShouldNotReachHere();
22135     }
22136   %}
22137   ins_pipe( pipe_slow );
22138 %}
22139 
22140 instruct vcastLtoBS(vec dst, vec src) %{
22141   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22142             UseAVX <= 2);
22143   match(Set dst (VectorCastL2X src));
22144   format %{ "vector_cast_l2x  $dst,$src" %}
22145   ins_encode %{
22146     assert(UseAVX > 0, "required");
22147 
22148     int vlen = Matcher::vector_length_in_bytes(this, $src);
22149     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22150     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22151                                                       : ExternalAddress(vector_int_to_short_mask());
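          // In both branches an imm8 of 8 (0b00001000) routes the low dword of
          // each long into the low lanes before masking and packing down.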
22152     if (vlen <= 16) {
22153       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22154       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22155       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22156     } else {
22157       assert(vlen <= 32, "required");
22158       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22159       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22160       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22161       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22162     }
22163     if (to_elem_bt == T_BYTE) {
22164       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22165     }
22166   %}
22167   ins_pipe( pipe_slow );
22168 %}
22169 
22170 instruct vcastLtoX_evex(vec dst, vec src) %{
22171   predicate(UseAVX > 2 ||
22172             (Matcher::vector_element_basic_type(n) == T_INT ||
22173              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22174              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22175   match(Set dst (VectorCastL2X src));
22176   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22177   ins_encode %{
22178     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22179     int vlen = Matcher::vector_length_in_bytes(this, $src);
22180     int vlen_enc = vector_length_encoding(this, $src);
22181     switch (to_elem_bt) {
22182       case T_BYTE:
22183         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22184           vlen_enc = Assembler::AVX_512bit;
22185         }
22186         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22187         break;
22188       case T_SHORT:
22189         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22190           vlen_enc = Assembler::AVX_512bit;
22191         }
22192         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22193         break;
22194       case T_INT:
22195         if (vlen == 8) {
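                // An 8-byte source is a single long; its low int is copied
                // with a 4-byte move.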
22196           if ($dst$$XMMRegister != $src$$XMMRegister) {
22197             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22198           }
22199         } else if (vlen == 16) {
22200           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22201         } else if (vlen == 32) {
22202           if (UseAVX > 2) {
22203             if (!VM_Version::supports_avx512vl()) {
22204               vlen_enc = Assembler::AVX_512bit;
22205             }
22206             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22207           } else {
22208             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22209             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22210           }
22211         } else { // vlen == 64
22212           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22213         }
22214         break;
22215       case T_FLOAT:
22216         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22217         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22218         break;
22219       case T_DOUBLE:
22220         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22221         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22222         break;
22223 
22224       default: assert(false, "%s", type2name(to_elem_bt));
22225     }
22226   %}
22227   ins_pipe( pipe_slow );
22228 %}
22229 
22230 instruct vcastFtoD_reg(vec dst, vec src) %{
22231   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22232   match(Set dst (VectorCastF2X src));
22233   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22234   ins_encode %{
22235     int vlen_enc = vector_length_encoding(this);
22236     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22237   %}
22238   ins_pipe( pipe_slow );
22239 %}
22240 
22241 
22242 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22243   predicate(!VM_Version::supports_avx10_2() &&
22244             !VM_Version::supports_avx512vl() &&
22245             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22246             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22247             is_integral_type(Matcher::vector_element_basic_type(n)));
22248   match(Set dst (VectorCastF2X src));
22249   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22250   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22251   ins_encode %{
22252     int vlen_enc = vector_length_encoding(this, $src);
22253     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22254     // JDK-8292878 removed the need for an explicit scratch register when
22255     // loading stub-constant addresses wider than 32 bits in register
22256     // indirect addressing mode: stub constants live in the code cache, and
22257     // ReservedCodeCacheSize is currently capped at 2G. Targets may raise
22258     // that limit, but a code cache above 2G is unrealistic in practice,
22259     // and the cap saves a temporary register allocation, which in the
22260     // limiting case can prevent spilling in high-register-pressure blocks.
22261     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22262                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22263                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22264   %}
22265   ins_pipe( pipe_slow );
22266 %}
22267 
22268 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22269   predicate(!VM_Version::supports_avx10_2() &&
22270             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22271             is_integral_type(Matcher::vector_element_basic_type(n)));
22272   match(Set dst (VectorCastF2X src));
22273   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22274   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22275   ins_encode %{
22276     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22277     if (to_elem_bt == T_LONG) {
22278       int vlen_enc = vector_length_encoding(this);
22279       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22280                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22281                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22282     } else {
22283       int vlen_enc = vector_length_encoding(this, $src);
22284       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22285                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22286                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22287     }
22288   %}
22289   ins_pipe( pipe_slow );
22290 %}
22291 
22292 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22293   predicate(VM_Version::supports_avx10_2() &&
22294             is_integral_type(Matcher::vector_element_basic_type(n)));
22295   match(Set dst (VectorCastF2X src));
22296   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22297   ins_encode %{
22298     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22299     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22300     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22301   %}
22302   ins_pipe( pipe_slow );
22303 %}
22304 
22305 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22306   predicate(VM_Version::supports_avx10_2() &&
22307             is_integral_type(Matcher::vector_element_basic_type(n)));
22308   match(Set dst (VectorCastF2X (LoadVector src)));
22309   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22310   ins_encode %{
22311     int vlen = Matcher::vector_length(this);
22312     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22313     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22314     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22315   %}
22316   ins_pipe( pipe_slow );
22317 %}
22318 
22319 instruct vcastDtoF_reg(vec dst, vec src) %{
22320   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22321   match(Set dst (VectorCastD2X src));
22322   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22323   ins_encode %{
22324     int vlen_enc = vector_length_encoding(this, $src);
22325     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22326   %}
22327   ins_pipe( pipe_slow );
22328 %}
22329 
22330 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22331   predicate(!VM_Version::supports_avx10_2() &&
22332             !VM_Version::supports_avx512vl() &&
22333             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22334             is_integral_type(Matcher::vector_element_basic_type(n)));
22335   match(Set dst (VectorCastD2X src));
22336   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22337   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22338   ins_encode %{
22339     int vlen_enc = vector_length_encoding(this, $src);
22340     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22341     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22342                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22343                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22344   %}
22345   ins_pipe( pipe_slow );
22346 %}
22347 
22348 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22349   predicate(!VM_Version::supports_avx10_2() &&
22350             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22351             is_integral_type(Matcher::vector_element_basic_type(n)));
22352   match(Set dst (VectorCastD2X src));
22353   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22354   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22355   ins_encode %{
22356     int vlen_enc = vector_length_encoding(this, $src);
22357     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22358     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22359                               ExternalAddress(vector_float_signflip());
22360     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22361                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22362   %}
22363   ins_pipe( pipe_slow );
22364 %}
22365 
22366 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22367   predicate(VM_Version::supports_avx10_2() &&
22368             is_integral_type(Matcher::vector_element_basic_type(n)));
22369   match(Set dst (VectorCastD2X src));
22370   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22371   ins_encode %{
22372     int vlen_enc = vector_length_encoding(this, $src);
22373     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22374     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22375   %}
22376   ins_pipe( pipe_slow );
22377 %}
22378 
22379 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22380   predicate(VM_Version::supports_avx10_2() &&
22381             is_integral_type(Matcher::vector_element_basic_type(n)));
22382   match(Set dst (VectorCastD2X (LoadVector src)));
22383   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22384   ins_encode %{
22385     int vlen = Matcher::vector_length(this);
22386     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22387     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22388     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22389   %}
22390   ins_pipe( pipe_slow );
22391 %}
22392 
22393 instruct vucast(vec dst, vec src) %{
22394   match(Set dst (VectorUCastB2X src));
22395   match(Set dst (VectorUCastS2X src));
22396   match(Set dst (VectorUCastI2X src));
22397   format %{ "vector_ucast $dst,$src\t!" %}
22398   ins_encode %{
22399     assert(UseAVX > 0, "required");
22400 
22401     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22402     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22403     int vlen_enc = vector_length_encoding(this);
22404     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22405   %}
22406   ins_pipe( pipe_slow );
22407 %}
22408 
22409 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22410   predicate(!VM_Version::supports_avx512vl() &&
22411             Matcher::vector_length_in_bytes(n) < 64 &&
22412             Matcher::vector_element_basic_type(n) == T_INT);
22413   match(Set dst (RoundVF src));
22414   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22415   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22416   ins_encode %{
22417     int vlen_enc = vector_length_encoding(this);
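          // MXCSR 0x3F80 masks all exceptions and selects round-down, so the
          // helper can compute Math.round as floor(x + 0.5); EnableX86ECoreOpts
          // additionally presets the sticky exception-flag bits (0x3FBF),
          // presumably to avoid MXCSR flag-update stalls on E-cores.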
22418     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22419     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22420                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22421                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22422   %}
22423   ins_pipe( pipe_slow );
22424 %}
22425 
22426 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22427   predicate((VM_Version::supports_avx512vl() ||
22428              Matcher::vector_length_in_bytes(n) == 64) &&
22429              Matcher::vector_element_basic_type(n) == T_INT);
22430   match(Set dst (RoundVF src));
22431   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22432   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22433   ins_encode %{
22434     int vlen_enc = vector_length_encoding(this);
22435     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22436     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22437                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22438                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22439   %}
22440   ins_pipe( pipe_slow );
22441 %}
22442 
22443 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22444   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22445   match(Set dst (RoundVD src));
22446   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22447   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22448   ins_encode %{
22449     int vlen_enc = vector_length_encoding(this);
22450     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22451     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22452                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22453                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22454   %}
22455   ins_pipe( pipe_slow );
22456 %}
22457 
22458 // --------------------------------- VectorMaskCmp --------------------------------------
22459 
22460 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22461   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22462             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22463             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22464             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22465   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22466   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22467   ins_encode %{
22468     int vlen_enc = vector_length_encoding(this, $src1);
22469     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22470     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22471       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22472     } else {
22473       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22474     }
22475   %}
22476   ins_pipe( pipe_slow );
22477 %}
22478 
22479 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22480   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22481             n->bottom_type()->isa_vectmask() == nullptr &&
22482             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22483   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22484   effect(TEMP ktmp);
22485   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22486   ins_encode %{
22487     int vlen_enc = Assembler::AVX_512bit;
22488     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22489     KRegister mask = k0; // The comparison itself is not being masked.
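          // The compare writes a k-register; it is widened to a boolean vector
          // by loading all-ones into the lanes selected by $ktmp and zeroing
          // the rest (merge == false).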
22490     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22491       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22492       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22493     } else {
22494       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22495       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22496     }
22497   %}
22498   ins_pipe( pipe_slow );
22499 %}
22500 
22501 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22502   predicate(n->bottom_type()->isa_vectmask() &&
22503             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22504   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22505   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22506   ins_encode %{
22507     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22508     int vlen_enc = vector_length_encoding(this, $src1);
22509     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22510     KRegister mask = k0; // The comparison itself is not being masked.
22511     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22512       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22513     } else {
22514       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22515     }
22516   %}
22517   ins_pipe( pipe_slow );
22518 %}
22519 
22520 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22521   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22522             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22523             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22524             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22525             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22526             (n->in(2)->get_int() == BoolTest::eq ||
22527              n->in(2)->get_int() == BoolTest::lt ||
22528              n->in(2)->get_int() == BoolTest::gt)); // cond
22529   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22530   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22531   ins_encode %{
22532     int vlen_enc = vector_length_encoding(this, $src1);
22533     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22534     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22535     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22536   %}
22537   ins_pipe( pipe_slow );
22538 %}
22539 
22540 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22541   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22542             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22543             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22544             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22545             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22546             (n->in(2)->get_int() == BoolTest::ne ||
22547              n->in(2)->get_int() == BoolTest::le ||
22548              n->in(2)->get_int() == BoolTest::ge)); // cond
22549   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22550   effect(TEMP dst, TEMP xtmp);
22551   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22552   ins_encode %{
22553     int vlen_enc = vector_length_encoding(this, $src1);
22554     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22555     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22556     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22557   %}
22558   ins_pipe( pipe_slow );
22559 %}
22560 
22561 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22562   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22563             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22564             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22565             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22566             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22567   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22568   effect(TEMP dst, TEMP xtmp);
22569   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22570   ins_encode %{
22571     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22572     int vlen_enc = vector_length_encoding(this, $src1);
22573     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22574     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22575 
22576     if (vlen_enc == Assembler::AVX_128bit) {
22577       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22578     } else {
22579       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22580     }
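          // Flipping the sign bit of both operands lets the signed compare
          // below realize the unsigned predicate.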
22581     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22582     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22583     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22584   %}
22585   ins_pipe( pipe_slow );
22586 %}
22587 
22588 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22589   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22590              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22591              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22592   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22593   effect(TEMP ktmp);
22594   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22595   ins_encode %{
22596     assert(UseAVX > 2, "required");
22597 
22598     int vlen_enc = vector_length_encoding(this, $src1);
22599     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22600     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22601     KRegister mask = k0; // The comparison itself is not being masked.
22602     bool merge = false;
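          // merge == false: lanes not set in $ktmp are zeroed when the
          // all-ones constant is expanded into $dst.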
22603     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22604 
22605     switch (src1_elem_bt) {
22606       case T_INT: {
22607         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22608         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22609         break;
22610       }
22611       case T_LONG: {
22612         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22613         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22614         break;
22615       }
22616       default: assert(false, "%s", type2name(src1_elem_bt));
22617     }
22618   %}
22619   ins_pipe( pipe_slow );
22620 %}
22621 
22622 
22623 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22624   predicate(n->bottom_type()->isa_vectmask() &&
22625             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22626   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22627   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22628   ins_encode %{
22629     assert(UseAVX > 2, "required");
22630     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22631 
22632     int vlen_enc = vector_length_encoding(this, $src1);
22633     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22634     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22635     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22636 
22637     // Using k0 below: the comparison itself is not masked.
22638     switch (src1_elem_bt) {
22639       case T_BYTE: {
22640         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22641         break;
22642       }
22643       case T_SHORT: {
22644         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22645         break;
22646       }
22647       case T_INT: {
22648         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22649         break;
22650       }
22651       case T_LONG: {
22652         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22653         break;
22654       }
22655       default: assert(false, "%s", type2name(src1_elem_bt));
22656     }
22657   %}
22658   ins_pipe( pipe_slow );
22659 %}
22660 
22661 // Extract
22662 
22663 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22664   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22665   match(Set dst (ExtractI src idx));
22666   match(Set dst (ExtractS src idx));
22667   match(Set dst (ExtractB src idx));
22668   format %{ "extractI $dst,$src,$idx\t!" %}
22669   ins_encode %{
22670     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22671 
22672     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22673     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22674   %}
22675   ins_pipe( pipe_slow );
22676 %}
22677 
22678 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22679   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22680             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22681   match(Set dst (ExtractI src idx));
22682   match(Set dst (ExtractS src idx));
22683   match(Set dst (ExtractB src idx));
22684   effect(TEMP vtmp);
22685   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22686   ins_encode %{
22687     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22688 
22689     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
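          // First isolate the 128-bit lane holding the element, then extract
          // the element from that lane.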
22690     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22691     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22692   %}
22693   ins_pipe( pipe_slow );
22694 %}
22695 
22696 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22697   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22698   match(Set dst (ExtractL src idx));
22699   format %{ "extractL $dst,$src,$idx\t!" %}
22700   ins_encode %{
22701     assert(UseSSE >= 4, "required");
22702     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22703 
22704     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22705   %}
22706   ins_pipe( pipe_slow );
22707 %}
22708 
22709 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22710   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22711             Matcher::vector_length(n->in(1)) == 8);  // src
22712   match(Set dst (ExtractL src idx));
22713   effect(TEMP vtmp);
22714   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22715   ins_encode %{
22716     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22717 
22718     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22719     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22720   %}
22721   ins_pipe( pipe_slow );
22722 %}
22723 
22724 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22725   predicate(Matcher::vector_length(n->in(1)) <= 4);
22726   match(Set dst (ExtractF src idx));
22727   effect(TEMP dst, TEMP vtmp);
22728   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22729   ins_encode %{
22730     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22731 
22732     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22733   %}
22734   ins_pipe( pipe_slow );
22735 %}
22736 
22737 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22738   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22739             Matcher::vector_length(n->in(1)/*src*/) == 16);
22740   match(Set dst (ExtractF src idx));
22741   effect(TEMP vtmp);
22742   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22743   ins_encode %{
22744     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22745 
22746     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22747     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22748   %}
22749   ins_pipe( pipe_slow );
22750 %}
22751 
22752 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22753   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22754   match(Set dst (ExtractD src idx));
22755   format %{ "extractD $dst,$src,$idx\t!" %}
22756   ins_encode %{
22757     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22758 
22759     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22760   %}
22761   ins_pipe( pipe_slow );
22762 %}
22763 
22764 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22765   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22766             Matcher::vector_length(n->in(1)) == 8);  // src
22767   match(Set dst (ExtractD src idx));
22768   effect(TEMP vtmp);
22769   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22770   ins_encode %{
22771     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22772 
22773     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22774     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22775   %}
22776   ins_pipe( pipe_slow );
22777 %}
22778 
22779 // --------------------------------- Vector Blend --------------------------------------
22780 
22781 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22782   predicate(UseAVX == 0);
22783   match(Set dst (VectorBlend (Binary dst src) mask));
22784   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22785   effect(TEMP tmp);
22786   ins_encode %{
22787     assert(UseSSE >= 4, "required");
22788 
22789     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22790       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22791     }
22792     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22793   %}
22794   ins_pipe( pipe_slow );
22795 %}
22796 
22797 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22798   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22799             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22800             Matcher::vector_length_in_bytes(n) <= 32 &&
22801             is_integral_type(Matcher::vector_element_basic_type(n)));
22802   match(Set dst (VectorBlend (Binary src1 src2) mask));
22803   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22804   ins_encode %{
22805     int vlen_enc = vector_length_encoding(this);
22806     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22807   %}
22808   ins_pipe( pipe_slow );
22809 %}
22810 
22811 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22812   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22813             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22814             Matcher::vector_length_in_bytes(n) <= 32 &&
22815             !is_integral_type(Matcher::vector_element_basic_type(n)));
22816   match(Set dst (VectorBlend (Binary src1 src2) mask));
22817   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22818   ins_encode %{
22819     int vlen_enc = vector_length_encoding(this);
22820     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22821   %}
22822   ins_pipe( pipe_slow );
22823 %}
22824 
22825 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22826   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22827             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22828             Matcher::vector_length_in_bytes(n) <= 32);
22829   match(Set dst (VectorBlend (Binary src1 src2) mask));
22830   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22831   effect(TEMP vtmp, TEMP dst);
22832   ins_encode %{
22833     int vlen_enc = vector_length_encoding(this);
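          // Compose the blend from andn/and/or rather than vpblendvb, which is
          // comparatively slow on the E-cores that EnableX86ECoreOpts targets.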
22834     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22835     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22836     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22837   %}
22838   ins_pipe( pipe_slow );
22839 %}
22840 
22841 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22842   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22843             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22844   match(Set dst (VectorBlend (Binary src1 src2) mask));
22845   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22846   effect(TEMP ktmp);
22847   ins_encode %{
22848     int vlen_enc = Assembler::AVX_512bit;
22849     BasicType elem_bt = Matcher::vector_element_basic_type(this);
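          // Compare the boolean vector with all-ones to derive a k-register
          // mask, then blend under that mask.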
22850     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22851     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22852   %}
22853   ins_pipe( pipe_slow );
22854 %}
22855 
22856 
22857 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22858   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22859             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22860              VM_Version::supports_avx512bw()));
22861   match(Set dst (VectorBlend (Binary src1 src2) mask));
22862   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22863   ins_encode %{
22864     int vlen_enc = vector_length_encoding(this);
22865     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22866     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22867   %}
22868   ins_pipe( pipe_slow );
22869 %}
22870 
22871 // --------------------------------- ABS --------------------------------------
22872 // a = |a|
22873 instruct vabsB_reg(vec dst, vec src) %{
22874   match(Set dst (AbsVB  src));
22875   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22876   ins_encode %{
22877     uint vlen = Matcher::vector_length(this);
22878     if (vlen <= 16) {
22879       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22880     } else {
22881       int vlen_enc = vector_length_encoding(this);
22882       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22883     }
22884   %}
22885   ins_pipe( pipe_slow );
22886 %}
22887 
22888 instruct vabsS_reg(vec dst, vec src) %{
22889   match(Set dst (AbsVS  src));
22890   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22891   ins_encode %{
22892     uint vlen = Matcher::vector_length(this);
22893     if (vlen <= 8) {
22894       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22895     } else {
22896       int vlen_enc = vector_length_encoding(this);
22897       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22898     }
22899   %}
22900   ins_pipe( pipe_slow );
22901 %}
22902 
22903 instruct vabsI_reg(vec dst, vec src) %{
22904   match(Set dst (AbsVI  src));
22905   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22906   ins_encode %{
22907     uint vlen = Matcher::vector_length(this);
22908     if (vlen <= 4) {
22909       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22910     } else {
22911       int vlen_enc = vector_length_encoding(this);
22912       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22913     }
22914   %}
22915   ins_pipe( pipe_slow );
22916 %}
22917 
22918 instruct vabsL_reg(vec dst, vec src) %{
22919   match(Set dst (AbsVL  src));
22920   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22921   ins_encode %{
22922     assert(UseAVX > 2, "required");
22923     int vlen_enc = vector_length_encoding(this);
22924     if (!VM_Version::supports_avx512vl()) {
22925       vlen_enc = Assembler::AVX_512bit;
22926     }
22927     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22928   %}
22929   ins_pipe( pipe_slow );
22930 %}
22931 
22932 // --------------------------------- ABSNEG --------------------------------------
22933 
22934 instruct vabsnegF(vec dst, vec src) %{
22935   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22936   match(Set dst (AbsVF src));
22937   match(Set dst (NegVF src));
22938   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22939   ins_cost(150);
22940   ins_encode %{
22941     int opcode = this->ideal_Opcode();
22942     int vlen = Matcher::vector_length(this);
22943     if (vlen == 2) {
22944       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22945     } else {
22946       assert(vlen == 8 || vlen == 16, "required");
22947       int vlen_enc = vector_length_encoding(this);
22948       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22949     }
22950   %}
22951   ins_pipe( pipe_slow );
22952 %}
22953 
22954 instruct vabsneg4F(vec dst) %{
22955   predicate(Matcher::vector_length(n) == 4);
22956   match(Set dst (AbsVF dst));
22957   match(Set dst (NegVF dst));
22958   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22959   ins_cost(150);
22960   ins_encode %{
22961     int opcode = this->ideal_Opcode();
22962     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22963   %}
22964   ins_pipe( pipe_slow );
22965 %}
22966 
22967 instruct vabsnegD(vec dst, vec src) %{
22968   match(Set dst (AbsVD  src));
22969   match(Set dst (NegVD  src));
22970   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22971   ins_encode %{
22972     int opcode = this->ideal_Opcode();
22973     uint vlen = Matcher::vector_length(this);
22974     if (vlen == 2) {
22975       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22976     } else {
22977       int vlen_enc = vector_length_encoding(this);
22978       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22979     }
22980   %}
22981   ins_pipe( pipe_slow );
22982 %}
22983 
22984 //------------------------------------- VectorTest --------------------------------------------
22985 
22986 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22987   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22988   match(Set cr (VectorTest src1 src2));
22989   effect(TEMP vtmp);
22990   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22991   ins_encode %{
22992     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22993     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22994     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22995   %}
22996   ins_pipe( pipe_slow );
22997 %}
22998 
22999 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23000   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23001   match(Set cr (VectorTest src1 src2));
23002   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23003   ins_encode %{
23004     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23005     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23006     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23007   %}
23008   ins_pipe( pipe_slow );
23009 %}
23010 
23011 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23012   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23013              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23014             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23015   match(Set cr (VectorTest src1 src2));
23016   effect(TEMP tmp);
23017   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23018   ins_encode %{
23019     uint masklen = Matcher::vector_length(this, $src1);
23020     __ kmovwl($tmp$$Register, $src1$$KRegister);
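          // Zero the bits beyond masklen, then compare with the all-ones
          // pattern: equality means every mask lane is true.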
23021     __ andl($tmp$$Register, (1 << masklen) - 1);
23022     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23023   %}
23024   ins_pipe( pipe_slow );
23025 %}
23026 
23027 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23028   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23029              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23030             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23031   match(Set cr (VectorTest src1 src2));
23032   effect(TEMP tmp);
23033   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23034   ins_encode %{
23035     uint masklen = Matcher::vector_length(this, $src1);
23036     __ kmovwl($tmp$$Register, $src1$$KRegister);
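          // The AND sets ZF exactly when no mask lane in range is true.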
23037     __ andl($tmp$$Register, (1 << masklen) - 1);
23038   %}
23039   ins_pipe( pipe_slow );
23040 %}
23041 
23042 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23043   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23044             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23045   match(Set cr (VectorTest src1 src2));
23046   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23047   ins_encode %{
23048     uint masklen = Matcher::vector_length(this, $src1);
23049     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23050   %}
23051   ins_pipe( pipe_slow );
23052 %}
23053 
23054 //------------------------------------- LoadMask --------------------------------------------
23055 
23056 instruct loadMask(legVec dst, legVec src) %{
23057   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23058   match(Set dst (VectorLoadMask src));
23059   effect(TEMP dst);
23060   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23061   ins_encode %{
23062     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23063     BasicType elem_bt = Matcher::vector_element_basic_type(this);
23064     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23065   %}
23066   ins_pipe( pipe_slow );
23067 %}
23068 
23069 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23070   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23071   match(Set dst (VectorLoadMask src));
23072   effect(TEMP xtmp);
23073   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23074   ins_encode %{
23075     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23076                         true, Assembler::AVX_512bit);
23077   %}
23078   ins_pipe( pipe_slow );
23079 %}
23080 
23081 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
23082   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23083   match(Set dst (VectorLoadMask src));
23084   effect(TEMP xtmp);
23085   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23086   ins_encode %{
23087     int vlen_enc = vector_length_encoding(in(1));
23088     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23089                         false, vlen_enc);
23090   %}
23091   ins_pipe( pipe_slow );
23092 %}
23093 
23094 //------------------------------------- StoreMask --------------------------------------------
23095 
23096 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23097   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23098   match(Set dst (VectorStoreMask src size));
23099   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23100   ins_encode %{
23101     int vlen = Matcher::vector_length(this);
23102     if (vlen <= 16 && UseAVX <= 2) {
23103       assert(UseSSE >= 3, "required");
23104       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23105     } else {
23106       assert(UseAVX > 0, "required");
23107       int src_vlen_enc = vector_length_encoding(this, $src);
23108       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23109     }
23110   %}
23111   ins_pipe( pipe_slow );
23112 %}
23113 
23114 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23115   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23116   match(Set dst (VectorStoreMask src size));
23117   effect(TEMP_DEF dst, TEMP xtmp);
23118   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23119   ins_encode %{
23120     int vlen_enc = Assembler::AVX_128bit;
23121     int vlen = Matcher::vector_length(this);
23122     if (vlen <= 8) {
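            // pabsw normalizes 0/-1 shorts to 0/1; packing against the zero
            // vector keeps the low bytes.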
23123       assert(UseSSE >= 3, "required");
23124       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23125       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23126       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23127     } else {
23128       assert(UseAVX > 0, "required");
23129       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23130       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23131       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23132     }
23133   %}
23134   ins_pipe( pipe_slow );
23135 %}
23136 
23137 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23138   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23139   match(Set dst (VectorStoreMask src size));
23140   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23141   effect(TEMP_DEF dst, TEMP xtmp);
23142   ins_encode %{
23143     int vlen_enc = Assembler::AVX_128bit;
23144     int vlen = Matcher::vector_length(this);
23145     if (vlen <= 4) {
23146       assert(UseSSE >= 3, "required");
23147       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23148       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23149       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23150       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23151     } else {
23152       assert(UseAVX > 0, "required");
23153       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23154       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23155       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23156       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23157       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23158     }
23159   %}
23160   ins_pipe( pipe_slow );
23161 %}
23162 
23163 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23164   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23165   match(Set dst (VectorStoreMask src size));
23166   effect(TEMP_DEF dst, TEMP xtmp);
23167   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23168   ins_encode %{
23169     assert(UseSSE >= 3, "required");
23170     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23171     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23172     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23173     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23174     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23175   %}
23176   ins_pipe( pipe_slow );
23177 %}
23178 
23179 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23180   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23181   match(Set dst (VectorStoreMask src size));
23182   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23183   effect(TEMP_DEF dst, TEMP vtmp);
23184   ins_encode %{
23185     int vlen_enc = Assembler::AVX_128bit;
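          // Gather the low dword of each long (vshufps 0x88), fold the upper
          // lane in with vblendps, then pack against zero and normalize to
          // 0/1 bytes.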
23186     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23187     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23188     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23189     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23190     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23191     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23192     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23193   %}
23194   ins_pipe( pipe_slow );
23195 %}
23196 
23197 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23198   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23199   match(Set dst (VectorStoreMask src size));
23200   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23201   ins_encode %{
23202     int src_vlen_enc = vector_length_encoding(this, $src);
23203     int dst_vlen_enc = vector_length_encoding(this);
23204     if (!VM_Version::supports_avx512vl()) {
23205       src_vlen_enc = Assembler::AVX_512bit;
23206     }
23207     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23208     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23209   %}
23210   ins_pipe( pipe_slow );
23211 %}
23212 
23213 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23214   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23215   match(Set dst (VectorStoreMask src size));
23216   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23217   ins_encode %{
23218     int src_vlen_enc = vector_length_encoding(this, $src);
23219     int dst_vlen_enc = vector_length_encoding(this);
23220     if (!VM_Version::supports_avx512vl()) {
23221       src_vlen_enc = Assembler::AVX_512bit;
23222     }
23223     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23224     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23225   %}
23226   ins_pipe( pipe_slow );
23227 %}
23228 
23229 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23230   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23231   match(Set dst (VectorStoreMask mask size));
23232   effect(TEMP_DEF dst);
23233   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23234   ins_encode %{
23235     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23236     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23237                  false, Assembler::AVX_512bit, noreg);
23238     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23239   %}
23240   ins_pipe( pipe_slow );
23241 %}
23242 
23243 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23244   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23245   match(Set dst (VectorStoreMask mask size));
23246   effect(TEMP_DEF dst);
23247   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23248   ins_encode %{
23249     int dst_vlen_enc = vector_length_encoding(this);
23250     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23251     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23252   %}
23253   ins_pipe( pipe_slow );
23254 %}
23255 
23256 instruct vmaskcast_evex(kReg dst) %{
23257   match(Set dst (VectorMaskCast dst));
23258   ins_cost(0);
23259   format %{ "vector_mask_cast $dst" %}
23260   ins_encode %{
23261     // empty
23262   %}
23263   ins_pipe(empty);
23264 %}
23265 
23266 instruct vmaskcast(vec dst) %{
23267   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23268   match(Set dst (VectorMaskCast dst));
23269   ins_cost(0);
23270   format %{ "vector_mask_cast $dst" %}
23271   ins_encode %{
23272     // empty
23273   %}
23274   ins_pipe(empty);
23275 %}
23276 
23277 instruct vmaskcast_avx(vec dst, vec src) %{
23278   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23279   match(Set dst (VectorMaskCast src));
23280   format %{ "vector_mask_cast $dst, $src" %}
23281   ins_encode %{
23282     int vlen = Matcher::vector_length(this);
23283     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23284     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23285     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23286   %}
23287   ins_pipe(pipe_slow);
23288 %}
23289 
23290 //-------------------------------- Load Iota Indices ----------------------------------
23291 
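// An iota vector simply holds the ascending lane indices 0, 1, 2, ..., n-1 for
// the element type in use; e.g. a 16-byte byte-iota is [0, 1, 2, ..., 15].
// load_iota_indices materializes it from constant memory (hence the
// CONSTANT_MEMORY tag in the format string below).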
23292 instruct loadIotaIndices(vec dst, immI_0 src) %{
23293   match(Set dst (VectorLoadConst src));
23294   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23295   ins_encode %{
23296      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23297      BasicType bt = Matcher::vector_element_basic_type(this);
23298      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23299   %}
23300   ins_pipe( pipe_slow );
23301 %}
23302 
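// PopulateIndex produces [start, start+1, start+2, ...] for a scalar start
// value $src1 (the stride $src2 is restricted to the constant 1 here): the
// start value is broadcast to all lanes and the iota indices are added.
// For start = 5 and four int lanes the result is [5, 6, 7, 8].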
23303 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23304   match(Set dst (PopulateIndex src1 src2));
23305   effect(TEMP dst, TEMP vtmp);
23306   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23307   ins_encode %{
23308      assert($src2$$constant == 1, "required");
23309      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23310      int vlen_enc = vector_length_encoding(this);
23311      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23312      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23313      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23314      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23315   %}
23316   ins_pipe( pipe_slow );
23317 %}
23318 
23319 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23320   match(Set dst (PopulateIndex src1 src2));
23321   effect(TEMP dst, TEMP vtmp);
23322   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23323   ins_encode %{
23324      assert($src2$$constant == 1, "required");
23325      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23326      int vlen_enc = vector_length_encoding(this);
23327      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23328      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23329      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23330      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23331   %}
23332   ins_pipe( pipe_slow );
23333 %}
23334 
23335 //-------------------------------- Rearrange ----------------------------------
23336 
23337 // LoadShuffle/Rearrange for Byte
23338 instruct rearrangeB(vec dst, vec shuffle) %{
23339   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23340             Matcher::vector_length(n) < 32);
23341   match(Set dst (VectorRearrange dst shuffle));
23342   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23343   ins_encode %{
23344     assert(UseSSE >= 4, "required");
23345     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23346   %}
23347   ins_pipe( pipe_slow );
23348 %}
23349 
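// vpshufb can only move bytes within each 128-bit lane, so a 256-bit byte
// rearrange is emulated below: shuffle both the original source and a
// lane-swapped copy, then blend the two results, selecting per byte on
// whether the shuffle index points into the other lane.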
23350 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23351   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23352             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23353   match(Set dst (VectorRearrange src shuffle));
23354   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23355   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23356   ins_encode %{
23357     assert(UseAVX >= 2, "required");
    // Swap the two 128-bit halves of src into vtmp1
23359     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
23361     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
23363     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit of shuffle entries that come from the other lane
23365     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23366     // Perform the blend
23367     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23368   %}
23369   ins_pipe( pipe_slow );
23370 %}
23371 
23372 
23373 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23374   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23375             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23376   match(Set dst (VectorRearrange src shuffle));
23377   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23378   format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23379   ins_encode %{
23380     int vlen_enc = vector_length_encoding(this);
23381     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23382                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23383                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23384   %}
23385   ins_pipe( pipe_slow );
23386 %}
23387 
23388 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23389   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23390             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23391   match(Set dst (VectorRearrange src shuffle));
23392   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23393   ins_encode %{
23394     int vlen_enc = vector_length_encoding(this);
23395     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23396   %}
23397   ins_pipe( pipe_slow );
23398 %}
23399 
23400 // LoadShuffle/Rearrange for Short
23401 
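// Rough sketch of the index expansion done in loadShuffleS: a short shuffle
// index k selects 16-bit element k, which occupies bytes 2k and 2k+1, so each
// index is doubled, duplicated into both byte positions, and offset by [0, 1].
// E.g. shuffle indices [2, 0] become byte indices [4, 5, 0, 1].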
23402 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23403   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23404             !VM_Version::supports_avx512bw());
23405   match(Set dst (VectorLoadShuffle src));
23406   effect(TEMP dst, TEMP vtmp);
23407   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23408   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask,
    // since only a byte shuffle instruction is available on these platforms
23411     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23412     if (UseAVX == 0) {
23413       assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle index by two to get the byte index
23415       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23416       __ psllw($vtmp$$XMMRegister, 1);
23417 
      // Duplicate to create two copies of the byte index
23419       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23420       __ psllw($dst$$XMMRegister, 8);
23421       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23422 
      // Add one to the upper byte of each pair to get the alternate byte index
23424       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23425       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23426     } else {
23427       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23428       int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle index by two to get the byte index
23430       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23431 
      // Duplicate to create two copies of the byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23434       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23435 
      // Add one to the upper byte of each pair to get the alternate byte index
23437       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23438     }
23439   %}
23440   ins_pipe( pipe_slow );
23441 %}
23442 
23443 instruct rearrangeS(vec dst, vec shuffle) %{
23444   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23445             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23446   match(Set dst (VectorRearrange dst shuffle));
23447   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23448   ins_encode %{
23449     assert(UseSSE >= 4, "required");
23450     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23451   %}
23452   ins_pipe( pipe_slow );
23453 %}
23454 
23455 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23456   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23457             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23458   match(Set dst (VectorRearrange src shuffle));
23459   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23460   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23461   ins_encode %{
23462     assert(UseAVX >= 2, "required");
    // Swap the two 128-bit halves of src into vtmp1
23464     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to pick up entries from the other 128-bit lane
23466     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to pick up entries from its own 128-bit lane
23468     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit of shuffle entries that come from the other lane
23470     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23471     // Perform the blend
23472     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23473   %}
23474   ins_pipe( pipe_slow );
23475 %}
23476 
23477 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23478   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23479             VM_Version::supports_avx512bw());
23480   match(Set dst (VectorRearrange src shuffle));
23481   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23482   ins_encode %{
23483     int vlen_enc = vector_length_encoding(this);
23484     if (!VM_Version::supports_avx512vl()) {
23485       vlen_enc = Assembler::AVX_512bit;
23486     }
23487     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23488   %}
23489   ins_pipe( pipe_slow );
23490 %}
23491 
23492 // LoadShuffle/Rearrange for Integer and Float
23493 
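// Same index-expansion idea as the short case: an int shuffle index k is
// widened to the four byte indices [4k, 4k+1, 4k+2, 4k+3] by duplicating it
// four times and adding the byte offsets 0 through 3.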
23494 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23495   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23496             Matcher::vector_length(n) == 4 && UseAVX == 0);
23497   match(Set dst (VectorLoadShuffle src));
23498   effect(TEMP dst, TEMP vtmp);
23499   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23500   ins_encode %{
23501     assert(UseSSE >= 4, "required");
23502 
    // Create a byte shuffle mask from the int shuffle mask,
    // since only a byte shuffle instruction is available on these platforms
23505 
    // Duplicate and multiply each shuffle index by four
23507     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23508     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23509     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23510     __ psllw($vtmp$$XMMRegister, 2);
23511 
    // Duplicate again to create four copies of the byte index
23513     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23514     __ psllw($dst$$XMMRegister, 8);
23515     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23516 
    // Add 0, 1, 2, 3 within each four-byte group to get the alternate byte indices
23518     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23519     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23520   %}
23521   ins_pipe( pipe_slow );
23522 %}
23523 
23524 instruct rearrangeI(vec dst, vec shuffle) %{
23525   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23526             UseAVX == 0);
23527   match(Set dst (VectorRearrange dst shuffle));
23528   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23529   ins_encode %{
23530     assert(UseSSE >= 4, "required");
23531     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23532   %}
23533   ins_pipe( pipe_slow );
23534 %}
23535 
23536 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23537   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23538             UseAVX > 0);
23539   match(Set dst (VectorRearrange src shuffle));
23540   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23541   ins_encode %{
23542     int vlen_enc = vector_length_encoding(this);
23543     BasicType bt = Matcher::vector_element_basic_type(this);
23544     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23545   %}
23546   ins_pipe( pipe_slow );
23547 %}
23548 
23549 // LoadShuffle/Rearrange for Long and Double
23550 
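// Without AVX512VL there is no variable 64-bit permute below 512 bits, so a
// long shuffle index k is rewritten into the double word index pair
// [2k, 2k+1] and vpermd is used instead.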
23551 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23552   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23553             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23554   match(Set dst (VectorLoadShuffle src));
23555   effect(TEMP dst, TEMP vtmp);
23556   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23557   ins_encode %{
23558     assert(UseAVX >= 2, "required");
23559 
23560     int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask,
    // since only a double word shuffle instruction is available on these platforms
23563 
    // Multiply each shuffle index by two to get the double word index
23565     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23566 
    // Duplicate each double word shuffle index
23568     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23569     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23570 
    // Add one to the upper double word to get the alternate double word index
23572     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23573   %}
23574   ins_pipe( pipe_slow );
23575 %}
23576 
23577 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23578   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23579             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23580   match(Set dst (VectorRearrange src shuffle));
23581   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23582   ins_encode %{
23583     assert(UseAVX >= 2, "required");
23584 
23585     int vlen_enc = vector_length_encoding(this);
23586     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23587   %}
23588   ins_pipe( pipe_slow );
23589 %}
23590 
23591 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23592   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23593             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23594   match(Set dst (VectorRearrange src shuffle));
23595   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23596   ins_encode %{
23597     assert(UseAVX > 2, "required");
23598 
23599     int vlen_enc = vector_length_encoding(this);
23600     if (vlen_enc == Assembler::AVX_128bit) {
23601       vlen_enc = Assembler::AVX_256bit;
23602     }
23603     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23604   %}
23605   ins_pipe( pipe_slow );
23606 %}
23607 
23608 // --------------------------------- FMA --------------------------------------
23609 // a * b + c
23610 
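// The vfmaf/vfmad macro assembler routines emit fused multiply-add forms
// (vfmadd231ps/vfmadd231pd style), so $a * $b + $c is computed with a single
// rounding step, as the Java fma contract requires.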
23611 instruct vfmaF_reg(vec a, vec b, vec c) %{
23612   match(Set c (FmaVF  c (Binary a b)));
23613   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23614   ins_cost(150);
23615   ins_encode %{
23616     assert(UseFMA, "not enabled");
23617     int vlen_enc = vector_length_encoding(this);
23618     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23619   %}
23620   ins_pipe( pipe_slow );
23621 %}
23622 
23623 instruct vfmaF_mem(vec a, memory b, vec c) %{
23624   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23625   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23626   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23627   ins_cost(150);
23628   ins_encode %{
23629     assert(UseFMA, "not enabled");
23630     int vlen_enc = vector_length_encoding(this);
23631     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23632   %}
23633   ins_pipe( pipe_slow );
23634 %}
23635 
23636 instruct vfmaD_reg(vec a, vec b, vec c) %{
23637   match(Set c (FmaVD  c (Binary a b)));
23638   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23639   ins_cost(150);
23640   ins_encode %{
23641     assert(UseFMA, "not enabled");
23642     int vlen_enc = vector_length_encoding(this);
23643     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23644   %}
23645   ins_pipe( pipe_slow );
23646 %}
23647 
23648 instruct vfmaD_mem(vec a, memory b, vec c) %{
23649   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23650   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23651   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23652   ins_cost(150);
23653   ins_encode %{
23654     assert(UseFMA, "not enabled");
23655     int vlen_enc = vector_length_encoding(this);
23656     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23657   %}
23658   ins_pipe( pipe_slow );
23659 %}
23660 
23661 // --------------------------------- Vector Multiply Add --------------------------------------
23662 
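// pmaddwd multiplies adjacent pairs of signed 16-bit elements and sums each
// pair into one 32-bit lane:
//   dst[i] = src1[2i] * src2[2i] + src1[2i+1] * src2[2i+1]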
23663 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23664   predicate(UseAVX == 0);
23665   match(Set dst (MulAddVS2VI dst src1));
23666   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23667   ins_encode %{
23668     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23669   %}
23670   ins_pipe( pipe_slow );
23671 %}
23672 
23673 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23674   predicate(UseAVX > 0);
23675   match(Set dst (MulAddVS2VI src1 src2));
23676   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23677   ins_encode %{
23678     int vlen_enc = vector_length_encoding(this);
23679     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23680   %}
23681   ins_pipe( pipe_slow );
23682 %}
23683 
23684 // --------------------------------- Vector Multiply Add Add ----------------------------------
23685 
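// With AVX512_VNNI the multiply-add and the accumulating vector add fuse into
// a single dot-product instruction:
//   dst[i] += src1[2i] * src2[2i] + src1[2i+1] * src2[2i+1]
// hence the low ins_cost to make the matcher prefer this rule.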
23686 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23687   predicate(VM_Version::supports_avx512_vnni());
23688   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23689   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23690   ins_encode %{
23691     assert(UseAVX > 2, "required");
23692     int vlen_enc = vector_length_encoding(this);
23693     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23694   %}
23695   ins_pipe( pipe_slow );
23696   ins_cost(10);
23697 %}
23698 
23699 // --------------------------------- PopCount --------------------------------------
23700 
23701 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23702   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23703   match(Set dst (PopCountVI src));
23704   match(Set dst (PopCountVL src));
23705   format %{ "vector_popcount_integral $dst, $src" %}
23706   ins_encode %{
23708     int vlen_enc = vector_length_encoding(this, $src);
23709     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23710     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23711   %}
23712   ins_pipe( pipe_slow );
23713 %}
23714 
23715 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23716   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23717   match(Set dst (PopCountVI src mask));
23718   match(Set dst (PopCountVL src mask));
23719   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23720   ins_encode %{
23721     int vlen_enc = vector_length_encoding(this, $src);
23722     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23723     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23724     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23725   %}
23726   ins_pipe( pipe_slow );
23727 %}
23728 
23729 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23730   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23731   match(Set dst (PopCountVI src));
23732   match(Set dst (PopCountVL src));
23733   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23734   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23735   ins_encode %{
23737     int vlen_enc = vector_length_encoding(this, $src);
23738     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23739     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23740                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23741   %}
23742   ins_pipe( pipe_slow );
23743 %}
23744 
23745 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23746 
23747 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23748   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23749                                               Matcher::vector_length_in_bytes(n->in(1))));
23750   match(Set dst (CountTrailingZerosV src));
23751   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23752   ins_cost(400);
23753   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
23754   ins_encode %{
23755     int vlen_enc = vector_length_encoding(this, $src);
23756     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23757     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23758                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23759   %}
23760   ins_pipe( pipe_slow );
23761 %}
23762 
23763 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23764   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23765             VM_Version::supports_avx512cd() &&
23766             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23767   match(Set dst (CountTrailingZerosV src));
23768   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23769   ins_cost(400);
23770   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23771   ins_encode %{
23772     int vlen_enc = vector_length_encoding(this, $src);
23773     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23774     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23775                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23776   %}
23777   ins_pipe( pipe_slow );
23778 %}
23779 
23780 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23781   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23782   match(Set dst (CountTrailingZerosV src));
23783   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23784   ins_cost(400);
23785   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23786   ins_encode %{
23787     int vlen_enc = vector_length_encoding(this, $src);
23788     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23789     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23790                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23791                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23792   %}
23793   ins_pipe( pipe_slow );
23794 %}
23795 
23796 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23797   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23798   match(Set dst (CountTrailingZerosV src));
23799   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23800   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23801   ins_encode %{
23802     int vlen_enc = vector_length_encoding(this, $src);
23803     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23804     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23805                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23806   %}
23807   ins_pipe( pipe_slow );
23808 %}
23809 
23810 
23811 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23812 
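// vpternlog evaluates an arbitrary three-input boolean function bit-wise; the
// immediate $func is its 8-entry truth table, indexed by the bit triple taken
// from (dst, src2, src3). For example 0x96 encodes XOR3 (a ^ b ^ c) and 0xE8
// encodes the majority function.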
23813 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23814   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23815   effect(TEMP dst);
23816   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23817   ins_encode %{
23818     int vector_len = vector_length_encoding(this);
23819     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23820   %}
23821   ins_pipe( pipe_slow );
23822 %}
23823 
23824 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23825   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23826   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23827   effect(TEMP dst);
23828   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23829   ins_encode %{
23830     int vector_len = vector_length_encoding(this);
23831     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23832   %}
23833   ins_pipe( pipe_slow );
23834 %}
23835 
23836 // --------------------------------- Rotation Operations ----------------------------------
23837 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23838   match(Set dst (RotateLeftV src shift));
23839   match(Set dst (RotateRightV src shift));
23840   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23841   ins_encode %{
23842     int opcode      = this->ideal_Opcode();
23843     int vector_len  = vector_length_encoding(this);
23844     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23845     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23846   %}
23847   ins_pipe( pipe_slow );
23848 %}
23849 
23850 instruct vprorate(vec dst, vec src, vec shift) %{
23851   match(Set dst (RotateLeftV src shift));
23852   match(Set dst (RotateRightV src shift));
23853   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23854   ins_encode %{
23855     int opcode      = this->ideal_Opcode();
23856     int vector_len  = vector_length_encoding(this);
23857     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23858     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23859   %}
23860   ins_pipe( pipe_slow );
23861 %}
23862 
23863 // ---------------------------------- Masked Operations ------------------------------------
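// Two flavors of masking: on AVX/AVX2 the mask lives in an ordinary vector
// register and vmaskmov-style instructions are used, while the EVEX rules
// carry the mask in a dedicated opmask (k) register.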
23864 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23865   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23866   match(Set dst (LoadVectorMasked mem mask));
23867   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23868   ins_encode %{
23869     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23870     int vlen_enc = vector_length_encoding(this);
23871     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23872   %}
23873   ins_pipe( pipe_slow );
23874 %}
23875 
23876 
23877 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23878   predicate(n->in(3)->bottom_type()->isa_vectmask());
23879   match(Set dst (LoadVectorMasked mem mask));
23880   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23881   ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23883     int vector_len = vector_length_encoding(this);
23884     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23885   %}
23886   ins_pipe( pipe_slow );
23887 %}
23888 
23889 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23890   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23891   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23892   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23893   ins_encode %{
23894     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23895     int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23897     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23898   %}
23899   ins_pipe( pipe_slow );
23900 %}
23901 
23902 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23903   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23904   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23905   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23906   ins_encode %{
23907     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23909     int vlen_enc = vector_length_encoding(src_node);
23910     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23911   %}
23912   ins_pipe( pipe_slow );
23913 %}
23914 
23915 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23916   match(Set addr (VerifyVectorAlignment addr mask));
23917   effect(KILL cr);
23918   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23919   ins_encode %{
23920     Label Lskip;
23921     // check if masked bits of addr are zero
23922     __ testq($addr$$Register, $mask$$constant);
23923     __ jccb(Assembler::equal, Lskip);
23924     __ stop("verify_vector_alignment found a misaligned vector memory access");
23925     __ bind(Lskip);
23926   %}
23927   ins_pipe(pipe_slow);
23928 %}
23929 
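// VectorCmpMasked compares the mask-selected lanes of $src1 and $src2 and
// returns -1 when they all match, otherwise (roughly) the index of the first
// mismatch: the equal-compare result is OR-ed with the complement of the
// mask, so kortest sets the carry flag only if every selected lane compared
// equal.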
23930 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23931   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23932   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23933   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23934   ins_encode %{
23935     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23936     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23937 
23938     Label DONE;
23939     int vlen_enc = vector_length_encoding(this, $src1);
23940     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23941 
23942     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23943     __ mov64($dst$$Register, -1L);
23944     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23945     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23946     __ jccb(Assembler::carrySet, DONE);
23947     __ kmovql($dst$$Register, $ktmp1$$KRegister);
23948     __ notq($dst$$Register);
23949     __ tzcntq($dst$$Register, $dst$$Register);
23950     __ bind(DONE);
23951   %}
23952   ins_pipe( pipe_slow );
23953 %}
23954 
23955 
23956 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23957   match(Set dst (VectorMaskGen len));
23958   effect(TEMP temp, KILL cr);
23959   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23960   ins_encode %{
23961     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23962   %}
23963   ins_pipe( pipe_slow );
23964 %}
23965 
23966 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23967   match(Set dst (VectorMaskGen len));
23968   format %{ "vector_mask_gen $len \t! vector mask generator" %}
23969   effect(TEMP temp);
23970   ins_encode %{
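    // Materialize a mask with the low $len bits set: shifting the all-ones
    // pattern right by (64 - len) leaves exactly len one bits.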
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23972     __ kmovql($dst$$KRegister, $temp$$Register);
23973   %}
23974   ins_pipe( pipe_slow );
23975 %}
23976 
23977 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23978   predicate(n->in(1)->bottom_type()->isa_vectmask());
23979   match(Set dst (VectorMaskToLong mask));
23980   effect(TEMP dst, KILL cr);
23981   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23982   ins_encode %{
23983     int opcode = this->ideal_Opcode();
23984     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23985     int mask_len = Matcher::vector_length(this, $mask);
23986     int mask_size = mask_len * type2aelembytes(mbt);
23987     int vlen_enc = vector_length_encoding(this, $mask);
23988     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23989                              $dst$$Register, mask_len, mask_size, vlen_enc);
23990   %}
23991   ins_pipe( pipe_slow );
23992 %}
23993 
23994 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23995   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23996   match(Set dst (VectorMaskToLong mask));
23997   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23998   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23999   ins_encode %{
24000     int opcode = this->ideal_Opcode();
24001     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24002     int mask_len = Matcher::vector_length(this, $mask);
24003     int vlen_enc = vector_length_encoding(this, $mask);
24004     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24005                              $dst$$Register, mask_len, mbt, vlen_enc);
24006   %}
24007   ins_pipe( pipe_slow );
24008 %}
24009 
24010 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24011   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24012   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24013   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24014   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24015   ins_encode %{
24016     int opcode = this->ideal_Opcode();
24017     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24018     int mask_len = Matcher::vector_length(this, $mask);
24019     int vlen_enc = vector_length_encoding(this, $mask);
24020     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24021                              $dst$$Register, mask_len, mbt, vlen_enc);
24022   %}
24023   ins_pipe( pipe_slow );
24024 %}
24025 
24026 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24027   predicate(n->in(1)->bottom_type()->isa_vectmask());
24028   match(Set dst (VectorMaskTrueCount mask));
24029   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24030   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24031   ins_encode %{
24032     int opcode = this->ideal_Opcode();
24033     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24034     int mask_len = Matcher::vector_length(this, $mask);
24035     int mask_size = mask_len * type2aelembytes(mbt);
24036     int vlen_enc = vector_length_encoding(this, $mask);
24037     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24038                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24039   %}
24040   ins_pipe( pipe_slow );
24041 %}
24042 
24043 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24044   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24045   match(Set dst (VectorMaskTrueCount mask));
24046   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24047   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24048   ins_encode %{
24049     int opcode = this->ideal_Opcode();
24050     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24051     int mask_len = Matcher::vector_length(this, $mask);
24052     int vlen_enc = vector_length_encoding(this, $mask);
24053     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24054                              $tmp$$Register, mask_len, mbt, vlen_enc);
24055   %}
24056   ins_pipe( pipe_slow );
24057 %}
24058 
24059 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24060   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24061   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24062   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24063   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24064   ins_encode %{
24065     int opcode = this->ideal_Opcode();
24066     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24067     int mask_len = Matcher::vector_length(this, $mask);
24068     int vlen_enc = vector_length_encoding(this, $mask);
24069     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24070                              $tmp$$Register, mask_len, mbt, vlen_enc);
24071   %}
24072   ins_pipe( pipe_slow );
24073 %}
24074 
24075 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24076   predicate(n->in(1)->bottom_type()->isa_vectmask());
24077   match(Set dst (VectorMaskFirstTrue mask));
24078   match(Set dst (VectorMaskLastTrue mask));
24079   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24080   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24081   ins_encode %{
24082     int opcode = this->ideal_Opcode();
24083     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24084     int mask_len = Matcher::vector_length(this, $mask);
24085     int mask_size = mask_len * type2aelembytes(mbt);
24086     int vlen_enc = vector_length_encoding(this, $mask);
24087     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24088                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24089   %}
24090   ins_pipe( pipe_slow );
24091 %}
24092 
24093 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24094   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24095   match(Set dst (VectorMaskFirstTrue mask));
24096   match(Set dst (VectorMaskLastTrue mask));
24097   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24098   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24099   ins_encode %{
24100     int opcode = this->ideal_Opcode();
24101     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24102     int mask_len = Matcher::vector_length(this, $mask);
24103     int vlen_enc = vector_length_encoding(this, $mask);
24104     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24105                              $tmp$$Register, mask_len, mbt, vlen_enc);
24106   %}
24107   ins_pipe( pipe_slow );
24108 %}
24109 
24110 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24111   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24112   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24113   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24114   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24115   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24116   ins_encode %{
24117     int opcode = this->ideal_Opcode();
24118     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24119     int mask_len = Matcher::vector_length(this, $mask);
24120     int vlen_enc = vector_length_encoding(this, $mask);
24121     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24122                              $tmp$$Register, mask_len, mbt, vlen_enc);
24123   %}
24124   ins_pipe( pipe_slow );
24125 %}
24126 
24127 // --------------------------------- Compress/Expand Operations ---------------------------
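// CompressV gathers the lanes selected by $mask into consecutive low-order
// lanes of $dst; ExpandV is the inverse, scattering consecutive source lanes
// out to the selected positions. E.g. compressing [a, b, c, d] with mask
// 0b0101 places a and c in the two lowest lanes.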
24128 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24129   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24130   match(Set dst (CompressV src mask));
24131   match(Set dst (ExpandV src mask));
24132   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24133   format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24134   ins_encode %{
24135     int opcode = this->ideal_Opcode();
24136     int vlen_enc = vector_length_encoding(this);
24137     BasicType bt  = Matcher::vector_element_basic_type(this);
24138     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24139                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24140   %}
24141   ins_pipe( pipe_slow );
24142 %}
24143 
24144 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24145   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24146   match(Set dst (CompressV src mask));
24147   match(Set dst (ExpandV src mask));
24148   format %{ "vector_compress_expand $dst, $src, $mask" %}
24149   ins_encode %{
24150     int opcode = this->ideal_Opcode();
24151     int vector_len = vector_length_encoding(this);
24152     BasicType bt  = Matcher::vector_element_basic_type(this);
24153     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24154   %}
24155   ins_pipe( pipe_slow );
24156 %}
24157 
24158 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24159   match(Set dst (CompressM mask));
24160   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24161   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24162   ins_encode %{
24163     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24164     int mask_len = Matcher::vector_length(this);
24165     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24166   %}
24167   ins_pipe( pipe_slow );
24168 %}
24169 
24170 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24171 
24172 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24173   predicate(!VM_Version::supports_gfni());
24174   match(Set dst (ReverseV src));
24175   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24176   format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24177   ins_encode %{
24178     int vec_enc = vector_length_encoding(this);
24179     BasicType bt = Matcher::vector_element_basic_type(this);
24180     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24181                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24182   %}
24183   ins_pipe( pipe_slow );
24184 %}
24185 
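// With GFNI, reversing the bits inside each byte is a single affine
// transform: the 64-bit constant below encodes the 8x8 bit matrix that
// gf2p8affineqb applies to every byte, and with this matrix the transform
// reverses the bit order within each byte.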
24186 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24187   predicate(VM_Version::supports_gfni());
24188   match(Set dst (ReverseV src));
24189   effect(TEMP dst, TEMP xtmp);
24190   format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
24191   ins_encode %{
24192     int vec_enc = vector_length_encoding(this);
24193     BasicType bt  = Matcher::vector_element_basic_type(this);
24194     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24195     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24196                                $xtmp$$XMMRegister);
24197   %}
24198   ins_pipe( pipe_slow );
24199 %}
24200 
24201 instruct vreverse_byte_reg(vec dst, vec src) %{
24202   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24203   match(Set dst (ReverseBytesV src));
24204   effect(TEMP dst);
24205   format %{ "vector_reverse_byte $dst, $src" %}
24206   ins_encode %{
24207     int vec_enc = vector_length_encoding(this);
24208     BasicType bt = Matcher::vector_element_basic_type(this);
24209     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24210   %}
24211   ins_pipe( pipe_slow );
24212 %}
24213 
24214 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24215   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24216   match(Set dst (ReverseBytesV src));
24217   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24218   format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24219   ins_encode %{
24220     int vec_enc = vector_length_encoding(this);
24221     BasicType bt = Matcher::vector_element_basic_type(this);
24222     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24223                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24224   %}
24225   ins_pipe( pipe_slow );
24226 %}
24227 
24228 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24229 
24230 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24231   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24232                                               Matcher::vector_length_in_bytes(n->in(1))));
24233   match(Set dst (CountLeadingZerosV src));
24234   format %{ "vector_count_leading_zeros $dst, $src" %}
24235   ins_encode %{
24236      int vlen_enc = vector_length_encoding(this, $src);
24237      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24238      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24239                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24240   %}
24241   ins_pipe( pipe_slow );
24242 %}
24243 
24244 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24245   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24246                                               Matcher::vector_length_in_bytes(n->in(1))));
24247   match(Set dst (CountLeadingZerosV src mask));
24248   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24249   ins_encode %{
24250     int vlen_enc = vector_length_encoding(this, $src);
24251     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24252     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24253     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24254                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24255   %}
24256   ins_pipe( pipe_slow );
24257 %}
24258 
24259 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24260   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24261             VM_Version::supports_avx512cd() &&
24262             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24263   match(Set dst (CountLeadingZerosV src));
24264   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24265   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
24266   ins_encode %{
24267     int vlen_enc = vector_length_encoding(this, $src);
24268     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24269     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24270                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24271   %}
24272   ins_pipe( pipe_slow );
24273 %}
24274 
24275 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24276   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24277   match(Set dst (CountLeadingZerosV src));
24278   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24279   format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24280   ins_encode %{
24281     int vlen_enc = vector_length_encoding(this, $src);
24282     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24283     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24284                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24285                                        $rtmp$$Register, true, vlen_enc);
24286   %}
24287   ins_pipe( pipe_slow );
24288 %}
24289 
24290 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24291   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24292             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24293   match(Set dst (CountLeadingZerosV src));
24294   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24295   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24296   ins_encode %{
24297     int vlen_enc = vector_length_encoding(this, $src);
24298     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24299     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24300                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24301   %}
24302   ins_pipe( pipe_slow );
24303 %}
24304 
24305 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24306   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24307             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24308   match(Set dst (CountLeadingZerosV src));
24309   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24310   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24311   ins_encode %{
24312     int vlen_enc = vector_length_encoding(this, $src);
24313     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24314     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24315                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24316   %}
24317   ins_pipe( pipe_slow );
24318 %}
24319 
24320 // ---------------------------------- Vector Masked Operations ------------------------------------
24321 
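// The evmasked_op helper emits the merge-masked EVEX form of the operation:
// lanes whose mask bit is set receive dst OP src2, all other lanes keep their
// previous contents (the trailing 'true' argument selects merge- rather than
// zero-masking).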
24322 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24323   match(Set dst (AddVB (Binary dst src2) mask));
24324   match(Set dst (AddVS (Binary dst src2) mask));
24325   match(Set dst (AddVI (Binary dst src2) mask));
24326   match(Set dst (AddVL (Binary dst src2) mask));
24327   match(Set dst (AddVF (Binary dst src2) mask));
24328   match(Set dst (AddVD (Binary dst src2) mask));
24329   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24330   ins_encode %{
24331     int vlen_enc = vector_length_encoding(this);
24332     BasicType bt = Matcher::vector_element_basic_type(this);
24333     int opc = this->ideal_Opcode();
24334     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24335                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24336   %}
24337   ins_pipe( pipe_slow );
24338 %}
24339 
24340 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24341   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24342   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24343   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24344   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24345   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24346   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24347   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24348   ins_encode %{
24349     int vlen_enc = vector_length_encoding(this);
24350     BasicType bt = Matcher::vector_element_basic_type(this);
24351     int opc = this->ideal_Opcode();
24352     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24353                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24354   %}
24355   ins_pipe( pipe_slow );
24356 %}
24357 
24358 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24359   match(Set dst (XorV (Binary dst src2) mask));
24360   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24361   ins_encode %{
24362     int vlen_enc = vector_length_encoding(this);
24363     BasicType bt = Matcher::vector_element_basic_type(this);
24364     int opc = this->ideal_Opcode();
24365     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24366                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24367   %}
24368   ins_pipe( pipe_slow );
24369 %}
24370 
24371 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24372   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24373   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24374   ins_encode %{
24375     int vlen_enc = vector_length_encoding(this);
24376     BasicType bt = Matcher::vector_element_basic_type(this);
24377     int opc = this->ideal_Opcode();
24378     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24379                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24380   %}
24381   ins_pipe( pipe_slow );
24382 %}
24383 
24384 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24385   match(Set dst (OrV (Binary dst src2) mask));
24386   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24387   ins_encode %{
24388     int vlen_enc = vector_length_encoding(this);
24389     BasicType bt = Matcher::vector_element_basic_type(this);
24390     int opc = this->ideal_Opcode();
24391     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24392                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24393   %}
24394   ins_pipe( pipe_slow );
24395 %}
24396 
24397 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24398   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24399   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24400   ins_encode %{
24401     int vlen_enc = vector_length_encoding(this);
24402     BasicType bt = Matcher::vector_element_basic_type(this);
24403     int opc = this->ideal_Opcode();
24404     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24405                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24406   %}
24407   ins_pipe( pipe_slow );
24408 %}
24409 
24410 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24411   match(Set dst (AndV (Binary dst src2) mask));
24412   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24413   ins_encode %{
24414     int vlen_enc = vector_length_encoding(this);
24415     BasicType bt = Matcher::vector_element_basic_type(this);
24416     int opc = this->ideal_Opcode();
24417     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24418                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24419   %}
24420   ins_pipe( pipe_slow );
24421 %}
24422 
24423 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24424   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24425   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24426   ins_encode %{
24427     int vlen_enc = vector_length_encoding(this);
24428     BasicType bt = Matcher::vector_element_basic_type(this);
24429     int opc = this->ideal_Opcode();
24430     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24431                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24432   %}
24433   ins_pipe( pipe_slow );
24434 %}
24435 
24436 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24437   match(Set dst (SubVB (Binary dst src2) mask));
24438   match(Set dst (SubVS (Binary dst src2) mask));
24439   match(Set dst (SubVI (Binary dst src2) mask));
24440   match(Set dst (SubVL (Binary dst src2) mask));
24441   match(Set dst (SubVF (Binary dst src2) mask));
24442   match(Set dst (SubVD (Binary dst src2) mask));
24443   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24444   ins_encode %{
24445     int vlen_enc = vector_length_encoding(this);
24446     BasicType bt = Matcher::vector_element_basic_type(this);
24447     int opc = this->ideal_Opcode();
24448     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24449                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24450   %}
24451   ins_pipe( pipe_slow );
24452 %}
24453 
24454 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24455   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24456   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24457   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24458   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24459   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24460   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24461   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24462   ins_encode %{
24463     int vlen_enc = vector_length_encoding(this);
24464     BasicType bt = Matcher::vector_element_basic_type(this);
24465     int opc = this->ideal_Opcode();
24466     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24467                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24468   %}
24469   ins_pipe( pipe_slow );
24470 %}
24471 
24472 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24473   match(Set dst (MulVS (Binary dst src2) mask));
24474   match(Set dst (MulVI (Binary dst src2) mask));
24475   match(Set dst (MulVL (Binary dst src2) mask));
24476   match(Set dst (MulVF (Binary dst src2) mask));
24477   match(Set dst (MulVD (Binary dst src2) mask));
24478   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24479   ins_encode %{
24480     int vlen_enc = vector_length_encoding(this);
24481     BasicType bt = Matcher::vector_element_basic_type(this);
24482     int opc = this->ideal_Opcode();
24483     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24484                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24485   %}
24486   ins_pipe( pipe_slow );
24487 %}
24488 
24489 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24490   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24491   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24492   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24493   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24494   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24495   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24496   ins_encode %{
24497     int vlen_enc = vector_length_encoding(this);
24498     BasicType bt = Matcher::vector_element_basic_type(this);
24499     int opc = this->ideal_Opcode();
24500     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24501                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24502   %}
24503   ins_pipe( pipe_slow );
24504 %}
24505 
24506 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24507   match(Set dst (SqrtVF dst mask));
24508   match(Set dst (SqrtVD dst mask));
24509   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24510   ins_encode %{
24511     int vlen_enc = vector_length_encoding(this);
24512     BasicType bt = Matcher::vector_element_basic_type(this);
24513     int opc = this->ideal_Opcode();
24514     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24515                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24516   %}
24517   ins_pipe( pipe_slow );
24518 %}
24519 
24520 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24521   match(Set dst (DivVF (Binary dst src2) mask));
24522   match(Set dst (DivVD (Binary dst src2) mask));
24523   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24524   ins_encode %{
24525     int vlen_enc = vector_length_encoding(this);
24526     BasicType bt = Matcher::vector_element_basic_type(this);
24527     int opc = this->ideal_Opcode();
24528     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24529                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24530   %}
24531   ins_pipe( pipe_slow );
24532 %}
24533 
24534 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24535   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24536   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24537   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24538   ins_encode %{
24539     int vlen_enc = vector_length_encoding(this);
24540     BasicType bt = Matcher::vector_element_basic_type(this);
24541     int opc = this->ideal_Opcode();
24542     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24543                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24544   %}
24545   ins_pipe( pipe_slow );
24546 %}
24547 
24548 
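// RotateLeftV and RotateRightV share the immediate and vector-count rules
// below; evmasked_op dispatches on the ideal opcode.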
24549 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24550   match(Set dst (RotateLeftV (Binary dst shift) mask));
24551   match(Set dst (RotateRightV (Binary dst shift) mask));
24552   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24553   ins_encode %{
24554     int vlen_enc = vector_length_encoding(this);
24555     BasicType bt = Matcher::vector_element_basic_type(this);
24556     int opc = this->ideal_Opcode();
24557     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24558                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24559   %}
24560   ins_pipe( pipe_slow );
24561 %}
24562 
24563 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24564   match(Set dst (RotateLeftV (Binary dst src2) mask));
24565   match(Set dst (RotateRightV (Binary dst src2) mask));
24566   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24567   ins_encode %{
24568     int vlen_enc = vector_length_encoding(this);
24569     BasicType bt = Matcher::vector_element_basic_type(this);
24570     int opc = this->ideal_Opcode();
24571     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24572                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24573   %}
24574   ins_pipe( pipe_slow );
24575 %}
24576 
24577 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24578   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24579   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24580   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24581   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24582   ins_encode %{
24583     int vlen_enc = vector_length_encoding(this);
24584     BasicType bt = Matcher::vector_element_basic_type(this);
24585     int opc = this->ideal_Opcode();
24586     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24587                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24588   %}
24589   ins_pipe( pipe_slow );
24590 %}
24591 
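// A non-variable shift applies one broadcast count to every lane, while the
// is_var_shift() rules below take a per-lane count vector (the vpsllv*/
// vpsrlv*/vpsrav* family); the trailing boolean passed to evmasked_op
// selects the variable-shift encoding.
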
24592 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24593   predicate(!n->as_ShiftV()->is_var_shift());
24594   match(Set dst (LShiftVS (Binary dst src2) mask));
24595   match(Set dst (LShiftVI (Binary dst src2) mask));
24596   match(Set dst (LShiftVL (Binary dst src2) mask));
24597   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24598   ins_encode %{
24599     int vlen_enc = vector_length_encoding(this);
24600     BasicType bt = Matcher::vector_element_basic_type(this);
24601     int opc = this->ideal_Opcode();
24602     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24603                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24604   %}
24605   ins_pipe( pipe_slow );
24606 %}
24607 
24608 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24609   predicate(n->as_ShiftV()->is_var_shift());
24610   match(Set dst (LShiftVS (Binary dst src2) mask));
24611   match(Set dst (LShiftVI (Binary dst src2) mask));
24612   match(Set dst (LShiftVL (Binary dst src2) mask));
24613   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24614   ins_encode %{
24615     int vlen_enc = vector_length_encoding(this);
24616     BasicType bt = Matcher::vector_element_basic_type(this);
24617     int opc = this->ideal_Opcode();
24618     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24619                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24620   %}
24621   ins_pipe( pipe_slow );
24622 %}
24623 
24624 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24625   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24626   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24627   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24628   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24629   ins_encode %{
24630     int vlen_enc = vector_length_encoding(this);
24631     BasicType bt = Matcher::vector_element_basic_type(this);
24632     int opc = this->ideal_Opcode();
24633     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24634                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24635   %}
24636   ins_pipe( pipe_slow );
24637 %}
24638 
24639 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24640   predicate(!n->as_ShiftV()->is_var_shift());
24641   match(Set dst (RShiftVS (Binary dst src2) mask));
24642   match(Set dst (RShiftVI (Binary dst src2) mask));
24643   match(Set dst (RShiftVL (Binary dst src2) mask));
24644   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24645   ins_encode %{
24646     int vlen_enc = vector_length_encoding(this);
24647     BasicType bt = Matcher::vector_element_basic_type(this);
24648     int opc = this->ideal_Opcode();
24649     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24650                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24651   %}
24652   ins_pipe( pipe_slow );
24653 %}
24654 
24655 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24656   predicate(n->as_ShiftV()->is_var_shift());
24657   match(Set dst (RShiftVS (Binary dst src2) mask));
24658   match(Set dst (RShiftVI (Binary dst src2) mask));
24659   match(Set dst (RShiftVL (Binary dst src2) mask));
24660   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24661   ins_encode %{
24662     int vlen_enc = vector_length_encoding(this);
24663     BasicType bt = Matcher::vector_element_basic_type(this);
24664     int opc = this->ideal_Opcode();
24665     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24666                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24667   %}
24668   ins_pipe( pipe_slow );
24669 %}
24670 
24671 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24672   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24673   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24674   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24675   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24676   ins_encode %{
24677     int vlen_enc = vector_length_encoding(this);
24678     BasicType bt = Matcher::vector_element_basic_type(this);
24679     int opc = this->ideal_Opcode();
24680     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24681                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24682   %}
24683   ins_pipe( pipe_slow );
24684 %}
24685 
24686 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24687   predicate(!n->as_ShiftV()->is_var_shift());
24688   match(Set dst (URShiftVS (Binary dst src2) mask));
24689   match(Set dst (URShiftVI (Binary dst src2) mask));
24690   match(Set dst (URShiftVL (Binary dst src2) mask));
24691   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24692   ins_encode %{
24693     int vlen_enc = vector_length_encoding(this);
24694     BasicType bt = Matcher::vector_element_basic_type(this);
24695     int opc = this->ideal_Opcode();
24696     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24697                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24698   %}
24699   ins_pipe( pipe_slow );
24700 %}
24701 
24702 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24703   predicate(n->as_ShiftV()->is_var_shift());
24704   match(Set dst (URShiftVS (Binary dst src2) mask));
24705   match(Set dst (URShiftVI (Binary dst src2) mask));
24706   match(Set dst (URShiftVL (Binary dst src2) mask));
24707   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24708   ins_encode %{
24709     int vlen_enc = vector_length_encoding(this);
24710     BasicType bt = Matcher::vector_element_basic_type(this);
24711     int opc = this->ideal_Opcode();
24712     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24713                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24714   %}
24715   ins_pipe( pipe_slow );
24716 %}
24717 
24718 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24719   match(Set dst (MaxV (Binary dst src2) mask));
24720   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24721   ins_encode %{
24722     int vlen_enc = vector_length_encoding(this);
24723     BasicType bt = Matcher::vector_element_basic_type(this);
24724     int opc = this->ideal_Opcode();
24725     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24726                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24727   %}
24728   ins_pipe( pipe_slow );
24729 %}
24730 
24731 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24732   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24733   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24734   ins_encode %{
24735     int vlen_enc = vector_length_encoding(this);
24736     BasicType bt = Matcher::vector_element_basic_type(this);
24737     int opc = this->ideal_Opcode();
24738     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24739                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24740   %}
24741   ins_pipe( pipe_slow );
24742 %}
24743 
24744 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24745   match(Set dst (MinV (Binary dst src2) mask));
24746   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24747   ins_encode %{
24748     int vlen_enc = vector_length_encoding(this);
24749     BasicType bt = Matcher::vector_element_basic_type(this);
24750     int opc = this->ideal_Opcode();
24751     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24752                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24753   %}
24754   ins_pipe( pipe_slow );
24755 %}
24756 
24757 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24758   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24759   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24760   ins_encode %{
24761     int vlen_enc = vector_length_encoding(this);
24762     BasicType bt = Matcher::vector_element_basic_type(this);
24763     int opc = this->ideal_Opcode();
24764     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24765                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24766   %}
24767   ins_pipe( pipe_slow );
24768 %}
24769 
24770 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24771   match(Set dst (VectorRearrange (Binary dst src2) mask));
24772   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24773   ins_encode %{
24774     int vlen_enc = vector_length_encoding(this);
24775     BasicType bt = Matcher::vector_element_basic_type(this);
24776     int opc = this->ideal_Opcode();
24777     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24778                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24779   %}
24780   ins_pipe( pipe_slow );
24781 %}
24782 
24783 instruct vabs_masked(vec dst, kReg mask) %{
24784   match(Set dst (AbsVB dst mask));
24785   match(Set dst (AbsVS dst mask));
24786   match(Set dst (AbsVI dst mask));
24787   match(Set dst (AbsVL dst mask));
24788   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24789   ins_encode %{
24790     int vlen_enc = vector_length_encoding(this);
24791     BasicType bt = Matcher::vector_element_basic_type(this);
24792     int opc = this->ideal_Opcode();
24793     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24794                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24795   %}
24796   ins_pipe( pipe_slow );
24797 %}
24798 
24799 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24800   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24801   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24802   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24803   ins_encode %{
24804     assert(UseFMA, "Needs FMA instructions support.");
24805     int vlen_enc = vector_length_encoding(this);
24806     BasicType bt = Matcher::vector_element_basic_type(this);
24807     int opc = this->ideal_Opcode();
24808     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24809                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24810   %}
24811   ins_pipe( pipe_slow );
24812 %}
24813 
24814 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24815   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24816   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24817   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24818   ins_encode %{
24819     assert(UseFMA, "Needs FMA instructions support.");
24820     int vlen_enc = vector_length_encoding(this);
24821     BasicType bt = Matcher::vector_element_basic_type(this);
24822     int opc = this->ideal_Opcode();
24823     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24824                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24825   %}
24826   ins_pipe( pipe_slow );
24827 %}
24828 
24829 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24830   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24831   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24832   ins_encode %{
24833     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24834     int vlen_enc = vector_length_encoding(this, $src1);
24835     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24836 
    // Dispatch the masked comparison on the element type of the first source.
24838     switch (src1_elem_bt) {
24839       case T_BYTE: {
24840         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24841         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24842         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24843         break;
24844       }
24845       case T_SHORT: {
24846         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24847         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24848         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24849         break;
24850       }
24851       case T_INT: {
24852         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24853         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24854         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24855         break;
24856       }
24857       case T_LONG: {
24858         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24859         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24860         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24861         break;
24862       }
24863       case T_FLOAT: {
24864         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24865         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24866         break;
24867       }
24868       case T_DOUBLE: {
24869         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24870         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24871         break;
24872       }
24873       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24874     }
24875   %}
24876   ins_pipe( pipe_slow );
24877 %}
24878 
24879 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24880   predicate(Matcher::vector_length(n) <= 32);
24881   match(Set dst (MaskAll src));
24882   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24883   ins_encode %{
24884     int mask_len = Matcher::vector_length(this);
24885     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24886   %}
24887   ins_pipe( pipe_slow );
24888 %}
24889 
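// A full-width knot would also flip the dead upper bits of the k register;
// for mask lengths below 8 this variant instead XORs with ((1 << masklen) - 1),
// built via $rtmp and $ktmp, so only the live mask bits are flipped.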
24890 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24891   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24892   match(Set dst (XorVMask src (MaskAll cnt)));
24893   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24894   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24895   ins_encode %{
24896     uint masklen = Matcher::vector_length(this);
24897     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24898   %}
24899   ins_pipe( pipe_slow );
24900 %}
24901 
24902 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24903   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24904             (Matcher::vector_length(n) == 16) ||
24905             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24906   match(Set dst (XorVMask src (MaskAll cnt)));
24907   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24908   ins_encode %{
24909     uint masklen = Matcher::vector_length(this);
24910     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24911   %}
24912   ins_pipe( pipe_slow );
24913 %}
24914 
24915 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24916   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24917   match(Set dst (VectorLongToMask src));
24918   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24919   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24920   ins_encode %{
24921     int mask_len = Matcher::vector_length(this);
24922     int vec_enc  = vector_length_encoding(mask_len);
24923     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24924                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24925   %}
24926   ins_pipe( pipe_slow );
24927 %}
24928 
24929 
24930 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24931   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24932   match(Set dst (VectorLongToMask src));
24933   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24934   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24935   ins_encode %{
24936     int mask_len = Matcher::vector_length(this);
24937     assert(mask_len <= 32, "invalid mask length");
24938     int vec_enc  = vector_length_encoding(mask_len);
24939     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24940                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24941   %}
24942   ins_pipe( pipe_slow );
24943 %}
24944 
24945 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24946   predicate(n->bottom_type()->isa_vectmask());
24947   match(Set dst (VectorLongToMask src));
24948   format %{ "long_to_mask_evex $dst, $src\t!" %}
24949   ins_encode %{
24950     __ kmov($dst$$KRegister, $src$$Register);
24951   %}
24952   ins_pipe( pipe_slow );
24953 %}
24954 
24955 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24956   match(Set dst (AndVMask src1 src2));
24957   match(Set dst (OrVMask src1 src2));
24958   match(Set dst (XorVMask src1 src2));
24959   effect(TEMP kscratch);
24960   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24961   ins_encode %{
24962     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24963     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24964     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24965     uint masklen = Matcher::vector_length(this);
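    // Without AVX512DQ there are no byte-granular k-register logical ops
    // (kandb/korb/kxorb), so sub-16-bit masks are widened to word size.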
24966     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24967     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24968   %}
24969   ins_pipe( pipe_slow );
24970 %}
24971 
24972 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24973   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24974   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24975   ins_encode %{
24976     int vlen_enc = vector_length_encoding(this);
24977     BasicType bt = Matcher::vector_element_basic_type(this);
24978     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24979                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24980   %}
24981   ins_pipe( pipe_slow );
24982 %}
24983 
24984 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24985   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24986   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24987   ins_encode %{
24988     int vlen_enc = vector_length_encoding(this);
24989     BasicType bt = Matcher::vector_element_basic_type(this);
24990     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24991                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24992   %}
24993   ins_pipe( pipe_slow );
24994 %}
24995 
24996 instruct castMM(kReg dst)
24997 %{
24998   match(Set dst (CastVV dst));
24999 
25000   size(0);
25001   format %{ "# castVV of $dst" %}
25002   ins_encode(/* empty encoding */);
25003   ins_cost(0);
25004   ins_pipe(empty);
25005 %}
25006 
25007 instruct castVV(vec dst)
25008 %{
25009   match(Set dst (CastVV dst));
25010 
25011   size(0);
25012   format %{ "# castVV of $dst" %}
25013   ins_encode(/* empty encoding */);
25014   ins_cost(0);
25015   ins_pipe(empty);
25016 %}
25017 
25018 instruct castVVLeg(legVec dst)
25019 %{
25020   match(Set dst (CastVV dst));
25021 
25022   size(0);
25023   format %{ "# castVV of $dst" %}
25024   ins_encode(/* empty encoding */);
25025   ins_cost(0);
25026   ins_pipe(empty);
25027 %}
25028 
25029 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25030 %{
25031   match(Set dst (IsInfiniteF src));
25032   effect(TEMP ktmp, KILL cr);
25033   format %{ "float_class_check $dst, $src" %}
25034   ins_encode %{
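    // vfpclass imm8 0x18 selects +Inf (bit 3) and -Inf (bit 4).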
25035     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25036     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25037   %}
25038   ins_pipe(pipe_slow);
25039 %}
25040 
25041 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25042 %{
25043   match(Set dst (IsInfiniteD src));
25044   effect(TEMP ktmp, KILL cr);
25045   format %{ "double_class_check $dst, $src" %}
25046   ins_encode %{
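    // Same class mask as the float variant: 0x18 = +Inf | -Inf.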
25047     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25048     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25049   %}
25050   ins_pipe(pipe_slow);
25051 %}
25052 
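// Saturating add/sub: byte and short lanes map directly onto the hardware
// vpadds*/vpaddus*/vpsubs*/vpsubus* forms, while int and long lanes have no
// saturating instructions and are emulated by the *_evex/*_avx rules below.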
25053 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25054 %{
25055   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25056             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25057   match(Set dst (SaturatingAddV src1 src2));
25058   match(Set dst (SaturatingSubV src1 src2));
25059   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25060   ins_encode %{
25061     int vlen_enc = vector_length_encoding(this);
25062     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25063     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25064                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25065   %}
25066   ins_pipe(pipe_slow);
25067 %}
25068 
25069 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25070 %{
25071   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25072             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25073   match(Set dst (SaturatingAddV src1 src2));
25074   match(Set dst (SaturatingSubV src1 src2));
25075   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25076   ins_encode %{
25077     int vlen_enc = vector_length_encoding(this);
25078     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25079     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25080                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25081   %}
25082   ins_pipe(pipe_slow);
25083 %}
25084 
25085 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25086 %{
25087   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25088             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25089             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25090   match(Set dst (SaturatingAddV src1 src2));
25091   match(Set dst (SaturatingSubV src1 src2));
25092   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25093   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25094   ins_encode %{
25095     int vlen_enc = vector_length_encoding(this);
25096     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25097     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25098                                         $src1$$XMMRegister, $src2$$XMMRegister,
25099                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25100                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25101   %}
25102   ins_pipe(pipe_slow);
25103 %}
25104 
25105 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25106 %{
25107   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25108             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25109             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25110   match(Set dst (SaturatingAddV src1 src2));
25111   match(Set dst (SaturatingSubV src1 src2));
25112   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25113   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25114   ins_encode %{
25115     int vlen_enc = vector_length_encoding(this);
25116     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25117     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25118                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25119                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25120   %}
25121   ins_pipe(pipe_slow);
25122 %}
25123 
25124 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25125 %{
25126   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25127             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25128             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25129   match(Set dst (SaturatingAddV src1 src2));
25130   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25131   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25132   ins_encode %{
25133     int vlen_enc = vector_length_encoding(this);
25134     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25135     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25136                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25137   %}
25138   ins_pipe(pipe_slow);
25139 %}
25140 
25141 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25142 %{
25143   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25144             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25145             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25146   match(Set dst (SaturatingAddV src1 src2));
25147   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25148   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25149   ins_encode %{
25150     int vlen_enc = vector_length_encoding(this);
25151     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25152     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25153                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25154   %}
25155   ins_pipe(pipe_slow);
25156 %}
25157 
25158 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25159 %{
25160   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25161             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25162             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25163   match(Set dst (SaturatingSubV src1 src2));
25164   effect(TEMP ktmp);
25165   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25166   ins_encode %{
25167     int vlen_enc = vector_length_encoding(this);
25168     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25169     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25170                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25171   %}
25172   ins_pipe(pipe_slow);
25173 %}
25174 
25175 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25176 %{
25177   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25178             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25179             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25180   match(Set dst (SaturatingSubV src1 src2));
25181   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25182   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25183   ins_encode %{
25184     int vlen_enc = vector_length_encoding(this);
25185     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25186     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25187                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25188   %}
25189   ins_pipe(pipe_slow);
25190 %}
25191 
25192 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25193 %{
25194   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25195             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25196   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25197   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25198   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25199   ins_encode %{
25200     int vlen_enc = vector_length_encoding(this);
25201     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25202     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25203                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25204   %}
25205   ins_pipe(pipe_slow);
25206 %}
25207 
25208 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25209 %{
25210   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25211             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25212   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25213   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25214   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25215   ins_encode %{
25216     int vlen_enc = vector_length_encoding(this);
25217     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25218     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25219                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25220   %}
25221   ins_pipe(pipe_slow);
25222 %}
25223 
25224 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25225   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25226             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25227   match(Set dst (SaturatingAddV (Binary dst src) mask));
25228   match(Set dst (SaturatingSubV (Binary dst src) mask));
25229   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25230   ins_encode %{
25231     int vlen_enc = vector_length_encoding(this);
25232     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25233     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25234                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25235   %}
25236   ins_pipe( pipe_slow );
25237 %}
25238 
25239 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25240   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25241             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25242   match(Set dst (SaturatingAddV (Binary dst src) mask));
25243   match(Set dst (SaturatingSubV (Binary dst src) mask));
25244   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25245   ins_encode %{
25246     int vlen_enc = vector_length_encoding(this);
25247     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25248     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25249                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25250   %}
25251   ins_pipe( pipe_slow );
25252 %}
25253 
25254 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25255   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25256             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25257   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25258   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25259   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25260   ins_encode %{
25261     int vlen_enc = vector_length_encoding(this);
25262     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25263     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25264                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25265   %}
25266   ins_pipe( pipe_slow );
25267 %}
25268 
25269 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25270   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25271             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25272   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25273   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25274   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25275   ins_encode %{
25276     int vlen_enc = vector_length_encoding(this);
25277     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25278     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25279                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25280   %}
25281   ins_pipe( pipe_slow );
25282 %}
25283 
25284 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25285 %{
25286   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25287   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25288   ins_encode %{
25289     int vlen_enc = vector_length_encoding(this);
25290     BasicType bt = Matcher::vector_element_basic_type(this);
25291     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25292   %}
25293   ins_pipe(pipe_slow);
25294 %}
25295 
25296 instruct reinterpretS2HF(regF dst, rRegI src)
25297 %{
25298   match(Set dst (ReinterpretS2HF src));
25299   format %{ "vmovw $dst, $src" %}
25300   ins_encode %{
25301     __ vmovw($dst$$XMMRegister, $src$$Register);
25302   %}
25303   ins_pipe(pipe_slow);
25304 %}
25305 
25306 instruct reinterpretHF2S(rRegI dst, regF src)
25307 %{
25308   match(Set dst (ReinterpretHF2S src));
25309   format %{ "vmovw $dst, $src" %}
25310   ins_encode %{
25311     __ vmovw($dst$$Register, $src$$XMMRegister);
25312   %}
25313   ins_pipe(pipe_slow);
25314 %}
25315 
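// The next two rules fuse a float <-> Float16 conversion with the adjacent
// reinterpret, so a single vcvtps2ph/vcvtph2ps suffices instead of a
// conversion followed by a GPR round trip through vmovw.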
25316 instruct convF2HFAndS2HF(regF dst, regF src)
25317 %{
25318   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25319   format %{ "convF2HFAndS2HF $dst, $src" %}
25320   ins_encode %{
25321     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25322   %}
25323   ins_pipe(pipe_slow);
25324 %}
25325 
25326 instruct convHF2SAndHF2F(regF dst, regF src)
25327 %{
25328   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25329   format %{ "convHF2SAndHF2F $dst, $src" %}
25330   ins_encode %{
25331     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25332   %}
25333   ins_pipe(pipe_slow);
25334 %}
25335 
25336 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25337 %{
25338   match(Set dst (SqrtHF src));
25339   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25340   ins_encode %{
25341     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25342   %}
25343   ins_pipe(pipe_slow);
25344 %}
25345 
25346 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25347 %{
25348   match(Set dst (AddHF src1 src2));
25349   match(Set dst (DivHF src1 src2));
25350   match(Set dst (MulHF src1 src2));
25351   match(Set dst (SubHF src1 src2));
25352   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25353   ins_encode %{
25354     int opcode = this->ideal_Opcode();
25355     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25356   %}
25357   ins_pipe(pipe_slow);
25358 %}
25359 
25360 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25361 %{
25362   predicate(VM_Version::supports_avx10_2());
25363   match(Set dst (MaxHF src1 src2));
25364   match(Set dst (MinHF src1 src2));
25365   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25366   ins_encode %{
25367     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25368     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25369   %}
25370   ins_pipe( pipe_slow );
25371 %}
25372 
25373 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25374 %{
25375   predicate(!VM_Version::supports_avx10_2());
25376   match(Set dst (MaxHF src1 src2));
25377   match(Set dst (MinHF src1 src2));
25378   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25379   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25380   ins_encode %{
25381     int opcode = this->ideal_Opcode();
25382     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25383                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25384   %}
25385   ins_pipe( pipe_slow );
25386 %}
25387 
25388 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25389 %{
25390   match(Set dst (FmaHF  src2 (Binary dst src1)));
25391   effect(DEF dst);
25392   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25393   ins_encode %{
25394     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25395   %}
25396   ins_pipe( pipe_slow );
25397 %}
25398 
25399 
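// The *_mem rules below match through (VectorReinterpret (LoadVector ...))
// because Float16 vectors reach the IR as short vectors that are then
// reinterpreted; folding the load avoids materializing the operand in a
// separate register.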
25400 instruct vector_sqrt_HF_reg(vec dst, vec src)
25401 %{
25402   match(Set dst (SqrtVHF src));
25403   format %{ "vector_sqrt_fp16 $dst, $src" %}
25404   ins_encode %{
25405     int vlen_enc = vector_length_encoding(this);
25406     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25407   %}
25408   ins_pipe(pipe_slow);
25409 %}
25410 
25411 instruct vector_sqrt_HF_mem(vec dst, memory src)
25412 %{
25413   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25414   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25415   ins_encode %{
25416     int vlen_enc = vector_length_encoding(this);
25417     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25418   %}
25419   ins_pipe(pipe_slow);
25420 %}
25421 
25422 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25423 %{
25424   match(Set dst (AddVHF src1 src2));
25425   match(Set dst (DivVHF src1 src2));
25426   match(Set dst (MulVHF src1 src2));
25427   match(Set dst (SubVHF src1 src2));
25428   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25429   ins_encode %{
25430     int vlen_enc = vector_length_encoding(this);
25431     int opcode = this->ideal_Opcode();
25432     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25433   %}
25434   ins_pipe(pipe_slow);
25435 %}
25436 
25437 
25438 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25439 %{
25440   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25441   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25442   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25443   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25444   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25445   ins_encode %{
25446     int vlen_enc = vector_length_encoding(this);
25447     int opcode = this->ideal_Opcode();
25448     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25449   %}
25450   ins_pipe(pipe_slow);
25451 %}
25452 
25453 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25454 %{
25455   match(Set dst (FmaVHF src2 (Binary dst src1)));
25456   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25457   ins_encode %{
25458     int vlen_enc = vector_length_encoding(this);
25459     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25460   %}
25461   ins_pipe( pipe_slow );
25462 %}
25463 
25464 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25465 %{
25466   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25467   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25468   ins_encode %{
25469     int vlen_enc = vector_length_encoding(this);
25470     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25471   %}
25472   ins_pipe( pipe_slow );
25473 %}
25474 
25475 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25476 %{
25477   predicate(VM_Version::supports_avx10_2());
25478   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25479   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25480   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25481   ins_encode %{
25482     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25484     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25485   %}
25486   ins_pipe( pipe_slow );
25487 %}
25488 
25489 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25490 %{
25491   predicate(VM_Version::supports_avx10_2());
25492   match(Set dst (MinVHF src1 src2));
25493   match(Set dst (MaxVHF src1 src2));
25494   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25495   ins_encode %{
25496     int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25498     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25499   %}
25500   ins_pipe( pipe_slow );
25501 %}
25502 
25503 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25504 %{
25505   predicate(!VM_Version::supports_avx10_2());
25506   match(Set dst (MinVHF src1 src2));
25507   match(Set dst (MaxVHF src1 src2));
25508   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25509   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25510   ins_encode %{
25511     int vlen_enc = vector_length_encoding(this);
25512     int opcode = this->ideal_Opcode();
25513     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25514                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25515   %}
25516   ins_pipe( pipe_slow );
25517 %}
25518 
25519 //----------PEEPHOLE RULES-----------------------------------------------------
25520 // These must follow all instruction definitions as they use the names
25521 // defined in the instructions definitions.
25522 //
25523 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
25525 //
25526 // peepmatch ( root_instr_name [preceding_instruction]* );
25527 //
// peepprocedure ( procedure_name );
// // Provide the name of a procedure that performs the optimization. The
// // procedure must reside in the architecture-dependent peephole file and
// // have the signature bool (Block*, int, PhaseRegAlloc*, MachNode* (*)(),
// // int...). The arguments are the basic block, the current node index
// // inside the block, the register allocator, functions that, when invoked,
// // return a new node as defined in peepreplace, and the rule numbers of
// // the nodes appearing in the corresponding peepmatch. The procedure
// // returns true if the transformation succeeded, else false.
25537 //
25538 // peepconstraint %{
25539 // (instruction_number.operand_name relational_op instruction_number.operand_name
25540 //  [, ...] );
// // instruction numbers are zero-based, in left-to-right order within peepmatch
25542 //
25543 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25544 // // provide an instruction_number.operand_name for each operand that appears
25545 // // in the replacement instruction's match rule
25546 //
25547 // ---------VM FLAGS---------------------------------------------------------
25548 //
25549 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25550 //
25551 // Each peephole rule is given an identifying number starting with zero and
25552 // increasing by one in the order seen by the parser.  An individual peephole
25553 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25554 // on the command-line.
25555 //
25556 // ---------CURRENT LIMITATIONS----------------------------------------------
25557 //
// Only transformations inside a basic block (do we need more for peephole?)
25559 //
25560 // ---------EXAMPLE----------------------------------------------------------
25561 //
25562 // // pertinent parts of existing instructions in architecture description
25563 // instruct movI(rRegI dst, rRegI src)
25564 // %{
25565 //   match(Set dst (CopyI src));
25566 // %}
25567 //
25568 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25569 // %{
25570 //   match(Set dst (AddI dst src));
25571 //   effect(KILL cr);
25572 // %}
25573 //
25574 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25575 // %{
25576 //   match(Set dst (AddI dst src));
25577 // %}
25578 //
25579 // 1. Simple replacement
// - Only match adjacent instructions in the same basic block
25581 // - Only equality constraints
25582 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25583 // - Only one replacement instruction
25584 //
25585 // // Change (inc mov) to lea
25586 // peephole %{
25587 //   // lea should only be emitted when beneficial
25588 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25589 //   // increment preceded by register-register move
25590 //   peepmatch ( incI_rReg movI );
25591 //   // require that the destination register of the increment
25592 //   // match the destination register of the move
25593 //   peepconstraint ( 0.dst == 1.dst );
25594 //   // construct a replacement instruction that sets
25595 //   // the destination to ( move's source register + one )
25596 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25597 // %}
25598 //
25599 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilize the architecture-dependent API more effectively
// - Currently only one replacement instruction, due to adlc parsing capabilities
25605 //
25606 // // Change (inc mov) to lea
25607 // peephole %{
25608 //   // lea should only be emitted when beneficial
25609 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of the matched nodes are passed into the procedure below
25611 //   peepmatch ( incI_rReg movI );
//   // the procedure responsible for performing the transformation
25613 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the procedure above
25616 //   peepreplace ( leaI_rReg_immI() );
25617 // %}
25618 
// These instructions are not matched by the matcher but are used by the peephole rules
25620 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25621 %{
25622   predicate(false);
25623   match(Set dst (AddI src1 src2));
25624   format %{ "leal    $dst, [$src1 + $src2]" %}
25625   ins_encode %{
25626     Register dst = $dst$$Register;
25627     Register src1 = $src1$$Register;
25628     Register src2 = $src2$$Register;
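    // rbp and r13 cannot be encoded as a base register without a displacement
    // byte, so prefer the other register as the base when possible.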
25629     if (src1 != rbp && src1 != r13) {
25630       __ leal(dst, Address(src1, src2, Address::times_1));
25631     } else {
25632       assert(src2 != rbp && src2 != r13, "");
25633       __ leal(dst, Address(src2, src1, Address::times_1));
25634     }
25635   %}
25636   ins_pipe(ialu_reg_reg);
25637 %}
25638 
25639 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25640 %{
25641   predicate(false);
25642   match(Set dst (AddI src1 src2));
25643   format %{ "leal    $dst, [$src1 + $src2]" %}
25644   ins_encode %{
25645     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25646   %}
25647   ins_pipe(ialu_reg_reg);
25648 %}
25649 
25650 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25651 %{
25652   predicate(false);
25653   match(Set dst (LShiftI src shift));
25654   format %{ "leal    $dst, [$src << $shift]" %}
25655   ins_encode %{
25656     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25657     Register src = $src$$Register;
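    // An index-only lea (no base register) requires a 4-byte displacement, so
    // a shift by 1 is emitted as base + index (src + src) instead. rbp and r13
    // are excluded since they cannot be a base without a displacement byte.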
25658     if (scale == Address::times_2 && src != rbp && src != r13) {
25659       __ leal($dst$$Register, Address(src, src, Address::times_1));
25660     } else {
25661       __ leal($dst$$Register, Address(noreg, src, scale));
25662     }
25663   %}
25664   ins_pipe(ialu_reg_reg);
25665 %}
25666 
25667 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25668 %{
25669   predicate(false);
25670   match(Set dst (AddL src1 src2));
25671   format %{ "leaq    $dst, [$src1 + $src2]" %}
25672   ins_encode %{
25673     Register dst = $dst$$Register;
25674     Register src1 = $src1$$Register;
25675     Register src2 = $src2$$Register;
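    // Same base-register constraint as in leaI_rReg_rReg_peep above.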
25676     if (src1 != rbp && src1 != r13) {
25677       __ leaq(dst, Address(src1, src2, Address::times_1));
25678     } else {
25679       assert(src2 != rbp && src2 != r13, "");
25680       __ leaq(dst, Address(src2, src1, Address::times_1));
25681     }
25682   %}
25683   ins_pipe(ialu_reg_reg);
25684 %}
25685 
25686 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25687 %{
25688   predicate(false);
25689   match(Set dst (AddL src1 src2));
25690   format %{ "leaq    $dst, [$src1 + $src2]" %}
25691   ins_encode %{
25692     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25693   %}
25694   ins_pipe(ialu_reg_reg);
25695 %}
25696 
25697 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25698 %{
25699   predicate(false);
25700   match(Set dst (LShiftL src shift));
25701   format %{ "leaq    $dst, [$src << $shift]" %}
25702   ins_encode %{
25703     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25704     Register src = $src$$Register;
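    // Same encoding considerations as in leaI_rReg_immI2_peep above.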
25705     if (scale == Address::times_2 && src != rbp && src != r13) {
25706       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25707     } else {
25708       __ leaq($dst$$Register, Address(noreg, src, scale));
25709     }
25710   %}
25711   ins_pipe(ialu_reg_reg);
25712 %}
25713 
// These peephole rules replace mov + I pairs (where I is one of {add, inc,
// dec, sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are generally only
// beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
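//
// For illustration (a hypothetical sequence), a register-register move
// followed by an add into the same destination:
//   movl rdx, rax
//   addl rdx, rcx
// is coalesced by lea_coalesce_reg into a single three-operand lea:
//   leal rdx, [rax + rcx]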
25720 
25721 peephole
25722 %{
25723   peeppredicate(VM_Version::supports_fast_2op_lea());
25724   peepmatch (addI_rReg);
25725   peepprocedure (lea_coalesce_reg);
25726   peepreplace (leaI_rReg_rReg_peep());
25727 %}
25728 
25729 peephole
25730 %{
25731   peeppredicate(VM_Version::supports_fast_2op_lea());
25732   peepmatch (addI_rReg_imm);
25733   peepprocedure (lea_coalesce_imm);
25734   peepreplace (leaI_rReg_immI_peep());
25735 %}
25736 
25737 peephole
25738 %{
25739   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25740                 VM_Version::is_intel_cascade_lake());
25741   peepmatch (incI_rReg);
25742   peepprocedure (lea_coalesce_imm);
25743   peepreplace (leaI_rReg_immI_peep());
25744 %}
25745 
25746 peephole
25747 %{
25748   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25749                 VM_Version::is_intel_cascade_lake());
25750   peepmatch (decI_rReg);
25751   peepprocedure (lea_coalesce_imm);
25752   peepreplace (leaI_rReg_immI_peep());
25753 %}
25754 
25755 peephole
25756 %{
25757   peeppredicate(VM_Version::supports_fast_2op_lea());
25758   peepmatch (salI_rReg_immI2);
25759   peepprocedure (lea_coalesce_imm);
25760   peepreplace (leaI_rReg_immI2_peep());
25761 %}
25762 
25763 peephole
25764 %{
25765   peeppredicate(VM_Version::supports_fast_2op_lea());
25766   peepmatch (addL_rReg);
25767   peepprocedure (lea_coalesce_reg);
25768   peepreplace (leaL_rReg_rReg_peep());
25769 %}
25770 
25771 peephole
25772 %{
25773   peeppredicate(VM_Version::supports_fast_2op_lea());
25774   peepmatch (addL_rReg_imm);
25775   peepprocedure (lea_coalesce_imm);
25776   peepreplace (leaL_rReg_immL32_peep());
25777 %}
25778 
25779 peephole
25780 %{
25781   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25782                 VM_Version::is_intel_cascade_lake());
25783   peepmatch (incL_rReg);
25784   peepprocedure (lea_coalesce_imm);
25785   peepreplace (leaL_rReg_immL32_peep());
25786 %}
25787 
25788 peephole
25789 %{
25790   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25791                 VM_Version::is_intel_cascade_lake());
25792   peepmatch (decL_rReg);
25793   peepprocedure (lea_coalesce_imm);
25794   peepreplace (leaL_rReg_immL32_peep());
25795 %}
25796 
25797 peephole
25798 %{
25799   peeppredicate(VM_Version::supports_fast_2op_lea());
25800   peepmatch (salL_rReg_immI2);
25801   peepprocedure (lea_coalesce_imm);
25802   peepreplace (leaL_rReg_immI2_peep());
25803 %}
25804 
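// These rules remove lea instructions emitted for compressed-oop address
// computation when lea_remove_redundant finds them redundant after register
// allocation.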
25805 peephole
25806 %{
25807   peepmatch (leaPCompressedOopOffset);
25808   peepprocedure (lea_remove_redundant);
25809 %}
25810 
25811 peephole
25812 %{
25813   peepmatch (leaP8Narrow);
25814   peepprocedure (lea_remove_redundant);
25815 %}
25816 
25817 peephole
25818 %{
25819   peepmatch (leaP32Narrow);
25820   peepprocedure (lea_remove_redundant);
25821 %}
25822 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant when the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the previous instruction.
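// For illustration (a hypothetical sequence): in
//   andl  rax, rbx
//   testl rax, rax
//   je    done
// the andl already sets ZF and SF for its result, so test_may_remove can
// delete the testl.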
25825 
// int variant
25827 peephole
25828 %{
25829   peepmatch (testI_reg);
25830   peepprocedure (test_may_remove);
25831 %}
25832 
// long variant
25834 peephole
25835 %{
25836   peepmatch (testL_reg);
25837   peepprocedure (test_may_remove);
25838 %}
25839 
25840 
25841 //----------SMARTSPILL RULES---------------------------------------------------
25842 // These must follow all instruction definitions as they use the names
25843 // defined in the instructions definitions.