//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
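//
// As a worked reading of this format (not an additional definition; it
// restates the RBX entry that appears below):
//
//   reg_def RBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
//
// declares RBX as save-on-call for compiled Java code, save-on-entry under
// the C calling convention (RBX is callee-saved in the C ABI), spilled with
// LoadI/StoreI, and carrying hardware encoding 3.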

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// RBX, RSI, and RDI were previously set as save-on-entry for Java code,
// but SOE was turned off due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are SOE registers again.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
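//
// For example (an observation about chunk0 below, not an additional rule):
// the scratch registers R10 and R11 get the highest priority, while RSP
// comes last because it is reserved as the stack pointer and participates
// in every fixed calling sequence.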

alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   R16,         R16_H,
                   R17,         R17_H,
                   R18,         R18_H,
                   R19,         R19_H,
                   R20,         R20_H,
                   R21,         R21_H,
                   R22,         R22_H,
                   R23,         R23_H,
                   R24,         R24_H,
                   R25,         R25_H,
                   R26,         R26_H,
                   R27,         R27_H,
                   R28,         R28_H,
                   R29,         R29_H,
                   R30,         R30_H,
                   R31,         R31_H,
                   RSP,         RSP_H);

// XMM registers.  512-bit registers, or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 are preserved across function calls
//              XMM0-XMM3 might hold parameters
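//
// Reading the definitions below (a worked example, not an extra register):
// XMM0 (word a) by itself holds a 32-bit Float, XMM0 plus XMM0b hold a
// 64-bit Double, XMM0 through XMM0d cover the 128-bit XMM view, XMM0
// through XMM0h the 256-bit YMM view, and XMM0 through XMM0p the full
// 512-bit ZMM register.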

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
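// Note (an observation about this file, not ADL doctrine): the classes below
// come in two forms. Statically enumerated classes such as all_reg list their
// members directly, while dynamically computed classes such as any_reg use a
// %{ ... %} body that returns a register mask (e.g. _ANY_REG_mask) built up
// elsewhere in the architecture description.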

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                   XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
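// AVX-512 opmask registers. k0 is deliberately excluded: as a writemask
// operand it encodes 'no masking', so only k1-k7 are made allocatable.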
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H,
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
// The flags allocation class should be last.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes
 1059 reg_class int_flags(RFLAGS);
 1060 
// Class for pre-EVEX float registers
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
// Class for EVEX float registers
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
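// reg_class_dynamic selects between the two classes above at runtime: the
// first (EVEX) class when the predicate holds, the second (legacy) class
// otherwise.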
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
// Class for pre-EVEX double registers
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
// Class for EVEX double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
// Class for pre-EVEX 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
// Class for EVEX 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre-EVEX 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for EVEX 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre-EVEX 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for EVEX 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre-EVEX 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for EVEX 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
 1391 // Class for all 512bit vector registers
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
 1425 // Class for restricted 512bit vector registers
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
 1459 bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
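// A CastLL can be checked with 32-bit immediate compares if each of its type
// bounds either fits in a signed 32-bit immediate or is unbounded
// (min_jlong/max_jlong), in which case that side needs no check at all.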
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
 1476 extern RegMask _ANY_REG_mask;
 1477 extern RegMask _PTR_REG_mask;
 1478 extern RegMask _PTR_REG_NO_RBP_mask;
 1479 extern RegMask _PTR_NO_RAX_REG_mask;
 1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
 1481 extern RegMask _LONG_REG_mask;
 1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
 1483 extern RegMask _LONG_NO_RCX_REG_mask;
 1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
 1485 extern RegMask _INT_REG_mask;
 1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
 1487 extern RegMask _INT_NO_RCX_REG_mask;
 1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
 1489 extern RegMask _FLOAT_REG_mask;
 1490 
 1491 extern RegMask _STACK_OR_PTR_REG_mask;
 1492 extern RegMask _STACK_OR_LONG_REG_mask;
 1493 extern RegMask _STACK_OR_INT_REG_mask;
 1494 
 1495 inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
 1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
 1497 inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
 1502 #define   RELOC_IMM64    Assembler::imm_operand
 1503 #define   RELOC_DISP32   Assembler::disp32_operand
 1504 
 1505 #define __ masm->
 1506 
 1507 RegMask _ANY_REG_mask;
 1508 RegMask _PTR_REG_mask;
 1509 RegMask _PTR_REG_NO_RBP_mask;
 1510 RegMask _PTR_NO_RAX_REG_mask;
 1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
 1512 RegMask _LONG_REG_mask;
 1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
 1514 RegMask _LONG_NO_RCX_REG_mask;
 1515 RegMask _LONG_NO_RBP_R13_REG_mask;
 1516 RegMask _INT_REG_mask;
 1517 RegMask _INT_NO_RAX_RDX_REG_mask;
 1518 RegMask _INT_NO_RCX_REG_mask;
 1519 RegMask _INT_NO_RBP_R13_REG_mask;
 1520 RegMask _FLOAT_REG_mask;
 1521 RegMask _STACK_OR_PTR_REG_mask;
 1522 RegMask _STACK_OR_LONG_REG_mask;
 1523 RegMask _STACK_OR_INT_REG_mask;
 1524 
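// R12 doubles as the compressed-oops heap base register, so it has to be
// removed from the allocatable register masks whenever compressed oops are
// in use.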
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
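  // Without Intel APX the extended general purpose registers r16-r31 do not
  // exist, so strip them from the pointer mask.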
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
static bool generate_vzeroupper(Compile* C) {
  // Emit vzeroupper when the compiled code uses wide (more than 16-byte)
  // vectors or explicitly requests that the upper AVX state be cleared.
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx());
}
 1629 
static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0;  // vzeroupper is a 3-byte instruction
}
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
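  // (call rel32: one opcode byte plus a four-byte displacement)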
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
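  // (a 10-byte movq loading the inline cache word into rax, followed by the
  // 5-byte call rel32)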
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
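  // (10-byte movq of the 64-bit target address plus a 3-byte indirect call)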
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line, so that it can be patched
// atomically.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line, so that it can be patched
// atomically.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
  // For NaN operands the comiss/ucomiss instructions set ZF, PF and CF
  // and zero OF, AF and SF.
  // Fix up the flags by zeroing ZF and PF so that comparing NaN values
  // yields a 'less than' result (CF remains set).
  // Leave the rest of the flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1702   Label done;
 1703   __ movl(dst, -1);
 1704   __ jcc(Assembler::parity, done);
 1705   __ jcc(Assembler::below, done);
 1706   __ setcc(Assembler::notEqual, dst);
 1707   __ bind(done);
 1708 }
 1709 
 1710 // Math.min()    # Math.max()
 1711 // --------------------------
 1712 // ucomis[s/d]   #
 1713 // ja   -> b     # a
 1714 // jp   -> NaN   # NaN
 1715 // jb   -> a     # b
 1716 // je            #
 1717 // |-jz -> a | b # a & b
 1718 // |    -> a     #
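// The 'equal' case needs the extra zero test: min(-0.0, +0.0) must be -0.0
// and max(-0.0, +0.0) must be +0.0, which the bitwise OR (min) respectively
// AND (max) of the two operands below produces.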
 1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1720                             XMMRegister a, XMMRegister b,
 1721                             XMMRegister xmmt, Register rt,
 1722                             bool min, bool single) {
 1723 
 1724   Label nan, zero, below, above, done;
 1725 
 1726   if (single)
 1727     __ ucomiss(a, b);
 1728   else
 1729     __ ucomisd(a, b);
 1730 
 1731   if (dst->encoding() != (min ? b : a)->encoding())
 1732     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1733   else
 1734     __ jccb(Assembler::above, done);
 1735 
 1736   __ jccb(Assembler::parity, nan);  // PF=1
 1737   __ jccb(Assembler::below, below); // CF=1
 1738 
 1739   // equal
 1740   __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
 1741   if (single) {
 1742     __ ucomiss(a, xmmt);
 1743     __ jccb(Assembler::equal, zero);
 1744 
 1745     __ movflt(dst, a);
 1746     __ jmp(done);
 1747   }
 1748   else {
 1749     __ ucomisd(a, xmmt);
 1750     __ jccb(Assembler::equal, zero);
 1751 
 1752     __ movdbl(dst, a);
 1753     __ jmp(done);
 1754   }
 1755 
 1756   __ bind(zero);
 1757   if (min)
 1758     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1759   else
 1760     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1761 
 1762   __ jmp(done);
 1763 
 1764   __ bind(above);
 1765   if (single)
 1766     __ movflt(dst, min ? b : a);
 1767   else
 1768     __ movdbl(dst, min ? b : a);
 1769 
 1770   __ jmp(done);
 1771 
 1772   __ bind(nan);
 1773   if (single) {
 1774     __ movl(rt, 0x7fc00000); // Float.NaN
 1775     __ movdl(dst, rt);
 1776   }
 1777   else {
 1778     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1779     __ movdq(dst, rt);
 1780   }
 1781   __ jmp(done);
 1782 
 1783   __ bind(below);
 1784   if (single)
 1785     __ movflt(dst, min ? a : b);
 1786   else
 1787     __ movdbl(dst, min ? a : b);
 1788 
 1789   __ bind(done);
 1790 }
 1791 
 1792 //=============================================================================
 1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1794 
 1795 int ConstantTable::calculate_table_base_offset() const {
 1796   return 0;  // absolute addressing, no offset
 1797 }
 1798 
 1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray<Node*>* nodes, PhaseRegAlloc* ra_) {
 1801   ShouldNotReachHere();
 1802 }
 1803 
 1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1805   // Empty encoding
 1806 }
 1807 
 1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1809   return 0;
 1810 }
 1811 
 1812 #ifndef PRODUCT
 1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1814   st->print("# MachConstantBaseNode (empty encoding)");
 1815 }
 1816 #endif
 1817 
 1818 
 1819 //=============================================================================
 1820 #ifndef PRODUCT
 1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1822   Compile* C = ra_->C;
 1823 
 1824   int framesize = C->output()->frame_size_in_bytes();
 1825   int bangsize = C->output()->bang_size_in_bytes();
 1826   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1827   // Remove wordSize for return addr which is already pushed.
 1828   framesize -= wordSize;
 1829 
 1830   if (C->output()->need_stack_bang(bangsize)) {
 1831     framesize -= wordSize;
 1832     st->print("# stack bang (%d bytes)", bangsize);
 1833     st->print("\n\t");
 1834     st->print("pushq   rbp\t# Save rbp");
 1835     if (PreserveFramePointer) {
 1836         st->print("\n\t");
 1837         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1838     }
 1839     if (framesize) {
 1840       st->print("\n\t");
 1841       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1842     }
 1843   } else {
 1844     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1845     st->print("\n\t");
 1846     framesize -= wordSize;
 1847     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1848     if (PreserveFramePointer) {
 1849       st->print("\n\t");
 1850       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1851       if (framesize > 0) {
 1852         st->print("\n\t");
 1853         st->print("addq    rbp, #%d", framesize);
 1854       }
 1855     }
 1856   }
 1857 
 1858   if (VerifyStackAtCalls) {
 1859     st->print("\n\t");
 1860     framesize -= wordSize;
 1861     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1862 #ifdef ASSERT
 1863     st->print("\n\t");
 1864     st->print("# stack alignment check");
 1865 #endif
 1866   }
 1867   if (C->stub_function() != nullptr) {
 1868     st->print("\n\t");
 1869     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1870     st->print("\n\t");
 1871     st->print("je      fast_entry\t");
 1872     st->print("\n\t");
 1873     st->print("call    #nmethod_entry_barrier_stub\t");
 1874     st->print("\n\tfast_entry:");
 1875   }
 1876   st->cr();
 1877 }
 1878 #endif
 1879 
 1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1881   Compile* C = ra_->C;
 1882 
 1883   int framesize = C->output()->frame_size_in_bytes();
 1884   int bangsize = C->output()->bang_size_in_bytes();
 1885 
 1886   if (C->clinit_barrier_on_entry()) {
 1887     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1888     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1889 
 1890     Label L_skip_barrier;
 1891     Register klass = rscratch1;
 1892 
 1893     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1894     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1895 
 1896     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1897 
 1898     __ bind(L_skip_barrier);
 1899   }
 1900 
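  // verified_entry emits the stack bang (when requested), pushes rbp and
  // allocates the rest of the frame; see the matching format() output above.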
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
 1902 
 1903   C->output()->set_frame_complete(__ offset());
 1904 
 1905   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the constant table base offset here because code that
    // uses the table might be emitted before MachConstantBaseNode itself.
 1908     ConstantTable& constant_table = C->output()->constant_table();
 1909     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1910   }
 1911 }
 1912 
 1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1914 {
 1915   return MachNode::size(ra_); // too many variables; just compute it
 1916                               // the hard way
 1917 }
 1918 
 1919 int MachPrologNode::reloc() const
 1920 {
 1921   return 0; // a large enough number
 1922 }
 1923 
 1924 //=============================================================================
 1925 #ifndef PRODUCT
 1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1927 {
 1928   Compile* C = ra_->C;
 1929   if (generate_vzeroupper(C)) {
 1930     st->print("vzeroupper");
 1931     st->cr(); st->print("\t");
 1932   }
 1933 
 1934   int framesize = C->output()->frame_size_in_bytes();
 1935   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the words for the return address (already pushed) and the saved RBP.
 1938   framesize -= 2*wordSize;
 1939 
 1940   if (framesize) {
 1941     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1942     st->print("\t");
 1943   }
 1944 
 1945   st->print_cr("popq    rbp");
 1946   if (do_polling() && C->is_method_compilation()) {
 1947     st->print("\t");
 1948     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1949                  "ja      #safepoint_stub\t"
 1950                  "# Safepoint: poll for GC");
 1951   }
 1952 }
 1953 #endif
 1954 
 1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1956 {
 1957   Compile* C = ra_->C;
 1958 
 1959   if (generate_vzeroupper(C)) {
 1960     // Clear upper bits of YMM registers when current compiled code uses
 1961     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1962     __ vzeroupper();
 1963   }
 1964 
 1965   int framesize = C->output()->frame_size_in_bytes();
 1966   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove the words for the return address (already pushed) and the saved RBP.
 1969   framesize -= 2*wordSize;
 1970 
 1971   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1972 
 1973   if (framesize) {
 1974     __ addq(rsp, framesize);
 1975   }
 1976 
 1977   __ popq(rbp);
 1978 
 1979   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1980     __ reserved_stack_check();
 1981   }
 1982 
 1983   if (do_polling() && C->is_method_compilation()) {
 1984     Label dummy_label;
 1985     Label* code_stub = &dummy_label;
 1986     if (!C->output()->in_scratch_emit_size()) {
 1987       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1988       C->output()->add_stub(stub);
 1989       code_stub = &stub->entry();
 1990     }
 1991     __ relocate(relocInfo::poll_return_type);
 1992     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 1993   }
 1994 }
 1995 
 1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 1997 {
 1998   return MachNode::size(ra_); // too many variables; just compute it
 1999                               // the hard way
 2000 }
 2001 
 2002 int MachEpilogNode::reloc() const
 2003 {
 2004   return 2; // a large enough number
 2005 }
 2006 
 2007 const Pipeline* MachEpilogNode::pipeline() const
 2008 {
 2009   return MachNode::pipeline_class();
 2010 }
 2011 
 2012 //=============================================================================
 2013 
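// Coarse register classes used to dispatch spill-copy emission: general
// purpose register, AVX-512 opmask register, XMM register, or stack slot.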
 2014 enum RC {
 2015   rc_bad,
 2016   rc_int,
 2017   rc_kreg,
 2018   rc_float,
 2019   rc_stack
 2020 };
 2021 
 2022 static enum RC rc_class(OptoReg::Name reg)
 2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
 2025 
 2026   if (OptoReg::is_stack(reg)) return rc_stack;
 2027 
 2028   VMReg r = OptoReg::as_VMReg(reg);
 2029 
 2030   if (r->is_Register()) return rc_int;
 2031 
 2032   if (r->is_KRegister()) return rc_kreg;
 2033 
 2034   assert(r->is_XMMRegister(), "must be");
 2035   return rc_float;
 2036 }
 2037 
 2038 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 2040                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 2041 
 2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 2043                      int stack_offset, int reg, uint ireg, outputStream* st);
 2044 
 2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2046                                       int dst_offset, uint ireg, outputStream* st) {
 2047   if (masm) {
 2048     switch (ireg) {
 2049     case Op_VecS:
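      // Borrow rax as a scratch register, parking it just below rsp for the
      // duration of the 32-bit copy.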
 2050       __ movq(Address(rsp, -8), rax);
 2051       __ movl(rax, Address(rsp, src_offset));
 2052       __ movl(Address(rsp, dst_offset), rax);
 2053       __ movq(rax, Address(rsp, -8));
 2054       break;
 2055     case Op_VecD:
 2056       __ pushq(Address(rsp, src_offset));
 2057       __ popq (Address(rsp, dst_offset));
 2058       break;
 2059     case Op_VecX:
 2060       __ pushq(Address(rsp, src_offset));
 2061       __ popq (Address(rsp, dst_offset));
 2062       __ pushq(Address(rsp, src_offset+8));
 2063       __ popq (Address(rsp, dst_offset+8));
 2064       break;
 2065     case Op_VecY:
 2066       __ vmovdqu(Address(rsp, -32), xmm0);
 2067       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2068       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2069       __ vmovdqu(xmm0, Address(rsp, -32));
 2070       break;
 2071     case Op_VecZ:
 2072       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2073       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2074       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2075       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2076       break;
 2077     default:
 2078       ShouldNotReachHere();
 2079     }
 2080 #ifndef PRODUCT
 2081   } else {
 2082     switch (ireg) {
 2083     case Op_VecS:
 2084       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2085                 "movl    rax, [rsp + #%d]\n\t"
 2086                 "movl    [rsp + #%d], rax\n\t"
 2087                 "movq    rax, [rsp - #8]",
 2088                 src_offset, dst_offset);
 2089       break;
 2090     case Op_VecD:
 2091       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2092                 "popq    [rsp + #%d]",
 2093                 src_offset, dst_offset);
 2094       break;
    case Op_VecX:
 2096       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2097                 "popq    [rsp + #%d]\n\t"
 2098                 "pushq   [rsp + #%d]\n\t"
 2099                 "popq    [rsp + #%d]",
 2100                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2101       break;
 2102     case Op_VecY:
 2103       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2104                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2105                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2106                 "vmovdqu xmm0, [rsp - #32]",
 2107                 src_offset, dst_offset);
 2108       break;
 2109     case Op_VecZ:
      st->print("evmovdquq [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "evmovdquq xmm0, [rsp + #%d]\n\t"
                "evmovdquq [rsp + #%d], xmm0\n\t"
                "evmovdquq xmm0, [rsp - #64]",
 2114                 src_offset, dst_offset);
 2115       break;
 2116     default:
 2117       ShouldNotReachHere();
 2118     }
 2119 #endif
 2120   }
 2121 }
 2122 
 2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2124                                        PhaseRegAlloc* ra_,
 2125                                        bool do_size,
 2126                                        outputStream* st) const {
 2127   assert(masm != nullptr || st  != nullptr, "sanity");
 2128   // Get registers to move
 2129   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2130   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2131   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2132   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2133 
 2134   enum RC src_second_rc = rc_class(src_second);
 2135   enum RC src_first_rc = rc_class(src_first);
 2136   enum RC dst_second_rc = rc_class(dst_second);
 2137   enum RC dst_first_rc = rc_class(dst_first);
 2138 
 2139   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2140          "must move at least 1 register" );
 2141 
 2142   if (src_first == dst_first && src_second == dst_second) {
 2143     // Self copy, no move
 2144     return 0;
 2145   }
 2146   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 2147     uint ireg = ideal_reg();
 2148     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
 2166     return 0;
 2167   }
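  // The scalar cases below distinguish 64-bit from 32-bit copies by testing
  // whether the value occupies an adjacent, even-aligned OptoReg pair:
  //
  //   bool is_64bit = (src_first & 1) == 0 && src_first + 1 == src_second &&
  //                   (dst_first & 1) == 0 && dst_first + 1 == dst_second;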
 2168   if (src_first_rc == rc_stack) {
 2169     // mem ->
 2170     if (dst_first_rc == rc_stack) {
 2171       // mem -> mem
 2172       assert(src_second != dst_first, "overlap");
 2173       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2174           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2175         // 64-bit
 2176         int src_offset = ra_->reg2offset(src_first);
 2177         int dst_offset = ra_->reg2offset(dst_first);
 2178         if (masm) {
 2179           __ pushq(Address(rsp, src_offset));
 2180           __ popq (Address(rsp, dst_offset));
 2181 #ifndef PRODUCT
 2182         } else {
 2183           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2184                     "popq    [rsp + #%d]",
 2185                      src_offset, dst_offset);
 2186 #endif
 2187         }
 2188       } else {
 2189         // 32-bit
 2190         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2191         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2192         // No pushl/popl, so:
 2193         int src_offset = ra_->reg2offset(src_first);
 2194         int dst_offset = ra_->reg2offset(dst_first);
 2195         if (masm) {
 2196           __ movq(Address(rsp, -8), rax);
 2197           __ movl(rax, Address(rsp, src_offset));
 2198           __ movl(Address(rsp, dst_offset), rax);
 2199           __ movq(rax, Address(rsp, -8));
 2200 #ifndef PRODUCT
 2201         } else {
 2202           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2203                     "movl    rax, [rsp + #%d]\n\t"
 2204                     "movl    [rsp + #%d], rax\n\t"
 2205                     "movq    rax, [rsp - #8]",
 2206                      src_offset, dst_offset);
 2207 #endif
 2208         }
 2209       }
 2210       return 0;
 2211     } else if (dst_first_rc == rc_int) {
 2212       // mem -> gpr
 2213       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2214           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2215         // 64-bit
 2216         int offset = ra_->reg2offset(src_first);
 2217         if (masm) {
 2218           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2219 #ifndef PRODUCT
 2220         } else {
 2221           st->print("movq    %s, [rsp + #%d]\t# spill",
 2222                      Matcher::regName[dst_first],
 2223                      offset);
 2224 #endif
 2225         }
 2226       } else {
 2227         // 32-bit
 2228         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2229         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2230         int offset = ra_->reg2offset(src_first);
 2231         if (masm) {
 2232           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2233 #ifndef PRODUCT
 2234         } else {
 2235           st->print("movl    %s, [rsp + #%d]\t# spill",
 2236                      Matcher::regName[dst_first],
 2237                      offset);
 2238 #endif
 2239         }
 2240       }
 2241       return 0;
 2242     } else if (dst_first_rc == rc_float) {
      // mem -> xmm
 2244       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2245           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2246         // 64-bit
 2247         int offset = ra_->reg2offset(src_first);
 2248         if (masm) {
 2249           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2250 #ifndef PRODUCT
 2251         } else {
 2252           st->print("%s  %s, [rsp + #%d]\t# spill",
 2253                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2254                      Matcher::regName[dst_first],
 2255                      offset);
 2256 #endif
 2257         }
 2258       } else {
 2259         // 32-bit
 2260         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2261         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2262         int offset = ra_->reg2offset(src_first);
 2263         if (masm) {
 2264           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2265 #ifndef PRODUCT
 2266         } else {
 2267           st->print("movss   %s, [rsp + #%d]\t# spill",
 2268                      Matcher::regName[dst_first],
 2269                      offset);
 2270 #endif
 2271         }
 2272       }
 2273       return 0;
 2274     } else if (dst_first_rc == rc_kreg) {
 2275       // mem -> kreg
 2276       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2277           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2278         // 64-bit
 2279         int offset = ra_->reg2offset(src_first);
 2280         if (masm) {
 2281           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2282 #ifndef PRODUCT
 2283         } else {
 2284           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2285                      Matcher::regName[dst_first],
 2286                      offset);
 2287 #endif
 2288         }
 2289       }
 2290       return 0;
 2291     }
 2292   } else if (src_first_rc == rc_int) {
 2293     // gpr ->
 2294     if (dst_first_rc == rc_stack) {
 2295       // gpr -> mem
 2296       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2297           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2298         // 64-bit
 2299         int offset = ra_->reg2offset(dst_first);
 2300         if (masm) {
 2301           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2302 #ifndef PRODUCT
 2303         } else {
 2304           st->print("movq    [rsp + #%d], %s\t# spill",
 2305                      offset,
 2306                      Matcher::regName[src_first]);
 2307 #endif
 2308         }
 2309       } else {
 2310         // 32-bit
 2311         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2312         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2313         int offset = ra_->reg2offset(dst_first);
 2314         if (masm) {
 2315           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2316 #ifndef PRODUCT
 2317         } else {
 2318           st->print("movl    [rsp + #%d], %s\t# spill",
 2319                      offset,
 2320                      Matcher::regName[src_first]);
 2321 #endif
 2322         }
 2323       }
 2324       return 0;
 2325     } else if (dst_first_rc == rc_int) {
 2326       // gpr -> gpr
 2327       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2328           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2329         // 64-bit
 2330         if (masm) {
 2331           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2332                   as_Register(Matcher::_regEncode[src_first]));
 2333 #ifndef PRODUCT
 2334         } else {
 2335           st->print("movq    %s, %s\t# spill",
 2336                      Matcher::regName[dst_first],
 2337                      Matcher::regName[src_first]);
 2338 #endif
 2339         }
 2340         return 0;
 2341       } else {
 2342         // 32-bit
 2343         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2344         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2345         if (masm) {
 2346           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2347                   as_Register(Matcher::_regEncode[src_first]));
 2348 #ifndef PRODUCT
 2349         } else {
 2350           st->print("movl    %s, %s\t# spill",
 2351                      Matcher::regName[dst_first],
 2352                      Matcher::regName[src_first]);
 2353 #endif
 2354         }
 2355         return 0;
 2356       }
 2357     } else if (dst_first_rc == rc_float) {
 2358       // gpr -> xmm
 2359       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2360           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2361         // 64-bit
 2362         if (masm) {
 2363           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2364 #ifndef PRODUCT
 2365         } else {
 2366           st->print("movdq   %s, %s\t# spill",
 2367                      Matcher::regName[dst_first],
 2368                      Matcher::regName[src_first]);
 2369 #endif
 2370         }
 2371       } else {
 2372         // 32-bit
 2373         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2374         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2375         if (masm) {
 2376           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2377 #ifndef PRODUCT
 2378         } else {
 2379           st->print("movdl   %s, %s\t# spill",
 2380                      Matcher::regName[dst_first],
 2381                      Matcher::regName[src_first]);
 2382 #endif
 2383         }
 2384       }
 2385       return 0;
 2386     } else if (dst_first_rc == rc_kreg) {
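      // gpr -> kreg: note that this path always ends in Unimplemented();
      // direct mask <-> gpr spill copies are not expected to be generated.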
 2387       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2388           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2389         // 64-bit
 2390         if (masm) {
 2391           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
 2398         }
 2399       }
 2400       Unimplemented();
 2401       return 0;
 2402     }
 2403   } else if (src_first_rc == rc_float) {
 2404     // xmm ->
 2405     if (dst_first_rc == rc_stack) {
 2406       // xmm -> mem
 2407       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2408           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2409         // 64-bit
 2410         int offset = ra_->reg2offset(dst_first);
 2411         if (masm) {
 2412           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2413 #ifndef PRODUCT
 2414         } else {
 2415           st->print("movsd   [rsp + #%d], %s\t# spill",
 2416                      offset,
 2417                      Matcher::regName[src_first]);
 2418 #endif
 2419         }
 2420       } else {
 2421         // 32-bit
 2422         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2423         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2424         int offset = ra_->reg2offset(dst_first);
 2425         if (masm) {
 2426           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2427 #ifndef PRODUCT
 2428         } else {
 2429           st->print("movss   [rsp + #%d], %s\t# spill",
 2430                      offset,
 2431                      Matcher::regName[src_first]);
 2432 #endif
 2433         }
 2434       }
 2435       return 0;
 2436     } else if (dst_first_rc == rc_int) {
 2437       // xmm -> gpr
 2438       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2439           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2440         // 64-bit
 2441         if (masm) {
 2442           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2443 #ifndef PRODUCT
 2444         } else {
 2445           st->print("movdq   %s, %s\t# spill",
 2446                      Matcher::regName[dst_first],
 2447                      Matcher::regName[src_first]);
 2448 #endif
 2449         }
 2450       } else {
 2451         // 32-bit
 2452         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2453         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2454         if (masm) {
 2455           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2456 #ifndef PRODUCT
 2457         } else {
 2458           st->print("movdl   %s, %s\t# spill",
 2459                      Matcher::regName[dst_first],
 2460                      Matcher::regName[src_first]);
 2461 #endif
 2462         }
 2463       }
 2464       return 0;
 2465     } else if (dst_first_rc == rc_float) {
 2466       // xmm -> xmm
 2467       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2468           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2469         // 64-bit
 2470         if (masm) {
 2471           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2472 #ifndef PRODUCT
 2473         } else {
 2474           st->print("%s  %s, %s\t# spill",
 2475                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2476                      Matcher::regName[dst_first],
 2477                      Matcher::regName[src_first]);
 2478 #endif
 2479         }
 2480       } else {
 2481         // 32-bit
 2482         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2483         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2484         if (masm) {
 2485           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2486 #ifndef PRODUCT
 2487         } else {
 2488           st->print("%s  %s, %s\t# spill",
 2489                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2490                      Matcher::regName[dst_first],
 2491                      Matcher::regName[src_first]);
 2492 #endif
 2493         }
 2494       }
 2495       return 0;
 2496     } else if (dst_first_rc == rc_kreg) {
 2497       assert(false, "Illegal spilling");
 2498       return 0;
 2499     }
 2500   } else if (src_first_rc == rc_kreg) {
 2501     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2503       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2504           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2505         // 64-bit
 2506         int offset = ra_->reg2offset(dst_first);
 2507         if (masm) {
 2508           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2509 #ifndef PRODUCT
 2510         } else {
          st->print("kmovq   [rsp + #%d], %s\t# spill",
 2512                      offset,
 2513                      Matcher::regName[src_first]);
 2514 #endif
 2515         }
 2516       }
 2517       return 0;
 2518     } else if (dst_first_rc == rc_int) {
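      // kreg -> gpr: like the gpr -> kreg case above, this path always ends
      // in Unimplemented().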
 2519       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2520           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2521         // 64-bit
 2522         if (masm) {
 2523           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2524 #ifndef PRODUCT
 2525         } else {
          st->print("kmovq   %s, %s\t# spill",
 2527                      Matcher::regName[dst_first],
 2528                      Matcher::regName[src_first]);
 2529 #endif
 2530         }
 2531       }
 2532       Unimplemented();
 2533       return 0;
 2534     } else if (dst_first_rc == rc_kreg) {
 2535       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2536           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2537         // 64-bit
 2538         if (masm) {
 2539           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2540 #ifndef PRODUCT
 2541         } else {
          st->print("kmovq   %s, %s\t# spill",
 2543                      Matcher::regName[dst_first],
 2544                      Matcher::regName[src_first]);
 2545 #endif
 2546         }
 2547       }
 2548       return 0;
 2549     } else if (dst_first_rc == rc_float) {
 2550       assert(false, "Illegal spill");
 2551       return 0;
 2552     }
 2553   }
 2554 
  assert(false, "unhandled spill copy combination");
 2556   Unimplemented();
 2557   return 0;
 2558 }
 2559 
 2560 #ifndef PRODUCT
 2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2562   implementation(nullptr, ra_, false, st);
 2563 }
 2564 #endif
 2565 
 2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2567   implementation(masm, ra_, false, nullptr);
 2568 }
 2569 
 2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2571   return MachNode::size(ra_);
 2572 }
 2573 
 2574 //=============================================================================
 2575 #ifndef PRODUCT
 2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2577 {
 2578   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2579   int reg = ra_->get_reg_first(this);
 2580   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2581             Matcher::regName[reg], offset);
 2582 }
 2583 #endif
 2584 
 2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2586 {
 2587   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2588   int reg = ra_->get_encode(this);
 2589 
 2590   __ lea(as_Register(reg), Address(rsp, offset));
 2591 }
 2592 
 2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2594 {
 2595   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
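  // lea reg, [rsp + offset] encodes as prefix + opcode + ModRM + SIB +
  // disp8/disp32: 5 bytes with a one-byte REX prefix when the offset fits in
  // a byte, 8 bytes with a 32-bit displacement. The two-byte REX2 prefix,
  // needed for register encodings above 15, adds one byte to each.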
 2596   if (ra_->get_encode(this) > 15) {
 2597     return (offset < 0x80) ? 6 : 9; // REX2
 2598   } else {
 2599     return (offset < 0x80) ? 5 : 8; // REX
 2600   }
 2601 }
 2602 
 2603 //=============================================================================
 2604 #ifndef PRODUCT
 2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2606 {
 2607   if (UseCompressedClassPointers) {
 2608     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2609     st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  } else {
    st->print_cr("movq    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
    st->print_cr("\tcmpq    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
  }
 2613   }
 2614   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2615 }
 2616 #endif
 2617 
 2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2619 {
 2620   __ ic_check(InteriorEntryAlignment);
 2621 }
 2622 
 2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2624 {
 2625   return MachNode::size(ra_); // too many variables; just compute it
 2626                               // the hard way
 2627 }
 2628 
 2629 
 2630 //=============================================================================
 2631 
 2632 bool Matcher::supports_vector_calling_convention(void) {
 2633   return EnableVectorSupport;
 2634 }
 2635 
 2636 static bool is_ndd_demotable(const MachNode* mdef) {
 2637   return ((mdef->flags() & Node::PD::Flag_ndd_demotable) != 0);
 2638 }
 2639 
 2640 static bool is_ndd_demotable_commutative(const MachNode* mdef) {
 2641   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_commutative) != 0);
 2642 }
 2643 
 2644 static bool is_demotion_candidate(const MachNode* mdef) {
 2645   return (is_ndd_demotable(mdef) || is_ndd_demotable_commutative(mdef));
 2646 }
 2647 
 2648 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2649                                             int oper_index) {
 2650   if (mdef == nullptr) {
 2651     return false;
 2652   }
 2653 
 2654   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2655       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2656     assert(oper_index != 1 || !is_demotion_candidate(mdef), "%s", mdef->Name());
 2657     assert(oper_index != 2 || !is_ndd_demotable_commutative(mdef), "%s", mdef->Name());
 2658     return false;
 2659   }
 2660 
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any single address component
  // will not result in NDD demotion by the assembler.
 2664   if (mdef->operand_num_edges(oper_index) != 1) {
 2665     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2666     return false;
 2667   }
 2668 
  // A demotion candidate must be register-mask compatible with the definition.
 2670   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2671   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2672     assert(!is_demotion_candidate(mdef), "%s", mdef->Name());
 2673     return false;
 2674   }
 2675 
 2676   switch (oper_index) {
  // The first operand of a MachNode matched by an Intel APX NDD selection
  // pattern can share its assigned register with the definition operand if
  // their live ranges do not overlap. In such a scenario the assembler can
  // demote the instruction to a legacy map0/map1 encoding, replacing its
  // 4-byte extended EVEX prefix with a shorter REX/REX2 encoding. Demotion
  // candidates are decorated with a special flag by the instruction selector.
 2683   case 1:
 2684     return is_demotion_candidate(mdef);
 2685 
  // The definition operand of a commutative operation can also be biased
  // towards the second operand.
 2688   case 2:
 2689     return is_ndd_demotable_commutative(mdef);
 2690 
  // The current scheme selects at most two biasing candidates.
 2692   default:
 2693     assert(false, "unhandled operand index: %s", mdef->Name());
 2694     break;
 2695   }
 2696 
 2697   return false;
 2698 }
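// Illustrative sketch of the demotion this enables (schematic mnemonics,
// not emitted code): a three-operand APX NDD instruction
//   add rDst, rSrc1, rSrc2    // dst = src1 + src2, 4-byte EVEX prefix
// computes into a distinct destination. If the allocator biases rDst onto
// rSrc1 (same register, non-overlapping live ranges), the assembler can
// emit the legacy two-operand form instead:
//   add rSrc1, rSrc2          // shorter REX/REX2 encoding
// For a commutative operation, biasing towards rSrc2 works equally well.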
 2699 
 2700 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2701   assert(EnableVectorSupport, "sanity");
 2702   int lo = XMM0_num;
 2703   int hi = XMM0b_num;
 2704   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2705   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2706   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2707   return OptoRegPair(hi, lo);
 2708 }
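// For example, a 128-bit Op_VecX value is returned as the pair
// (XMM0d_num, XMM0_num), i.e. the last and first 32-bit slots of XMM0.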
 2709 
 2710 // Is this branch offset short enough that a short branch can be used?
 2711 //
 2712 // NOTE: If the platform does not provide any short branch variants, then
 2713 //       this method should return false for offset 0.
 2714 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
  offset -= br_size;

  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly smaller.
 2722   if (rule == jmpConUCF2_rule)
 2723     return (-126 <= offset && offset <= 125);
 2724   return (-128 <= offset && offset <= 127);
 2725 }
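// For example, a 2-byte short branch whose target lies 100 bytes past the
// branch address has offset == 100 and displacement 100 - 2 == 98, which
// fits in [-128, 127], so the short form is usable.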
 2726 
 2727 // Return whether or not this register is ever used as an argument.
 2728 // This function is used on startup to build the trampoline stubs in
 2729 // generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
 2732 bool Matcher::can_be_java_arg(int reg)
 2733 {
 2734   return
 2735     reg ==  RDI_num || reg == RDI_H_num ||
 2736     reg ==  RSI_num || reg == RSI_H_num ||
 2737     reg ==  RDX_num || reg == RDX_H_num ||
 2738     reg ==  RCX_num || reg == RCX_H_num ||
 2739     reg ==   R8_num || reg ==  R8_H_num ||
 2740     reg ==   R9_num || reg ==  R9_H_num ||
 2741     reg ==  R12_num || reg == R12_H_num ||
 2742     reg == XMM0_num || reg == XMM0b_num ||
 2743     reg == XMM1_num || reg == XMM1b_num ||
 2744     reg == XMM2_num || reg == XMM2b_num ||
 2745     reg == XMM3_num || reg == XMM3b_num ||
 2746     reg == XMM4_num || reg == XMM4b_num ||
 2747     reg == XMM5_num || reg == XMM5b_num ||
 2748     reg == XMM6_num || reg == XMM6b_num ||
 2749     reg == XMM7_num || reg == XMM7b_num;
 2750 }
 2751 
 2752 bool Matcher::is_spillable_arg(int reg)
 2753 {
 2754   return can_be_java_arg(reg);
 2755 }
 2756 
 2757 uint Matcher::int_pressure_limit()
 2758 {
 2759   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2760 }
 2761 
 2762 uint Matcher::float_pressure_limit()
 2763 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
 2766   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2767   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2768   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2769 }
 2770 
 2771 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses a multiply (via MulHiL) when the
  // divisor is constant is faster than the hardware DIV instruction.
 2775   return false;
 2776 }
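// For example, a constant division such as x / 3 is strength-reduced to a
// multiply-high (MulHiL) by a precomputed "magic" reciprocal constant
// followed by shift/add fixups, avoiding the DIV instruction entirely.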
 2777 
 2778 // Register for DIVI projection of divmodI
 2779 const RegMask& Matcher::divI_proj_mask() {
 2780   return INT_RAX_REG_mask();
 2781 }
 2782 
 2783 // Register for MODI projection of divmodI
 2784 const RegMask& Matcher::modI_proj_mask() {
 2785   return INT_RDX_REG_mask();
 2786 }
 2787 
 2788 // Register for DIVL projection of divmodL
 2789 const RegMask& Matcher::divL_proj_mask() {
 2790   return LONG_RAX_REG_mask();
 2791 }
 2792 
 2793 // Register for MODL projection of divmodL
 2794 const RegMask& Matcher::modL_proj_mask() {
 2795   return LONG_RDX_REG_mask();
 2796 }
 2797 
 2798 %}
 2799 
 2800 source_hpp %{
 2801 // Header information of the source block.
 2802 // Method declarations/definitions which are used outside
 2803 // the ad-scope can conveniently be defined here.
 2804 //
 2805 // To keep related declarations/definitions/uses close together,
 2806 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
 2807 
 2808 #include "runtime/vm_version.hpp"
 2809 
 2810 class NativeJump;
 2811 
 2812 class CallStubImpl {
 2813 
 2814   //--------------------------------------------------------------
 2815   //---<  Used for optimization in Compile::shorten_branches  >---
 2816   //--------------------------------------------------------------
 2817 
 2818  public:
 2819   // Size of call trampoline stub.
 2820   static uint size_call_trampoline() {
 2821     return 0; // no call trampolines on this platform
 2822   }
 2823 
 2824   // number of relocations needed by a call trampoline stub
 2825   static uint reloc_call_trampoline() {
 2826     return 0; // no call trampolines on this platform
 2827   }
 2828 };
 2829 
 2830 class HandlerImpl {
 2831 
 2832  public:
 2833 
 2834   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2835 
 2836   static uint size_deopt_handler() {
 2837     // one call and one jmp.
 2838     return 7;
 2839   }
 2840 };
 2841 
 2842 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
 2844     case  4: // fall-through
 2845     case  8: // fall-through
 2846     case 16: return Assembler::AVX_128bit;
 2847     case 32: return Assembler::AVX_256bit;
 2848     case 64: return Assembler::AVX_512bit;
 2849 
 2850     default: {
 2851       ShouldNotReachHere();
 2852       return Assembler::AVX_NoVec;
 2853     }
 2854   }
 2855 }
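// For example, vector_length_encoding(32) yields Assembler::AVX_256bit;
// 4- and 8-byte vectors share the 128-bit encoding.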
 2856 
 2857 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2858   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2859 }
 2860 
 2861 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2862   uint def_idx = use->operand_index(opnd);
 2863   Node* def = use->in(def_idx);
 2864   return vector_length_encoding(def);
 2865 }
 2866 
 2867 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2868   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2869          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2870 }
 2871 
 2872 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2873   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2874            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2875 }
 2876 
 2877 class Node::PD {
 2878 public:
 2879   enum NodeFlags : uint64_t {
 2880     Flag_intel_jcc_erratum    = Node::_last_flag << 1,
 2881     Flag_sets_carry_flag      = Node::_last_flag << 2,
 2882     Flag_sets_parity_flag     = Node::_last_flag << 3,
 2883     Flag_sets_zero_flag       = Node::_last_flag << 4,
 2884     Flag_sets_overflow_flag   = Node::_last_flag << 5,
 2885     Flag_sets_sign_flag       = Node::_last_flag << 6,
 2886     Flag_clears_carry_flag    = Node::_last_flag << 7,
 2887     Flag_clears_parity_flag   = Node::_last_flag << 8,
 2888     Flag_clears_zero_flag     = Node::_last_flag << 9,
 2889     Flag_clears_overflow_flag = Node::_last_flag << 10,
 2890     Flag_clears_sign_flag     = Node::_last_flag << 11,
 2891     Flag_ndd_demotable        = Node::_last_flag << 12,
 2892     Flag_ndd_demotable_commutative = Node::_last_flag << 13,
 2893     _last_flag                = Flag_ndd_demotable_commutative
 2894   };
 2895 };
 2896 
 2897 %} // end source_hpp
 2898 
 2899 source %{
 2900 
 2901 #include "opto/addnode.hpp"
 2902 #include "c2_intelJccErratum_x86.hpp"
 2903 
 2904 void PhaseOutput::pd_perform_mach_node_analysis() {
 2905   if (VM_Version::has_intel_jcc_erratum()) {
 2906     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2907     _buf_sizes._code += extra_padding;
 2908   }
 2909 }
 2910 
 2911 int MachNode::pd_alignment_required() const {
 2912   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2913     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2914     return IntelJccErratum::largest_jcc_size() + 1;
 2915   } else {
 2916     return 1;
 2917   }
 2918 }
 2919 
 2920 int MachNode::compute_padding(int current_offset) const {
 2921   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2922     Compile* C = Compile::current();
 2923     PhaseOutput* output = C->output();
 2924     Block* block = output->block();
 2925     int index = output->index();
 2926     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2927   } else {
 2928     return 0;
 2929   }
 2930 }
 2931 
 2932 // Emit deopt handler code.
 2933 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2934 
 2935   // Note that the code buffer's insts_mark is always relative to insts.
 2936   // That's why we must use the macroassembler to generate a handler.
 2937   address base = __ start_a_stub(size_deopt_handler());
 2938   if (base == nullptr) {
 2939     ciEnv::current()->record_failure("CodeCache is full");
 2940     return 0;  // CodeBuffer::expand failed
 2941   }
 2942   int offset = __ offset();
 2943 
 2944   Label start;
 2945   __ bind(start);
 2946 
 2947   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2948 
 2949   int entry_offset = __ offset();
 2950 
 2951   __ jmp(start);
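  // Layout sketch: a 5-byte call to the deopt blob followed by a 2-byte
  // backward jmp. entry_offset points at the jmp, which transfers control
  // to the call, so the stub fits size_deopt_handler() == 7 bytes exactly.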
 2952 
 2953   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2954   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2955          "out of bounds read in post-call NOP check");
 2956   __ end_a_stub();
 2957   return entry_offset;
 2958 }
 2959 
 2960 static Assembler::Width widthForType(BasicType bt) {
 2961   if (bt == T_BYTE) {
 2962     return Assembler::B;
 2963   } else if (bt == T_SHORT) {
 2964     return Assembler::W;
 2965   } else if (bt == T_INT) {
 2966     return Assembler::D;
 2967   } else {
 2968     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2969     return Assembler::Q;
 2970   }
 2971 }
 2972 
 2973 //=============================================================================
 2974 
// Float masks come from different places depending on platform.
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
 2995 
 2996 //=============================================================================
 2997 bool Matcher::match_rule_supported(int opcode) {
 2998   if (!has_match_rule(opcode)) {
 2999     return false; // no match rule present
 3000   }
 3001   switch (opcode) {
 3002     case Op_AbsVL:
 3003     case Op_StoreVectorScatter:
 3004       if (UseAVX < 3) {
 3005         return false;
 3006       }
 3007       break;
 3008     case Op_PopCountI:
 3009     case Op_PopCountL:
 3010       if (!UsePopCountInstruction) {
 3011         return false;
 3012       }
 3013       break;
 3014     case Op_PopCountVI:
 3015       if (UseAVX < 2) {
 3016         return false;
 3017       }
 3018       break;
 3019     case Op_CompressV:
 3020     case Op_ExpandV:
 3021     case Op_PopCountVL:
 3022       if (UseAVX < 2) {
 3023         return false;
 3024       }
 3025       break;
 3026     case Op_MulVI:
 3027       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3028         return false;
 3029       }
 3030       break;
 3031     case Op_MulVL:
 3032       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3033         return false;
 3034       }
 3035       break;
 3036     case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
 3038         return false;
 3039       }
 3040       break;
 3041     case Op_AbsVB:
 3042     case Op_AbsVS:
 3043     case Op_AbsVI:
 3044     case Op_AddReductionVI:
 3045     case Op_AndReductionV:
 3046     case Op_OrReductionV:
 3047     case Op_XorReductionV:
 3048       if (UseSSE < 3) { // requires at least SSSE3
 3049         return false;
 3050       }
 3051       break;
 3052     case Op_MaxHF:
 3053     case Op_MinHF:
 3054       if (!VM_Version::supports_avx512vlbw()) {
 3055         return false;
 3056       }  // fallthrough
 3057     case Op_AddHF:
 3058     case Op_DivHF:
 3059     case Op_FmaHF:
 3060     case Op_MulHF:
 3061     case Op_ReinterpretS2HF:
 3062     case Op_ReinterpretHF2S:
 3063     case Op_SubHF:
 3064     case Op_SqrtHF:
 3065       if (!VM_Version::supports_avx512_fp16()) {
 3066         return false;
 3067       }
 3068       break;
 3069     case Op_VectorLoadShuffle:
 3070     case Op_VectorRearrange:
 3071     case Op_MulReductionVI:
 3072       if (UseSSE < 4) { // requires at least SSE4
 3073         return false;
 3074       }
 3075       break;
 3076     case Op_IsInfiniteF:
 3077     case Op_IsInfiniteD:
 3078       if (!VM_Version::supports_avx512dq()) {
 3079         return false;
 3080       }
 3081       break;
 3082     case Op_SqrtVD:
 3083     case Op_SqrtVF:
 3084     case Op_VectorMaskCmp:
 3085     case Op_VectorCastB2X:
 3086     case Op_VectorCastS2X:
 3087     case Op_VectorCastI2X:
 3088     case Op_VectorCastL2X:
 3089     case Op_VectorCastF2X:
 3090     case Op_VectorCastD2X:
 3091     case Op_VectorUCastB2X:
 3092     case Op_VectorUCastS2X:
 3093     case Op_VectorUCastI2X:
 3094     case Op_VectorMaskCast:
 3095       if (UseAVX < 1) { // enabled for AVX only
 3096         return false;
 3097       }
 3098       break;
 3099     case Op_PopulateIndex:
 3100       if (UseAVX < 2) {
 3101         return false;
 3102       }
 3103       break;
 3104     case Op_RoundVF:
 3105       if (UseAVX < 2) { // enabled for AVX2 only
 3106         return false;
 3107       }
 3108       break;
 3109     case Op_RoundVD:
 3110       if (UseAVX < 3) {
 3111         return false;  // enabled for AVX3 only
 3112       }
 3113       break;
 3114     case Op_CompareAndSwapL:
 3115     case Op_CompareAndSwapP:
 3116       break;
 3117     case Op_StrIndexOf:
 3118       if (!UseSSE42Intrinsics) {
 3119         return false;
 3120       }
 3121       break;
 3122     case Op_StrIndexOfChar:
 3123       if (!UseSSE42Intrinsics) {
 3124         return false;
 3125       }
 3126       break;
 3127     case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
 3129         return false;
 3130       }
 3131       break;
 3132     case Op_MulVB:
 3133     case Op_LShiftVB:
 3134     case Op_RShiftVB:
 3135     case Op_URShiftVB:
 3136     case Op_VectorInsert:
 3137     case Op_VectorLoadMask:
 3138     case Op_VectorStoreMask:
 3139     case Op_VectorBlend:
 3140       if (UseSSE < 4) {
 3141         return false;
 3142       }
 3143       break;
 3144     case Op_MaxD:
 3145     case Op_MaxF:
 3146     case Op_MinD:
 3147     case Op_MinF:
 3148       if (UseAVX < 1) { // enabled for AVX only
 3149         return false;
 3150       }
 3151       break;
 3152     case Op_CacheWB:
 3153     case Op_CacheWBPreSync:
 3154     case Op_CacheWBPostSync:
 3155       if (!VM_Version::supports_data_cache_line_flush()) {
 3156         return false;
 3157       }
 3158       break;
 3159     case Op_ExtractB:
 3160     case Op_ExtractL:
 3161     case Op_ExtractI:
 3162     case Op_RoundDoubleMode:
 3163       if (UseSSE < 4) {
 3164         return false;
 3165       }
 3166       break;
 3167     case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
 3169         return false; // 128bit vroundpd is not available
 3170       }
 3171       break;
 3172     case Op_LoadVectorGather:
 3173     case Op_LoadVectorGatherMasked:
 3174       if (UseAVX < 2) {
 3175         return false;
 3176       }
 3177       break;
 3178     case Op_FmaF:
 3179     case Op_FmaD:
 3180     case Op_FmaVD:
 3181     case Op_FmaVF:
 3182       if (!UseFMA) {
 3183         return false;
 3184       }
 3185       break;
 3186     case Op_MacroLogicV:
 3187       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3188         return false;
 3189       }
 3190       break;
 3191 
 3192     case Op_VectorCmpMasked:
 3193     case Op_VectorMaskGen:
 3194       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3195         return false;
 3196       }
 3197       break;
 3198     case Op_VectorMaskFirstTrue:
 3199     case Op_VectorMaskLastTrue:
 3200     case Op_VectorMaskTrueCount:
 3201     case Op_VectorMaskToLong:
 3202       if (UseAVX < 1) {
 3203          return false;
 3204       }
 3205       break;
 3206     case Op_RoundF:
 3207     case Op_RoundD:
 3208       break;
 3209     case Op_CopySignD:
 3210     case Op_CopySignF:
 3211       if (UseAVX < 3)  {
 3212         return false;
 3213       }
 3214       if (!VM_Version::supports_avx512vl()) {
 3215         return false;
 3216       }
 3217       break;
 3218     case Op_CompressBits:
 3219     case Op_ExpandBits:
 3220       if (!VM_Version::supports_bmi2()) {
 3221         return false;
 3222       }
 3223       break;
 3224     case Op_CompressM:
 3225       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3226         return false;
 3227       }
 3228       break;
 3229     case Op_ConvF2HF:
 3230     case Op_ConvHF2F:
 3231       if (!VM_Version::supports_float16()) {
 3232         return false;
 3233       }
 3234       break;
 3235     case Op_VectorCastF2HF:
 3236     case Op_VectorCastHF2F:
 3237       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3238         return false;
 3239       }
 3240       break;
 3241   }
 3242   return true;  // Match rules are supported by default.
 3243 }
 3244 
 3245 //------------------------------------------------------------------------
 3246 
 3247 static inline bool is_pop_count_instr_target(BasicType bt) {
 3248   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3249          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3250 }
 3251 
 3252 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3253   return match_rule_supported_vector(opcode, vlen, bt);
 3254 }
 3255 
 3256 // Identify extra cases that we might want to provide match rules for vector nodes and
 3257 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3258 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3259   if (!match_rule_supported(opcode)) {
 3260     return false;
 3261   }
 3262   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3263   //   * SSE2 supports 128bit vectors for all types;
 3264   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3265   //   * AVX2 supports 256bit vectors for all types;
 3266   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3267   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3268   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3269   // And MaxVectorSize is taken into account as well.
 3270   if (!vector_size_supported(bt, vlen)) {
 3271     return false;
 3272   }
 3273   // Special cases which require vector length follow:
 3274   //   * implementation limitations
 3275   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3276   //   * 128bit vroundpd instruction is present only in AVX1
 3277   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3278   switch (opcode) {
 3279     case Op_MaxVHF:
 3280     case Op_MinVHF:
 3281       if (!VM_Version::supports_avx512bw()) {
 3282         return false;
      } // fallthrough
 3284     case Op_AddVHF:
 3285     case Op_DivVHF:
 3286     case Op_FmaVHF:
 3287     case Op_MulVHF:
 3288     case Op_SubVHF:
 3289     case Op_SqrtVHF:
 3290       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3291         return false;
 3292       }
 3293       if (!VM_Version::supports_avx512_fp16()) {
 3294         return false;
 3295       }
 3296       break;
 3297     case Op_AbsVF:
 3298     case Op_NegVF:
      if (vlen == 16 && !VM_Version::supports_avx512dq()) {
 3300         return false; // 512bit vandps and vxorps are not available
 3301       }
 3302       break;
 3303     case Op_AbsVD:
 3304     case Op_NegVD:
      if (vlen == 8 && !VM_Version::supports_avx512dq()) {
 3306         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3307       }
 3308       break;
 3309     case Op_RotateRightV:
 3310     case Op_RotateLeftV:
 3311       if (bt != T_INT && bt != T_LONG) {
 3312         return false;
 3313       } // fallthrough
 3314     case Op_MacroLogicV:
 3315       if (!VM_Version::supports_evex() ||
 3316           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3317         return false;
 3318       }
 3319       break;
 3320     case Op_ClearArray:
 3321     case Op_VectorMaskGen:
 3322     case Op_VectorCmpMasked:
 3323       if (!VM_Version::supports_avx512bw()) {
 3324         return false;
 3325       }
 3326       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3327         return false;
 3328       }
 3329       break;
 3330     case Op_LoadVectorMasked:
 3331     case Op_StoreVectorMasked:
 3332       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3333         return false;
 3334       }
 3335       break;
 3336     case Op_UMinV:
 3337     case Op_UMaxV:
 3338       if (UseAVX == 0) {
 3339         return false;
 3340       }
 3341       break;
 3342     case Op_MaxV:
 3343     case Op_MinV:
 3344       if (UseSSE < 4 && is_integral_type(bt)) {
 3345         return false;
 3346       }
      if (bt == T_FLOAT || bt == T_DOUBLE) {
        // Float/Double intrinsics are enabled for the AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        // 512-bit Float/Double intrinsics need AVX512DQ.
        if (UseAVX > 2 && !VM_Version::supports_avx512dq() && size_in_bits == 512) {
          return false;
        }
      }
 3356       break;
 3357     case Op_CallLeafVector:
 3358       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3359         return false;
 3360       }
 3361       break;
 3362     case Op_AddReductionVI:
 3363       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3364         return false;
 3365       }
 3366       // fallthrough
 3367     case Op_AndReductionV:
 3368     case Op_OrReductionV:
 3369     case Op_XorReductionV:
 3370       if (is_subword_type(bt) && (UseSSE < 4)) {
 3371         return false;
 3372       }
 3373       break;
 3374     case Op_MinReductionV:
 3375     case Op_MaxReductionV:
 3376       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3377         return false;
 3378       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3379         return false;
 3380       }
 3381       // Float/Double intrinsics enabled for AVX family.
 3382       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3383         return false;
 3384       }
 3385       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3386         return false;
 3387       }
 3388       break;
 3389     case Op_VectorBlend:
 3390       if (UseAVX == 0 && size_in_bits < 128) {
 3391         return false;
 3392       }
 3393       break;
 3394     case Op_VectorTest:
 3395       if (UseSSE < 4) {
 3396         return false; // Implementation limitation
 3397       } else if (size_in_bits < 32) {
 3398         return false; // Implementation limitation
 3399       }
 3400       break;
 3401     case Op_VectorLoadShuffle:
 3402     case Op_VectorRearrange:
      if (vlen == 2) {
 3404         return false; // Implementation limitation due to how shuffle is loaded
 3405       } else if (size_in_bits == 256 && UseAVX < 2) {
 3406         return false; // Implementation limitation
 3407       }
 3408       break;
 3409     case Op_VectorLoadMask:
 3410     case Op_VectorMaskCast:
 3411       if (size_in_bits == 256 && UseAVX < 2) {
 3412         return false; // Implementation limitation
 3413       }
 3414       // fallthrough
 3415     case Op_VectorStoreMask:
 3416       if (vlen == 2) {
 3417         return false; // Implementation limitation
 3418       }
 3419       break;
 3420     case Op_PopulateIndex:
 3421       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3422         return false;
 3423       }
 3424       break;
 3425     case Op_VectorCastB2X:
 3426     case Op_VectorCastS2X:
 3427     case Op_VectorCastI2X:
 3428       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3429         return false;
 3430       }
 3431       break;
 3432     case Op_VectorCastL2X:
 3433       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3434         return false;
 3435       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3436         return false;
 3437       }
 3438       break;
 3439     case Op_VectorCastF2X: {
        // As per JLS section 5.1.3, narrowing conversions to sub-word types
        // happen after an intermediate conversion to integer, and the special
        // handling code needs the AVX2 vpcmpeqd instruction for 256-bit vectors.
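        // For example, (byte) 300.0f first converts to int ((int) 300.0f == 300)
        // and is then narrowed int -> byte, yielding 44.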
 3443         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3444         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3445           return false;
 3446         }
 3447       }
 3448       // fallthrough
 3449     case Op_VectorCastD2X:
 3450       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3451         return false;
 3452       }
 3453       break;
 3454     case Op_VectorCastF2HF:
 3455     case Op_VectorCastHF2F:
      if (!VM_Version::supports_f16c() &&
          (!VM_Version::supports_evex() ||
           (size_in_bits != 512 && !VM_Version::supports_avx512vl()))) {
 3459         return false;
 3460       }
 3461       break;
 3462     case Op_RoundVD:
 3463       if (!VM_Version::supports_avx512dq()) {
 3464         return false;
 3465       }
 3466       break;
 3467     case Op_MulReductionVI:
 3468       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3469         return false;
 3470       }
 3471       break;
 3472     case Op_LoadVectorGatherMasked:
 3473       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3474         return false;
 3475       }
 3476       if (is_subword_type(bt) &&
 3477          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3478           (size_in_bits < 64)                                      ||
 3479           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3480         return false;
 3481       }
 3482       break;
 3483     case Op_StoreVectorScatterMasked:
 3484     case Op_StoreVectorScatter:
 3485       if (is_subword_type(bt)) {
 3486         return false;
 3487       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3488         return false;
 3489       }
 3490       // fallthrough
 3491     case Op_LoadVectorGather:
 3492       if (!is_subword_type(bt) && size_in_bits == 64) {
 3493         return false;
 3494       }
 3495       if (is_subword_type(bt) && size_in_bits < 64) {
 3496         return false;
 3497       }
 3498       break;
 3499     case Op_SaturatingAddV:
 3500     case Op_SaturatingSubV:
 3501       if (UseAVX < 1) {
 3502         return false; // Implementation limitation
 3503       }
 3504       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3505         return false;
 3506       }
 3507       break;
 3508     case Op_SelectFromTwoVector:
 3509        if (size_in_bits < 128) {
 3510          return false;
 3511        }
 3512        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3513          return false;
 3514        }
 3515        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3516          return false;
 3517        }
 3518        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3519          return false;
 3520        }
 3521        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3522          return false;
 3523        }
 3524        break;
 3525     case Op_MaskAll:
 3526       if (!VM_Version::supports_evex()) {
 3527         return false;
 3528       }
 3529       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3530         return false;
 3531       }
 3532       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3533         return false;
 3534       }
 3535       break;
 3536     case Op_VectorMaskCmp:
 3537       if (vlen < 2 || size_in_bits < 32) {
 3538         return false;
 3539       }
 3540       break;
 3541     case Op_CompressM:
 3542       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3543         return false;
 3544       }
 3545       break;
 3546     case Op_CompressV:
 3547     case Op_ExpandV:
 3548       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3549         return false;
 3550       }
      if (size_in_bits < 128) {
        return false;
      }
      // fallthrough
 3554     case Op_VectorLongToMask:
 3555       if (UseAVX < 1) {
 3556         return false;
 3557       }
 3558       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3559         return false;
 3560       }
 3561       break;
 3562     case Op_SignumVD:
 3563     case Op_SignumVF:
 3564       if (UseAVX < 1) {
 3565         return false;
 3566       }
 3567       break;
 3568     case Op_PopCountVI:
 3569     case Op_PopCountVL: {
 3570         if (!is_pop_count_instr_target(bt) &&
 3571             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3572           return false;
 3573         }
 3574       }
 3575       break;
 3576     case Op_ReverseV:
 3577     case Op_ReverseBytesV:
 3578       if (UseAVX < 2) {
 3579         return false;
 3580       }
 3581       break;
 3582     case Op_CountTrailingZerosV:
 3583     case Op_CountLeadingZerosV:
 3584       if (UseAVX < 2) {
 3585         return false;
 3586       }
 3587       break;
 3588   }
  return true;  // Match rules are supported by default.
 3590 }
 3591 
 3592 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of
  // a pattern based on the IR opcode. Most unary/binary/ternary masked
  // operations share the IR nodes of their non-masked counterparts, with the
  // mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation
  // patterns by returning a default false value for all opcodes apart from
  // the ones whose masked instruction patterns are defined in this file.
 3599   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3600     return false;
 3601   }
 3602 
 3603   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3604   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3605     return false;
 3606   }
  switch (opcode) {
 3608     // Unary masked operations
 3609     case Op_AbsVB:
 3610     case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      // fallthrough
 3614     case Op_AbsVI:
 3615     case Op_AbsVL:
 3616       return true;
 3617 
 3618     // Ternary masked operations
 3619     case Op_FmaVF:
 3620     case Op_FmaVD:
 3621       return true;
 3622 
 3623     case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
 3625         return false;
 3626       }
 3627       return true;
 3628 
 3629     // Binary masked operations
 3630     case Op_AddVB:
 3631     case Op_AddVS:
 3632     case Op_SubVB:
 3633     case Op_SubVS:
 3634     case Op_MulVS:
 3635     case Op_LShiftVS:
 3636     case Op_RShiftVS:
 3637     case Op_URShiftVS:
 3638       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3639       if (!VM_Version::supports_avx512bw()) {
 3640         return false;  // Implementation limitation
 3641       }
 3642       return true;
 3643 
 3644     case Op_MulVL:
 3645       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3646       if (!VM_Version::supports_avx512dq()) {
 3647         return false;  // Implementation limitation
 3648       }
 3649       return true;
 3650 
 3651     case Op_AndV:
 3652     case Op_OrV:
 3653     case Op_XorV:
 3654     case Op_RotateRightV:
 3655     case Op_RotateLeftV:
 3656       if (bt != T_INT && bt != T_LONG) {
 3657         return false; // Implementation limitation
 3658       }
 3659       return true;
 3660 
 3661     case Op_VectorLoadMask:
 3662       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3663       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3664         return false;
 3665       }
 3666       return true;
 3667 
 3668     case Op_AddVI:
 3669     case Op_AddVL:
 3670     case Op_AddVF:
 3671     case Op_AddVD:
 3672     case Op_SubVI:
 3673     case Op_SubVL:
 3674     case Op_SubVF:
 3675     case Op_SubVD:
 3676     case Op_MulVI:
 3677     case Op_MulVF:
 3678     case Op_MulVD:
 3679     case Op_DivVF:
 3680     case Op_DivVD:
 3681     case Op_SqrtVF:
 3682     case Op_SqrtVD:
 3683     case Op_LShiftVI:
 3684     case Op_LShiftVL:
 3685     case Op_RShiftVI:
 3686     case Op_RShiftVL:
 3687     case Op_URShiftVI:
 3688     case Op_URShiftVL:
 3689     case Op_LoadVectorMasked:
 3690     case Op_StoreVectorMasked:
 3691     case Op_LoadVectorGatherMasked:
 3692     case Op_StoreVectorScatterMasked:
 3693       return true;
 3694 
 3695     case Op_UMinV:
 3696     case Op_UMaxV:
 3697       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3698         return false;
 3699       } // fallthrough
 3700     case Op_MaxV:
 3701     case Op_MinV:
 3702       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3703         return false; // Implementation limitation
 3704       }
 3705       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3706         return false; // Implementation limitation
 3707       }
 3708       return true;
 3709     case Op_SaturatingAddV:
 3710     case Op_SaturatingSubV:
 3711       if (!is_subword_type(bt)) {
 3712         return false;
 3713       }
 3714       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3715         return false; // Implementation limitation
 3716       }
 3717       return true;
 3718 
 3719     case Op_VectorMaskCmp:
 3720       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3721         return false; // Implementation limitation
 3722       }
 3723       return true;
 3724 
 3725     case Op_VectorRearrange:
 3726       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3727         return false; // Implementation limitation
 3728       }
 3729       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3730         return false; // Implementation limitation
 3731       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3732         return false; // Implementation limitation
 3733       }
 3734       return true;
 3735 
 3736     // Binary Logical operations
 3737     case Op_AndVMask:
 3738     case Op_OrVMask:
 3739     case Op_XorVMask:
 3740       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3741         return false; // Implementation limitation
 3742       }
 3743       return true;
 3744 
 3745     case Op_PopCountVI:
 3746     case Op_PopCountVL:
 3747       if (!is_pop_count_instr_target(bt)) {
 3748         return false;
 3749       }
 3750       return true;
 3751 
 3752     case Op_MaskAll:
 3753       return true;
 3754 
 3755     case Op_CountLeadingZerosV:
 3756       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3757         return true;
 3758       }
 3759     default:
 3760       return false;
 3761   }
 3762 }
 3763 
 3764 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3765   return false;
 3766 }
 3767 
 3768 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3769 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3770   switch (elem_bt) {
 3771     case T_BYTE:  return false;
 3772     case T_SHORT: return !VM_Version::supports_avx512bw();
 3773     case T_INT:   return !VM_Version::supports_avx();
 3774     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3775     default:
 3776       ShouldNotReachHere();
 3777       return false;
 3778   }
 3779 }
 3780 
 3781 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3782   // Prefer predicate if the mask type is "TypeVectMask".
 3783   return vt->isa_vectmask() != nullptr;
 3784 }
 3785 
 3786 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3787   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3788   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3789   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3790       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3791     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3792     return new legVecZOper();
 3793   }
 3794   if (legacy) {
 3795     switch (ideal_reg) {
 3796       case Op_VecS: return new legVecSOper();
 3797       case Op_VecD: return new legVecDOper();
 3798       case Op_VecX: return new legVecXOper();
 3799       case Op_VecY: return new legVecYOper();
 3800       case Op_VecZ: return new legVecZOper();
 3801     }
 3802   } else {
 3803     switch (ideal_reg) {
 3804       case Op_VecS: return new vecSOper();
 3805       case Op_VecD: return new vecDOper();
 3806       case Op_VecX: return new vecXOper();
 3807       case Op_VecY: return new vecYOper();
 3808       case Op_VecZ: return new vecZOper();
 3809     }
 3810   }
 3811   ShouldNotReachHere();
 3812   return nullptr;
 3813 }
 3814 
 3815 bool Matcher::is_reg2reg_move(MachNode* m) {
 3816   switch (m->rule()) {
 3817     case MoveVec2Leg_rule:
 3818     case MoveLeg2Vec_rule:
 3819     case MoveF2VL_rule:
 3820     case MoveF2LEG_rule:
 3821     case MoveVL2F_rule:
 3822     case MoveLEG2F_rule:
 3823     case MoveD2VL_rule:
 3824     case MoveD2LEG_rule:
 3825     case MoveVL2D_rule:
 3826     case MoveLEG2D_rule:
 3827       return true;
 3828     default:
 3829       return false;
 3830   }
 3831 }
 3832 
 3833 bool Matcher::is_generic_vector(MachOper* opnd) {
 3834   switch (opnd->opcode()) {
 3835     case VEC:
 3836     case LEGVEC:
 3837       return true;
 3838     default:
 3839       return false;
 3840   }
 3841 }
 3842 
 3843 //------------------------------------------------------------------------
 3844 
 3845 const RegMask* Matcher::predicate_reg_mask(void) {
 3846   return &_VECTMASK_REG_mask;
 3847 }
 3848 
 3849 // Max vector size in bytes. 0 if not supported.
 3850 int Matcher::vector_width_in_bytes(BasicType bt) {
 3851   assert(is_java_primitive(bt), "only primitive type vectors");
 3852   // SSE2 supports 128bit vectors for all types.
 3853   // AVX2 supports 256bit vectors for all types.
  // AVX-512 (EVEX) supports 512bit vectors for all types.
 3855   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3856   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3857   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3858     size = (UseAVX > 2) ? 64 : 32;
 3859   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3860     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3861   // Use flag to limit vector size.
 3862   size = MIN2(size,(int)MaxVectorSize);
 3863   // Minimum 2 values in vector (or 4 for bytes).
 3864   switch (bt) {
 3865   case T_DOUBLE:
 3866   case T_LONG:
 3867     if (size < 16) return 0;
 3868     break;
 3869   case T_FLOAT:
 3870   case T_INT:
 3871     if (size < 8) return 0;
 3872     break;
 3873   case T_BOOLEAN:
 3874     if (size < 4) return 0;
 3875     break;
 3876   case T_CHAR:
 3877     if (size < 4) return 0;
 3878     break;
 3879   case T_BYTE:
 3880     if (size < 4) return 0;
 3881     break;
 3882   case T_SHORT:
 3883     if (size < 4) return 0;
 3884     break;
 3885   default:
 3886     ShouldNotReachHere();
 3887   }
 3888   return size;
 3889 }
 3890 
 3891 // Limits on vector size (number of elements) loaded into vector.
 3892 int Matcher::max_vector_size(const BasicType bt) {
 3893   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3894 }
 3895 int Matcher::min_vector_size(const BasicType bt) {
 3896   int max_size = max_vector_size(bt);
 3897   // Min size which can be loaded into vector is 4 bytes.
 3898   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3899   // Support for calling svml double64 vectors
 3900   if (bt == T_DOUBLE) {
 3901     size = 1;
 3902   }
 3903   return MIN2(size,max_size);
 3904 }
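
// Worked example (illustrative): with UseAVX == 2 and MaxVectorSize == 32,
// vector_width_in_bytes(T_INT) is 32, so max_vector_size(T_INT) == 32/4 == 8
// elements, while min_vector_size(T_INT) == 2 elements (8 bytes, the smallest
// loadable size for 4-byte lanes).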
 3905 
 3906 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3907   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3908   // by default on Cascade Lake
 3909   if (VM_Version::is_default_intel_cascade_lake()) {
 3910     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3911   }
 3912   return Matcher::max_vector_size(bt);
 3913 }
 3914 
 3915 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3916   return -1;
 3917 }
 3918 
 3919 // Vector ideal reg corresponding to specified size in bytes
 3920 uint Matcher::vector_ideal_reg(int size) {
 3921   assert(MaxVectorSize >= size, "");
 3922   switch(size) {
 3923     case  4: return Op_VecS;
 3924     case  8: return Op_VecD;
 3925     case 16: return Op_VecX;
 3926     case 32: return Op_VecY;
 3927     case 64: return Op_VecZ;
 3928   }
 3929   ShouldNotReachHere();
 3930   return 0;
 3931 }
 3932 
 3933 // Check for shift by small constant as well
 3934 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3935   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3936       shift->in(2)->get_int() <= 3 &&
 3937       // Are there other uses besides address expressions?
 3938       !matcher->is_visited(shift)) {
 3939     address_visited.set(shift->_idx); // Flag as address_visited
 3940     mstack.push(shift->in(2), Matcher::Visit);
 3941     Node *conv = shift->in(1);
    // Allow the Matcher to match the rule that bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
 3945     if (conv->Opcode() == Op_ConvI2L &&
 3946         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3947         // Are there other uses besides address expressions?
 3948         !matcher->is_visited(conv)) {
 3949       address_visited.set(conv->_idx); // Flag as address_visited
 3950       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3951     } else {
 3952       mstack.push(conv, Matcher::Pre_Visit);
 3953     }
 3954     return true;
 3955   }
 3956   return false;
 3957 }
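
// Illustrative example of what clone_shift() enables: for an int array access
// a[i], the address sub-graph is roughly
//   (AddP base (AddP base (LShiftL (ConvI2L i) 2)) offset)
// Cloning the shift (and, for a provably non-negative index, bypassing the
// ConvI2L) lets the matcher fold everything into one addressing mode,
//   [base + i*4 + offset]
// instead of first computing the scaled index into a register.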
 3958 
// This function identifies sub-graphs in which a 'load' node is
// an input to two different nodes, such that the sub-graph can be
// matched with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
 3965 //
 3966 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3967 // This is a temporary solution until we make DAGs expressible in ADL.
 3968 template<typename ConType>
 3969 class FusedPatternMatcher {
 3970   Node* _op1_node;
 3971   Node* _mop_node;
 3972   int _con_op;
 3973 
 3974   static int match_next(Node* n, int next_op, int next_op_idx) {
 3975     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3976       return -1;
 3977     }
 3978 
 3979     if (next_op_idx == -1) { // n is commutative, try rotations
 3980       if (n->in(1)->Opcode() == next_op) {
 3981         return 1;
 3982       } else if (n->in(2)->Opcode() == next_op) {
 3983         return 2;
 3984       }
 3985     } else {
 3986       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3987       if (n->in(next_op_idx)->Opcode() == next_op) {
 3988         return next_op_idx;
 3989       }
 3990     }
 3991     return -1;
 3992   }
 3993 
 3994  public:
 3995   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 3996     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 3997 
 3998   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 3999              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4000              typename ConType::NativeType con_value) {
 4001     if (_op1_node->Opcode() != op1) {
 4002       return false;
 4003     }
 4004     if (_mop_node->outcnt() > 2) {
 4005       return false;
 4006     }
 4007     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4008     if (op1_op2_idx == -1) {
 4009       return false;
 4010     }
 4011     // Memory operation must be the other edge
 4012     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4013 
 4014     // Check that the mop node is really what we want
 4015     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4016       Node* op2_node = _op1_node->in(op1_op2_idx);
 4017       if (op2_node->outcnt() > 1) {
 4018         return false;
 4019       }
 4020       assert(op2_node->Opcode() == op2, "Should be");
 4021       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4022       if (op2_con_idx == -1) {
 4023         return false;
 4024       }
 4025       // Memory operation must be the other edge
 4026       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4027       // Check that the memory operation is the same node
 4028       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4029         // Now check the constant
 4030         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4031         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4032           return true;
 4033         }
 4034       }
 4035     }
 4036     return false;
 4037   }
 4038 };
 4039 
 4040 static bool is_bmi_pattern(Node* n, Node* m) {
 4041   assert(UseBMI1Instructions, "sanity");
 4042   if (n != nullptr && m != nullptr) {
 4043     if (m->Opcode() == Op_LoadI) {
 4044       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4045       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4046              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4047              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4048     } else if (m->Opcode() == Op_LoadL) {
 4049       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4050       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4051              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4052              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4053     }
 4054   }
 4055   return false;
 4056 }
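
// For reference, with x denoting the loaded value, the shapes accepted above
// correspond to:
//   (AndI (SubI 0 x) x)   -> blsi  : isolate lowest set bit,     x & -x
//   (AndI (AddI x -1) x)  -> blsr  : reset lowest set bit,       x & (x - 1)
//   (XorI (AddI x -1) x)  -> blsmsk: mask up to lowest set bit,  x ^ (x - 1)
// and likewise for the 64-bit (L) forms.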
 4057 
 4058 // Should the matcher clone input 'm' of node 'n'?
 4059 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4060   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4061   if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
 4062     mstack.push(m, Visit);
 4063     return true;
 4064   }
 4065   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4066     mstack.push(m, Visit);           // m = ShiftCntV
 4067     return true;
 4068   }
 4069   if (is_encode_and_store_pattern(n, m)) {
 4070     mstack.push(m, Visit);
 4071     return true;
 4072   }
 4073   return false;
 4074 }
 4075 
 4076 // Should the Matcher clone shifts on addressing modes, expecting them
 4077 // to be subsumed into complex addressing expressions or compute them
 4078 // into registers?
 4079 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4080   Node *off = m->in(AddPNode::Offset);
 4081   if (off->is_Con()) {
 4082     address_visited.test_set(m->_idx); // Flag as address_visited
 4083     Node *adr = m->in(AddPNode::Address);
 4084 
 4085     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4086     // AtomicAdd is not an addressing expression.
 4087     // Cheap to find it by looking for screwy base.
 4088     if (adr->is_AddP() &&
 4089         !adr->in(AddPNode::Base)->is_top() &&
 4090         !adr->in(AddPNode::Offset)->is_Con() &&
 4091         off->get_long() == (int) (off->get_long()) && // immL32
 4092         // Are there other uses besides address expressions?
 4093         !is_visited(adr)) {
 4094       address_visited.set(adr->_idx); // Flag as address_visited
 4095       Node *shift = adr->in(AddPNode::Offset);
 4096       if (!clone_shift(shift, this, mstack, address_visited)) {
 4097         mstack.push(shift, Pre_Visit);
 4098       }
 4099       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4100       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4101     } else {
 4102       mstack.push(adr, Pre_Visit);
 4103     }
 4104 
 4105     // Clone X+offset as it also folds into most addressing expressions
 4106     mstack.push(off, Visit);
 4107     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4108     return true;
 4109   } else if (clone_shift(off, this, mstack, address_visited)) {
 4110     address_visited.test_set(m->_idx); // Flag as address_visited
 4111     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4112     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4113     return true;
 4114   }
 4115   return false;
 4116 }
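
// Illustrative example: for a graph such as
//   (AddP (AddP base (LShiftL idx con)) imm32)
// the constant offset satisfies the immL32 check above, so the shifted index
// and the offset are both cloned and the whole expression collapses into a
// single operand like [base + idx*8 + disp32] (for con == 3).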
 4117 
 4118 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4119   switch (bt) {
 4120     case BoolTest::eq:
 4121       return Assembler::eq;
 4122     case BoolTest::ne:
 4123       return Assembler::neq;
 4124     case BoolTest::le:
 4125     case BoolTest::ule:
 4126       return Assembler::le;
 4127     case BoolTest::ge:
 4128     case BoolTest::uge:
 4129       return Assembler::nlt;
 4130     case BoolTest::lt:
 4131     case BoolTest::ult:
 4132       return Assembler::lt;
 4133     case BoolTest::gt:
 4134     case BoolTest::ugt:
 4135       return Assembler::nle;
 4136     default : ShouldNotReachHere(); return Assembler::_false;
 4137   }
 4138 }
 4139 
 4140 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4141   switch (bt) {
 4142   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4143   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4144   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4145   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4146   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4147   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4148   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4149   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4150   }
 4151 }
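
// Example of why ne is special (JLS 15.21.1): with x == NaN, (x != x) is true
// while (x == x), (x < x), and (x <= x) are all false. Hence ne maps to the
// unordered predicate NEQ_UQ (true when either input is NaN), and the other
// relations map to ordered predicates, which yield false on NaN inputs.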
 4152 
 4153 // Helper methods for MachSpillCopyNode::implementation().
 4154 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4155                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4156   assert(ireg == Op_VecS || // 32bit vector
 4157          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4158           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4159          "no non-adjacent vector moves" );
 4160   if (masm) {
 4161     switch (ireg) {
 4162     case Op_VecS: // copy whole register
 4163     case Op_VecD:
 4164     case Op_VecX:
 4165       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4166         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4167       } else {
 4168         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
 4170       break;
 4171     case Op_VecY:
 4172       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4173         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4174       } else {
 4175         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
 4177       break;
 4178     case Op_VecZ:
 4179       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4180       break;
 4181     default:
 4182       ShouldNotReachHere();
 4183     }
 4184 #ifndef PRODUCT
 4185   } else {
 4186     switch (ireg) {
 4187     case Op_VecS:
 4188     case Op_VecD:
 4189     case Op_VecX:
 4190       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4191       break;
 4192     case Op_VecY:
 4193     case Op_VecZ:
 4194       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4195       break;
 4196     default:
 4197       ShouldNotReachHere();
 4198     }
 4199 #endif
 4200   }
 4201 }
 4202 
 4203 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4204                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4205   if (masm) {
 4206     if (is_load) {
 4207       switch (ireg) {
 4208       case Op_VecS:
 4209         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4210         break;
 4211       case Op_VecD:
 4212         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4213         break;
 4214       case Op_VecX:
 4215         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4216           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4217         } else {
 4218           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4220         }
 4221         break;
 4222       case Op_VecY:
 4223         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4224           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4225         } else {
 4226           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
 4228         }
 4229         break;
 4230       case Op_VecZ:
 4231         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4232         break;
 4233       default:
 4234         ShouldNotReachHere();
 4235       }
 4236     } else { // store
 4237       switch (ireg) {
 4238       case Op_VecS:
 4239         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4240         break;
 4241       case Op_VecD:
 4242         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4243         break;
 4244       case Op_VecX:
 4245         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4246           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
 4249           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4250         }
 4251         break;
 4252       case Op_VecY:
 4253         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4254           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
 4257           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4258         }
 4259         break;
 4260       case Op_VecZ:
 4261         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4262         break;
 4263       default:
 4264         ShouldNotReachHere();
 4265       }
 4266     }
 4267 #ifndef PRODUCT
 4268   } else {
 4269     if (is_load) {
 4270       switch (ireg) {
 4271       case Op_VecS:
 4272         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4273         break;
 4274       case Op_VecD:
 4275         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4276         break;
 4277        case Op_VecX:
 4278         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4279         break;
 4280       case Op_VecY:
 4281       case Op_VecZ:
 4282         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4283         break;
 4284       default:
 4285         ShouldNotReachHere();
 4286       }
 4287     } else { // store
 4288       switch (ireg) {
 4289       case Op_VecS:
 4290         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4291         break;
 4292       case Op_VecD:
 4293         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4294         break;
 4295        case Op_VecX:
 4296         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4297         break;
 4298       case Op_VecY:
 4299       case Op_VecZ:
 4300         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4301         break;
 4302       default:
 4303         ShouldNotReachHere();
 4304       }
 4305     }
 4306 #endif
 4307   }
 4308 }
 4309 
 4310 template <class T>
 4311 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4312   int size = type2aelembytes(bt) * len;
 4313   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4314   for (int i = 0; i < len; i++) {
 4315     int offset = i * type2aelembytes(bt);
 4316     switch (bt) {
 4317       case T_BYTE: val->at(i) = con; break;
 4318       case T_SHORT: {
 4319         jshort c = con;
 4320         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4321         break;
 4322       }
 4323       case T_INT: {
 4324         jint c = con;
 4325         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4326         break;
 4327       }
 4328       case T_LONG: {
 4329         jlong c = con;
 4330         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4331         break;
 4332       }
 4333       case T_FLOAT: {
 4334         jfloat c = con;
 4335         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4336         break;
 4337       }
 4338       case T_DOUBLE: {
 4339         jdouble c = con;
 4340         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4341         break;
 4342       }
 4343       default: assert(false, "%s", type2name(bt));
 4344     }
 4345   }
 4346   return val;
 4347 }
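
// Illustrative usage (hypothetical call, not taken from a rule in this file):
//   vreplicate_imm(T_SHORT, (jshort)0x0102, 4)
// yields an 8-byte array with the little-endian lane pattern
//   { 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01 }
// i.e. the byte image of the replicated vector constant.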
 4348 
 4349 static inline jlong high_bit_set(BasicType bt) {
 4350   switch (bt) {
 4351     case T_BYTE:  return 0x8080808080808080;
 4352     case T_SHORT: return 0x8000800080008000;
 4353     case T_INT:   return 0x8000000080000000;
 4354     case T_LONG:  return 0x8000000000000000;
 4355     default:
 4356       ShouldNotReachHere();
 4357       return 0;
 4358   }
 4359 }
 4360 
 4361 #ifndef PRODUCT
 4362   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4363     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4364   }
 4365 #endif
 4366 
 4367   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
 4368     __ nop(_count);
 4369   }
 4370 
 4371   uint MachNopNode::size(PhaseRegAlloc*) const {
 4372     return _count;
 4373   }
 4374 
 4375 #ifndef PRODUCT
 4376   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4377     st->print("# breakpoint");
 4378   }
 4379 #endif
 4380 
 4381   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
 4382     __ int3();
 4383   }
 4384 
 4385   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
 4386     return MachNode::size(ra_);
 4387   }
 4388 
 4389 %}
 4390 
 4391 //----------ENCODING BLOCK-----------------------------------------------------
 4392 // This block specifies the encoding classes used by the compiler to
 4393 // output byte streams.  Encoding classes are parameterized macros
 4394 // used by Machine Instruction Nodes in order to generate the bit
 4395 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 4399 // which returns its register number when queried.  CONST_INTER causes
 4400 // an operand to generate a function which returns the value of the
 4401 // constant when queried.  MEMORY_INTER causes an operand to generate
 4402 // four functions which return the Base Register, the Index Register,
 4403 // the Scale Value, and the Offset Value of the operand when queried.
 4404 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e. the encoding bits for the instruction)
 4406 // associated with each basic boolean condition for a conditional
 4407 // instruction.
 4408 //
// Instructions specify two basic values for encoding.  (A function is
// also available to check whether a constant displacement is an oop.)
// They use the ins_encode keyword to specify their encoding
 4412 // classes (which must be a sequence of enc_class names, and their
 4413 // parameters, specified in the encoding block), and they use the
 4414 // opcode keyword to specify, in order, their primary, secondary, and
 4415 // tertiary opcode.  Only the opcode sections which a particular
 4416 // instruction needs for encoding need to be specified.
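//
// For illustration only (hypothetical rule, not defined at this point in the
// file), an instruction ties into an encoding class like this:
//   instruct divI(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div) %{
//     ...
//     ins_encode(cdql_enc(div));
//     ...
//   %}
// where cdql_enc is one of the enc_class definitions below.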
 4417 encode %{
 4418   enc_class cdql_enc(no_rax_rdx_RegI div)
 4419   %{
 4420     // Full implementation of Java idiv and irem; checks for
 4421     // special case as described in JVM spec., p.243 & p.271.
 4422     //
 4423     //         normal case                           special case
 4424     //
 4425     // input : rax: dividend                         min_int
 4426     //         reg: divisor                          -1
 4427     //
 4428     // output: rax: quotient  (= rax idiv reg)       min_int
 4429     //         rdx: remainder (= rax irem reg)       0
 4430     //
    //  Code sequence:
 4432     //
 4433     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4434     //    5:   75 07/08                jne    e <normal>
 4435     //    7:   33 d2                   xor    %edx,%edx
 4436     //  [div >= 8 -> offset + 1]
 4437     //  [REX_B]
 4438     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4439     //    c:   74 03/04                je     11 <done>
 4440     // 000000000000000e <normal>:
 4441     //    e:   99                      cltd
 4442     //  [div >= 8 -> offset + 1]
 4443     //  [REX_B]
 4444     //    f:   f7 f9                   idiv   $div
 4445     // 0000000000000011 <done>:
 4446     Label normal;
 4447     Label done;
 4448 
 4449     // cmp    $0x80000000,%eax
 4450     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4451 
 4452     // jne    e <normal>
 4453     __ jccb(Assembler::notEqual, normal);
 4454 
 4455     // xor    %edx,%edx
 4456     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4457 
    // cmp    $0xffffffffffffffff,$div
 4459     __ cmpl($div$$Register, -1);
 4460 
 4461     // je     11 <done>
 4462     __ jccb(Assembler::equal, done);
 4463 
 4464     // <normal>
 4465     // cltd
 4466     __ bind(normal);
 4467     __ cdql();
 4468 
 4469     // idivl
 4470     // <done>
 4471     __ idivl($div$$Register);
 4472     __ bind(done);
 4473   %}
 4474 
 4475   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4476   %{
 4477     // Full implementation of Java ldiv and lrem; checks for
 4478     // special case as described in JVM spec., p.243 & p.271.
 4479     //
 4480     //         normal case                           special case
 4481     //
 4482     // input : rax: dividend                         min_long
 4483     //         reg: divisor                          -1
 4484     //
 4485     // output: rax: quotient  (= rax idiv reg)       min_long
 4486     //         rdx: remainder (= rax irem reg)       0
 4487     //
    //  Code sequence:
 4489     //
 4490     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4491     //    7:   00 00 80
 4492     //    a:   48 39 d0                cmp    %rdx,%rax
 4493     //    d:   75 08                   jne    17 <normal>
 4494     //    f:   33 d2                   xor    %edx,%edx
 4495     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4496     //   15:   74 05                   je     1c <done>
 4497     // 0000000000000017 <normal>:
 4498     //   17:   48 99                   cqto
 4499     //   19:   48 f7 f9                idiv   $div
 4500     // 000000000000001c <done>:
 4501     Label normal;
 4502     Label done;
 4503 
 4504     // mov    $0x8000000000000000,%rdx
 4505     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4506 
 4507     // cmp    %rdx,%rax
 4508     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4509 
 4510     // jne    17 <normal>
 4511     __ jccb(Assembler::notEqual, normal);
 4512 
 4513     // xor    %edx,%edx
 4514     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4515 
 4516     // cmp    $0xffffffffffffffff,$div
 4517     __ cmpq($div$$Register, -1);
 4518 
    // je     1c <done>
 4520     __ jccb(Assembler::equal, done);
 4521 
 4522     // <normal>
 4523     // cqto
 4524     __ bind(normal);
 4525     __ cdqq();
 4526 
    // idivq
 4528     // <done>
 4529     __ idivq($div$$Register);
 4530     __ bind(done);
 4531   %}
 4532 
 4533   enc_class clear_avx %{
 4534     DEBUG_ONLY(int off0 = __ offset());
 4535     if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
 4539       __ vzeroupper();
 4540     }
 4541     DEBUG_ONLY(int off1 = __ offset());
 4542     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4543   %}
 4544 
 4545   enc_class Java_To_Runtime(method meth) %{
 4546     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4547     __ call(r10);
 4548     __ post_call_nop();
 4549   %}
 4550 
 4551   enc_class Java_Static_Call(method meth)
 4552   %{
 4553     // JAVA STATIC CALL
 4554     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4555     // determine who we intended to call.
 4556     if (!_method) {
 4557       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4558     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4559       // The NOP here is purely to ensure that eliding a call to
 4560       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4561       __ addr_nop_5();
 4562       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4563     } else {
 4564       int method_index = resolved_method_index(masm);
 4565       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4566                                                   : static_call_Relocation::spec(method_index);
 4567       address mark = __ pc();
 4568       int call_offset = __ offset();
 4569       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4570       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4571         // Calls of the same statically bound method can share
 4572         // a stub to the interpreter.
 4573         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4574       } else {
 4575         // Emit stubs for static call.
 4576         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4577         __ clear_inst_mark();
 4578         if (stub == nullptr) {
 4579           ciEnv::current()->record_failure("CodeCache is full");
 4580           return;
 4581         }
 4582       }
 4583     }
 4584     __ post_call_nop();
 4585   %}
 4586 
 4587   enc_class Java_Dynamic_Call(method meth) %{
 4588     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4589     __ post_call_nop();
 4590   %}
 4591 
 4592   enc_class call_epilog %{
 4593     if (VerifyStackAtCalls) {
 4594       // Check that stack depth is unchanged: find majik cookie on stack
 4595       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4596       Label L;
 4597       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4598       __ jccb(Assembler::equal, L);
 4599       // Die if stack mismatch
 4600       __ int3();
 4601       __ bind(L);
 4602     }
 4603   %}
 4604 
 4605 %}
 4606 
 4607 //----------FRAME--------------------------------------------------------------
 4608 // Definition of frame structure and management information.
 4609 //
 4610 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4611 //                             |   (to get allocators register number
 4612 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4613 //  r   CALLER     |        |
 4614 //  o     |        +--------+      pad to even-align allocators stack-slot
 4615 //  w     V        |  pad0  |        numbers; owned by CALLER
 4616 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4617 //  h     ^        |   in   |  5
 4618 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4619 //  |     |        |        |  3
 4620 //  |     |        +--------+
 4621 //  V     |        | old out|      Empty on Intel, window on Sparc
 4622 //        |    old |preserve|      Must be even aligned.
 4623 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4624 //        |        |   in   |  3   area for Intel ret address
 4625 //     Owned by    |preserve|      Empty on Sparc.
 4626 //       SELF      +--------+
 4627 //        |        |  pad2  |  2   pad to align old SP
 4628 //        |        +--------+  1
 4629 //        |        | locks  |  0
 4630 //        |        +--------+----> OptoReg::stack0(), even aligned
 4631 //        |        |  pad1  | 11   pad to align new SP
 4632 //        |        +--------+
 4633 //        |        |        | 10
 4634 //        |        | spills |  9   spills
 4635 //        V        |        |  8   (pad0 slot for callee)
 4636 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4637 //        ^        |  out   |  7
 4638 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4639 //     Owned by    +--------+
 4640 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4641 //        |    new |preserve|      Must be even-aligned.
 4642 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4643 //        |        |        |
 4644 //
 4645 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4646 //         known from SELF's arguments and the Java calling convention.
 4647 //         Region 6-7 is determined per call site.
 4648 // Note 2: If the calling convention leaves holes in the incoming argument
 4649 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4650 //         are owned by the CALLEE.  Holes should not be necessary in the
 4651 //         incoming area, as the Java calling convention is completely under
 4652 //         the control of the AD file.  Doubles can be sorted and packed to
 4653 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4654 //         varargs C calling conventions.
 4655 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4656 //         even aligned with pad0 as needed.
 4657 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4658 //         region 6-11 is even aligned; it may be padded out more so that
 4659 //         the region from SP to FP meets the minimum stack alignment.
 4660 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4661 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4662 //         SP meets the minimum alignment.
 4663 
 4664 frame
 4665 %{
  // These registers define part of the calling convention
  // between compiled code and the interpreter.
 4668   inline_cache_reg(RAX);                // Inline Cache Register
 4669 
 4670   // Optional: name the operand used by cisc-spilling to access
 4671   // [stack_pointer + offset]
 4672   cisc_spilling_operand_name(indOffset32);
 4673 
 4674   // Number of stack slots consumed by locking an object
 4675   sync_stack_slots(2);
 4676 
 4677   // Compiled code's Frame Pointer
 4678   frame_pointer(RSP);
 4679 
 4680   // Interpreter stores its frame pointer in a register which is
 4681   // stored to the stack by I2CAdaptors.
 4682   // I2CAdaptors convert from interpreted java to compiled java.
 4683   interpreter_frame_pointer(RBP);
 4684 
 4685   // Stack alignment requirement
 4686   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4687 
 4688   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4689   // for calls to C.  Supports the var-args backing area for register parms.
 4690   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4691 
 4692   // The after-PROLOG location of the return address.  Location of
 4693   // return address specifies a type (REG or STACK) and a number
 4694   // representing the register number (i.e. - use a register name) or
 4695   // stack slot.
 4696   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4697   // Otherwise, it is above the locks and verification slot and alignment word
 4698   return_addr(STACK - 2 +
 4699               align_up((Compile::current()->in_preserve_stack_slots() +
 4700                         Compile::current()->fixed_slots()),
 4701                        stack_alignment_in_slots()));
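
  // Worked example (illustrative numbers only): with 4 in-preserve slots,
  // 2 fixed slots, and 16-byte stack alignment (4 slots), align_up(4 + 2, 4)
  // == 8, so the return address would be at STACK - 2 + 8 == STACK + 6.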
 4702 
 4703   // Location of compiled Java return values.  Same as C for now.
 4704   return_value
 4705   %{
 4706     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4707            "only return normal values");
 4708 
 4709     static const int lo[Op_RegL + 1] = {
 4710       0,
 4711       0,
 4712       RAX_num,  // Op_RegN
 4713       RAX_num,  // Op_RegI
 4714       RAX_num,  // Op_RegP
 4715       XMM0_num, // Op_RegF
 4716       XMM0_num, // Op_RegD
 4717       RAX_num   // Op_RegL
 4718     };
 4719     static const int hi[Op_RegL + 1] = {
 4720       0,
 4721       0,
 4722       OptoReg::Bad, // Op_RegN
 4723       OptoReg::Bad, // Op_RegI
 4724       RAX_H_num,    // Op_RegP
 4725       OptoReg::Bad, // Op_RegF
 4726       XMM0b_num,    // Op_RegD
 4727       RAX_H_num     // Op_RegL
 4728     };
 4729     // Excluded flags and vector registers.
 4730     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4731     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4732   %}
 4733 %}
 4734 
 4735 //----------ATTRIBUTES---------------------------------------------------------
 4736 //----------Operand Attributes-------------------------------------------------
 4737 op_attrib op_cost(0);        // Required cost attribute
 4738 
 4739 //----------Instruction Attributes---------------------------------------------
 4740 ins_attrib ins_cost(100);       // Required cost attribute
 4741 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4742 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4743                                 // a non-matching short branch variant
 4744                                 // of some long branch?
 4745 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4746                                 // be a power of 2) specifies the
 4747                                 // alignment that some part of the
 4748                                 // instruction (not necessarily the
 4749                                 // start) requires.  If > 1, a
 4750                                 // compute_padding() function must be
 4751                                 // provided for the instruction
 4752 
 4753 // Whether this node is expanded during code emission into a sequence of
 4754 // instructions and the first instruction can perform an implicit null check.
 4755 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4756 
 4757 //----------OPERANDS-----------------------------------------------------------
 4758 // Operand definitions must precede instruction definitions for correct parsing
 4759 // in the ADLC because operands constitute user defined types which are used in
 4760 // instruction definitions.
 4761 
 4762 //----------Simple Operands----------------------------------------------------
 4763 // Immediate Operands
 4764 // Integer Immediate
 4765 operand immI()
 4766 %{
 4767   match(ConI);
 4768 
 4769   op_cost(10);
 4770   format %{ %}
 4771   interface(CONST_INTER);
 4772 %}
 4773 
 4774 // Constant for test vs zero
 4775 operand immI_0()
 4776 %{
 4777   predicate(n->get_int() == 0);
 4778   match(ConI);
 4779 
 4780   op_cost(0);
 4781   format %{ %}
 4782   interface(CONST_INTER);
 4783 %}
 4784 
 4785 // Constant for increment
 4786 operand immI_1()
 4787 %{
 4788   predicate(n->get_int() == 1);
 4789   match(ConI);
 4790 
 4791   op_cost(0);
 4792   format %{ %}
 4793   interface(CONST_INTER);
 4794 %}
 4795 
 4796 // Constant for decrement
 4797 operand immI_M1()
 4798 %{
 4799   predicate(n->get_int() == -1);
 4800   match(ConI);
 4801 
 4802   op_cost(0);
 4803   format %{ %}
 4804   interface(CONST_INTER);
 4805 %}
 4806 
 4807 operand immI_2()
 4808 %{
 4809   predicate(n->get_int() == 2);
 4810   match(ConI);
 4811 
 4812   op_cost(0);
 4813   format %{ %}
 4814   interface(CONST_INTER);
 4815 %}
 4816 
 4817 operand immI_4()
 4818 %{
 4819   predicate(n->get_int() == 4);
 4820   match(ConI);
 4821 
 4822   op_cost(0);
 4823   format %{ %}
 4824   interface(CONST_INTER);
 4825 %}
 4826 
 4827 operand immI_8()
 4828 %{
 4829   predicate(n->get_int() == 8);
 4830   match(ConI);
 4831 
 4832   op_cost(0);
 4833   format %{ %}
 4834   interface(CONST_INTER);
 4835 %}
 4836 
 4837 // Valid scale values for addressing modes
 4838 operand immI2()
 4839 %{
 4840   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4841   match(ConI);
 4842 
 4843   format %{ %}
 4844   interface(CONST_INTER);
 4845 %}
 4846 
 4847 operand immU7()
 4848 %{
 4849   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4850   match(ConI);
 4851 
 4852   op_cost(5);
 4853   format %{ %}
 4854   interface(CONST_INTER);
 4855 %}
 4856 
 4857 operand immI8()
 4858 %{
 4859   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4860   match(ConI);
 4861 
 4862   op_cost(5);
 4863   format %{ %}
 4864   interface(CONST_INTER);
 4865 %}
 4866 
 4867 operand immU8()
 4868 %{
 4869   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4870   match(ConI);
 4871 
 4872   op_cost(5);
 4873   format %{ %}
 4874   interface(CONST_INTER);
 4875 %}
 4876 
 4877 operand immI16()
 4878 %{
 4879   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4880   match(ConI);
 4881 
 4882   op_cost(10);
 4883   format %{ %}
 4884   interface(CONST_INTER);
 4885 %}
 4886 
 4887 // Int Immediate non-negative
 4888 operand immU31()
 4889 %{
 4890   predicate(n->get_int() >= 0);
 4891   match(ConI);
 4892 
 4893   op_cost(0);
 4894   format %{ %}
 4895   interface(CONST_INTER);
 4896 %}
 4897 
 4898 // Pointer Immediate
 4899 operand immP()
 4900 %{
 4901   match(ConP);
 4902 
 4903   op_cost(10);
 4904   format %{ %}
 4905   interface(CONST_INTER);
 4906 %}
 4907 
 4908 // Null Pointer Immediate
 4909 operand immP0()
 4910 %{
 4911   predicate(n->get_ptr() == 0);
 4912   match(ConP);
 4913 
 4914   op_cost(5);
 4915   format %{ %}
 4916   interface(CONST_INTER);
 4917 %}
 4918 
// Narrow (compressed) Pointer Immediate
 4920 operand immN() %{
 4921   match(ConN);
 4922 
 4923   op_cost(10);
 4924   format %{ %}
 4925   interface(CONST_INTER);
 4926 %}
 4927 
 4928 operand immNKlass() %{
 4929   match(ConNKlass);
 4930 
 4931   op_cost(10);
 4932   format %{ %}
 4933   interface(CONST_INTER);
 4934 %}
 4935 
 4936 // Null Pointer Immediate
 4937 operand immN0() %{
 4938   predicate(n->get_narrowcon() == 0);
 4939   match(ConN);
 4940 
 4941   op_cost(5);
 4942   format %{ %}
 4943   interface(CONST_INTER);
 4944 %}
 4945 
 4946 operand immP31()
 4947 %{
 4948   predicate(n->as_Type()->type()->reloc() == relocInfo::none
 4949             && (n->get_ptr() >> 31) == 0);
 4950   match(ConP);
 4951 
 4952   op_cost(5);
 4953   format %{ %}
 4954   interface(CONST_INTER);
 4955 %}
 4956 
 4957 
 4958 // Long Immediate
 4959 operand immL()
 4960 %{
 4961   match(ConL);
 4962 
 4963   op_cost(20);
 4964   format %{ %}
 4965   interface(CONST_INTER);
 4966 %}
 4967 
 4968 // Long Immediate 8-bit
 4969 operand immL8()
 4970 %{
 4971   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4972   match(ConL);
 4973 
 4974   op_cost(5);
 4975   format %{ %}
 4976   interface(CONST_INTER);
 4977 %}
 4978 
 4979 // Long Immediate 32-bit unsigned
 4980 operand immUL32()
 4981 %{
 4982   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4983   match(ConL);
 4984 
 4985   op_cost(10);
 4986   format %{ %}
 4987   interface(CONST_INTER);
 4988 %}
 4989 
 4990 // Long Immediate 32-bit signed
 4991 operand immL32()
 4992 %{
 4993   predicate(n->get_long() == (int) (n->get_long()));
 4994   match(ConL);
 4995 
 4996   op_cost(15);
 4997   format %{ %}
 4998   interface(CONST_INTER);
 4999 %}
 5000 
 5001 operand immL_Pow2()
 5002 %{
 5003   predicate(is_power_of_2((julong)n->get_long()));
 5004   match(ConL);
 5005 
 5006   op_cost(15);
 5007   format %{ %}
 5008   interface(CONST_INTER);
 5009 %}
 5010 
 5011 operand immL_NotPow2()
 5012 %{
 5013   predicate(is_power_of_2((julong)~n->get_long()));
 5014   match(ConL);
 5015 
 5016   op_cost(15);
 5017   format %{ %}
 5018   interface(CONST_INTER);
 5019 %}
 5020 
 5021 // Long Immediate zero
 5022 operand immL0()
 5023 %{
 5024   predicate(n->get_long() == 0L);
 5025   match(ConL);
 5026 
 5027   op_cost(10);
 5028   format %{ %}
 5029   interface(CONST_INTER);
 5030 %}
 5031 
 5032 // Constant for increment
 5033 operand immL1()
 5034 %{
 5035   predicate(n->get_long() == 1);
 5036   match(ConL);
 5037 
 5038   format %{ %}
 5039   interface(CONST_INTER);
 5040 %}
 5041 
 5042 // Constant for decrement
 5043 operand immL_M1()
 5044 %{
 5045   predicate(n->get_long() == -1);
 5046   match(ConL);
 5047 
 5048   format %{ %}
 5049   interface(CONST_INTER);
 5050 %}
 5051 
 5052 // Long Immediate: low 32-bit mask
 5053 operand immL_32bits()
 5054 %{
 5055   predicate(n->get_long() == 0xFFFFFFFFL);
 5056   match(ConL);
 5057   op_cost(20);
 5058 
 5059   format %{ %}
 5060   interface(CONST_INTER);
 5061 %}
 5062 
 5063 // Int Immediate: 2^n-1, positive
 5064 operand immI_Pow2M1()
 5065 %{
 5066   predicate((n->get_int() > 0)
 5067             && is_power_of_2((juint)n->get_int() + 1));
 5068   match(ConI);
 5069 
 5070   op_cost(20);
 5071   format %{ %}
 5072   interface(CONST_INTER);
 5073 %}
 5074 
 5075 // Float Immediate zero
 5076 operand immF0()
 5077 %{
 5078   predicate(jint_cast(n->getf()) == 0);
 5079   match(ConF);
 5080 
 5081   op_cost(5);
 5082   format %{ %}
 5083   interface(CONST_INTER);
 5084 %}
 5085 
 5086 // Float Immediate
 5087 operand immF()
 5088 %{
 5089   match(ConF);
 5090 
 5091   op_cost(15);
 5092   format %{ %}
 5093   interface(CONST_INTER);
 5094 %}
 5095 
 5096 // Half Float Immediate
 5097 operand immH()
 5098 %{
 5099   match(ConH);
 5100 
 5101   op_cost(15);
 5102   format %{ %}
 5103   interface(CONST_INTER);
 5104 %}
 5105 
 5106 // Double Immediate zero
 5107 operand immD0()
 5108 %{
 5109   predicate(jlong_cast(n->getd()) == 0);
 5110   match(ConD);
 5111 
 5112   op_cost(5);
 5113   format %{ %}
 5114   interface(CONST_INTER);
 5115 %}
 5116 
 5117 // Double Immediate
 5118 operand immD()
 5119 %{
 5120   match(ConD);
 5121 
 5122   op_cost(15);
 5123   format %{ %}
 5124   interface(CONST_INTER);
 5125 %}
 5126 
 5127 // Immediates for special shifts (sign extend)
 5128 
// Constants for shift counts
 5130 operand immI_16()
 5131 %{
 5132   predicate(n->get_int() == 16);
 5133   match(ConI);
 5134 
 5135   format %{ %}
 5136   interface(CONST_INTER);
 5137 %}
 5138 
 5139 operand immI_24()
 5140 %{
 5141   predicate(n->get_int() == 24);
 5142   match(ConI);
 5143 
 5144   format %{ %}
 5145   interface(CONST_INTER);
 5146 %}
 5147 
 5148 // Constant for byte-wide masking
 5149 operand immI_255()
 5150 %{
 5151   predicate(n->get_int() == 255);
 5152   match(ConI);
 5153 
 5154   format %{ %}
 5155   interface(CONST_INTER);
 5156 %}
 5157 
 5158 // Constant for short-wide masking
 5159 operand immI_65535()
 5160 %{
 5161   predicate(n->get_int() == 65535);
 5162   match(ConI);
 5163 
 5164   format %{ %}
 5165   interface(CONST_INTER);
 5166 %}
 5167 
 5168 // Constant for byte-wide masking
 5169 operand immL_255()
 5170 %{
 5171   predicate(n->get_long() == 255);
 5172   match(ConL);
 5173 
 5174   format %{ %}
 5175   interface(CONST_INTER);
 5176 %}
 5177 
 5178 // Constant for short-wide masking
 5179 operand immL_65535()
 5180 %{
 5181   predicate(n->get_long() == 65535);
 5182   match(ConL);
 5183 
 5184   format %{ %}
 5185   interface(CONST_INTER);
 5186 %}
 5187 
 5188 operand kReg()
 5189 %{
 5190   constraint(ALLOC_IN_RC(vectmask_reg));
 5191   match(RegVectMask);
  format %{ %}
 5193   interface(REG_INTER);
 5194 %}
 5195 
 5196 // Register Operands
 5197 // Integer Register
 5198 operand rRegI()
 5199 %{
 5200   constraint(ALLOC_IN_RC(int_reg));
 5201   match(RegI);
 5202 
 5203   match(rax_RegI);
 5204   match(rbx_RegI);
 5205   match(rcx_RegI);
 5206   match(rdx_RegI);
 5207   match(rdi_RegI);
 5208 
 5209   format %{ %}
 5210   interface(REG_INTER);
 5211 %}
 5212 
 5213 // Special Registers
 5214 operand rax_RegI()
 5215 %{
 5216   constraint(ALLOC_IN_RC(int_rax_reg));
 5217   match(RegI);
 5218   match(rRegI);
 5219 
 5220   format %{ "RAX" %}
 5221   interface(REG_INTER);
 5222 %}
 5223 
 5224 // Special Registers
 5225 operand rbx_RegI()
 5226 %{
 5227   constraint(ALLOC_IN_RC(int_rbx_reg));
 5228   match(RegI);
 5229   match(rRegI);
 5230 
 5231   format %{ "RBX" %}
 5232   interface(REG_INTER);
 5233 %}
 5234 
 5235 operand rcx_RegI()
 5236 %{
 5237   constraint(ALLOC_IN_RC(int_rcx_reg));
 5238   match(RegI);
 5239   match(rRegI);
 5240 
 5241   format %{ "RCX" %}
 5242   interface(REG_INTER);
 5243 %}
 5244 
 5245 operand rdx_RegI()
 5246 %{
 5247   constraint(ALLOC_IN_RC(int_rdx_reg));
 5248   match(RegI);
 5249   match(rRegI);
 5250 
 5251   format %{ "RDX" %}
 5252   interface(REG_INTER);
 5253 %}
 5254 
 5255 operand rdi_RegI()
 5256 %{
 5257   constraint(ALLOC_IN_RC(int_rdi_reg));
 5258   match(RegI);
 5259   match(rRegI);
 5260 
 5261   format %{ "RDI" %}
 5262   interface(REG_INTER);
 5263 %}
 5264 
 5265 operand no_rax_rdx_RegI()
 5266 %{
 5267   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
 5268   match(RegI);
 5269   match(rbx_RegI);
 5270   match(rcx_RegI);
 5271   match(rdi_RegI);
 5272 
 5273   format %{ %}
 5274   interface(REG_INTER);
 5275 %}
 5276 
 5277 operand no_rbp_r13_RegI()
 5278 %{
 5279   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
 5280   match(RegI);
 5281   match(rRegI);
 5282   match(rax_RegI);
 5283   match(rbx_RegI);
 5284   match(rcx_RegI);
 5285   match(rdx_RegI);
 5286   match(rdi_RegI);
 5287 
 5288   format %{ %}
 5289   interface(REG_INTER);
 5290 %}
 5291 
 5292 // Pointer Register
 5293 operand any_RegP()
 5294 %{
 5295   constraint(ALLOC_IN_RC(any_reg));
 5296   match(RegP);
 5297   match(rax_RegP);
 5298   match(rbx_RegP);
 5299   match(rdi_RegP);
 5300   match(rsi_RegP);
 5301   match(rbp_RegP);
 5302   match(r15_RegP);
 5303   match(rRegP);
 5304 
 5305   format %{ %}
 5306   interface(REG_INTER);
 5307 %}
 5308 
 5309 operand rRegP()
 5310 %{
 5311   constraint(ALLOC_IN_RC(ptr_reg));
 5312   match(RegP);
 5313   match(rax_RegP);
 5314   match(rbx_RegP);
 5315   match(rdi_RegP);
 5316   match(rsi_RegP);
 5317   match(rbp_RegP);  // See Q&A below about
 5318   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5319 
 5320   format %{ %}
 5321   interface(REG_INTER);
 5322 %}
 5323 
 5324 operand rRegN() %{
 5325   constraint(ALLOC_IN_RC(int_reg));
 5326   match(RegN);
 5327 
 5328   format %{ %}
 5329   interface(REG_INTER);
 5330 %}
 5331 
// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, the allocator will never
// choose r15 for that output.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer==true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, the allocator will not choose RBP for an instruction's output either.
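
// For illustration only (a sketch; the concrete rules appear later in this
// file): a rule that must produce its result in R15 has to name the bound
// operand class explicitly as its output, e.g.
//
//   instruct tlsPtr(r15_RegP dst)
//   %{
//     match(Set dst (ThreadLocal));
//     ...
//   %}
//
// whereas a rule written against rRegP can still consume a value that
// happens to live in R15, because matching is applied per input.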
 5342 
 5343 // This operand is not allowed to use RBP even if
 5344 // RBP is not used to hold the frame pointer.
 5345 operand no_rbp_RegP()
 5346 %{
 5347   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
 5348   match(RegP);
 5349   match(rbx_RegP);
 5350   match(rsi_RegP);
 5351   match(rdi_RegP);
 5352 
 5353   format %{ %}
 5354   interface(REG_INTER);
 5355 %}
 5356 
 5357 // Special Registers
 5358 // Return a pointer value
 5359 operand rax_RegP()
 5360 %{
 5361   constraint(ALLOC_IN_RC(ptr_rax_reg));
 5362   match(RegP);
 5363   match(rRegP);
 5364 
 5365   format %{ %}
 5366   interface(REG_INTER);
 5367 %}
 5368 
 5369 // Special Registers
 5370 // Return a compressed pointer value
 5371 operand rax_RegN()
 5372 %{
 5373   constraint(ALLOC_IN_RC(int_rax_reg));
 5374   match(RegN);
 5375   match(rRegN);
 5376 
 5377   format %{ %}
 5378   interface(REG_INTER);
 5379 %}
 5380 
 5381 // Used in AtomicAdd
 5382 operand rbx_RegP()
 5383 %{
 5384   constraint(ALLOC_IN_RC(ptr_rbx_reg));
 5385   match(RegP);
 5386   match(rRegP);
 5387 
 5388   format %{ %}
 5389   interface(REG_INTER);
 5390 %}
 5391 
 5392 operand rsi_RegP()
 5393 %{
 5394   constraint(ALLOC_IN_RC(ptr_rsi_reg));
 5395   match(RegP);
 5396   match(rRegP);
 5397 
 5398   format %{ %}
 5399   interface(REG_INTER);
 5400 %}
 5401 
 5402 operand rbp_RegP()
 5403 %{
 5404   constraint(ALLOC_IN_RC(ptr_rbp_reg));
 5405   match(RegP);
 5406   match(rRegP);
 5407 
 5408   format %{ %}
 5409   interface(REG_INTER);
 5410 %}
 5411 
 5412 // Used in rep stosq
 5413 operand rdi_RegP()
 5414 %{
 5415   constraint(ALLOC_IN_RC(ptr_rdi_reg));
 5416   match(RegP);
 5417   match(rRegP);
 5418 
 5419   format %{ %}
 5420   interface(REG_INTER);
 5421 %}
 5422 
 5423 operand r15_RegP()
 5424 %{
 5425   constraint(ALLOC_IN_RC(ptr_r15_reg));
 5426   match(RegP);
 5427   match(rRegP);
 5428 
 5429   format %{ %}
 5430   interface(REG_INTER);
 5431 %}
 5432 
 5433 operand rRegL()
 5434 %{
 5435   constraint(ALLOC_IN_RC(long_reg));
 5436   match(RegL);
 5437   match(rax_RegL);
 5438   match(rdx_RegL);
 5439 
 5440   format %{ %}
 5441   interface(REG_INTER);
 5442 %}
 5443 
 5444 // Special Registers
 5445 operand no_rax_rdx_RegL()
 5446 %{
 5447   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
 5448   match(RegL);
 5449   match(rRegL);
 5450 
 5451   format %{ %}
 5452   interface(REG_INTER);
 5453 %}
 5454 
 5455 operand rax_RegL()
 5456 %{
 5457   constraint(ALLOC_IN_RC(long_rax_reg));
 5458   match(RegL);
 5459   match(rRegL);
 5460 
 5461   format %{ "RAX" %}
 5462   interface(REG_INTER);
 5463 %}
 5464 
 5465 operand rcx_RegL()
 5466 %{
 5467   constraint(ALLOC_IN_RC(long_rcx_reg));
 5468   match(RegL);
 5469   match(rRegL);
 5470 
 5471   format %{ %}
 5472   interface(REG_INTER);
 5473 %}
 5474 
 5475 operand rdx_RegL()
 5476 %{
 5477   constraint(ALLOC_IN_RC(long_rdx_reg));
 5478   match(RegL);
 5479   match(rRegL);
 5480 
 5481   format %{ %}
 5482   interface(REG_INTER);
 5483 %}
 5484 
 5485 operand r11_RegL()
 5486 %{
 5487   constraint(ALLOC_IN_RC(long_r11_reg));
 5488   match(RegL);
 5489   match(rRegL);
 5490 
 5491   format %{ %}
 5492   interface(REG_INTER);
 5493 %}
 5494 
 5495 operand no_rbp_r13_RegL()
 5496 %{
 5497   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
 5498   match(RegL);
 5499   match(rRegL);
 5500   match(rax_RegL);
 5501   match(rcx_RegL);
 5502   match(rdx_RegL);
 5503 
 5504   format %{ %}
 5505   interface(REG_INTER);
 5506 %}
 5507 
 5508 // Flags register, used as output of compare instructions
 5509 operand rFlagsReg()
 5510 %{
 5511   constraint(ALLOC_IN_RC(int_flags));
 5512   match(RegFlags);
 5513 
 5514   format %{ "RFLAGS" %}
 5515   interface(REG_INTER);
 5516 %}
 5517 
 5518 // Flags register, used as output of FLOATING POINT compare instructions
 5519 operand rFlagsRegU()
 5520 %{
 5521   constraint(ALLOC_IN_RC(int_flags));
 5522   match(RegFlags);
 5523 
 5524   format %{ "RFLAGS_U" %}
 5525   interface(REG_INTER);
 5526 %}
 5527 
 5528 operand rFlagsRegUCF() %{
 5529   constraint(ALLOC_IN_RC(int_flags));
 5530   match(RegFlags);
 5531   predicate(false);
 5532 
 5533   format %{ "RFLAGS_U_CF" %}
 5534   interface(REG_INTER);
 5535 %}
 5536 
 5537 // Float register operands
 5538 operand regF() %{
 5539    constraint(ALLOC_IN_RC(float_reg));
 5540    match(RegF);
 5541 
 5542    format %{ %}
 5543    interface(REG_INTER);
 5544 %}
 5545 
 5546 // Float register operands
 5547 operand legRegF() %{
 5548    constraint(ALLOC_IN_RC(float_reg_legacy));
 5549    match(RegF);
 5550 
 5551    format %{ %}
 5552    interface(REG_INTER);
 5553 %}
 5554 
 5555 // Float register operands
 5556 operand vlRegF() %{
 5557    constraint(ALLOC_IN_RC(float_reg_vl));
 5558    match(RegF);
 5559 
 5560    format %{ %}
 5561    interface(REG_INTER);
 5562 %}
 5563 
 5564 // Double register operands
 5565 operand regD() %{
 5566    constraint(ALLOC_IN_RC(double_reg));
 5567    match(RegD);
 5568 
 5569    format %{ %}
 5570    interface(REG_INTER);
 5571 %}
 5572 
 5573 // Double register operands
 5574 operand legRegD() %{
 5575    constraint(ALLOC_IN_RC(double_reg_legacy));
 5576    match(RegD);
 5577 
 5578    format %{ %}
 5579    interface(REG_INTER);
 5580 %}
 5581 
 5582 // Double register operands
 5583 operand vlRegD() %{
 5584    constraint(ALLOC_IN_RC(double_reg_vl));
 5585    match(RegD);
 5586 
 5587    format %{ %}
 5588    interface(REG_INTER);
 5589 %}
 5590 
 5591 //----------Memory Operands----------------------------------------------------
 5592 // Direct Memory Operand
 5593 // operand direct(immP addr)
 5594 // %{
 5595 //   match(addr);
 5596 
 5597 //   format %{ "[$addr]" %}
 5598 //   interface(MEMORY_INTER) %{
 5599 //     base(0xFFFFFFFF);
 5600 //     index(0x4);
 5601 //     scale(0x0);
 5602 //     disp($addr);
 5603 //   %}
 5604 // %}
 5605 
 5606 // Indirect Memory Operand
 5607 operand indirect(any_RegP reg)
 5608 %{
 5609   constraint(ALLOC_IN_RC(ptr_reg));
 5610   match(reg);
 5611 
 5612   format %{ "[$reg]" %}
 5613   interface(MEMORY_INTER) %{
 5614     base($reg);
 5615     index(0x4);
 5616     scale(0x0);
 5617     disp(0x0);
 5618   %}
 5619 %}
 5620 
 5621 // Indirect Memory Plus Short Offset Operand
 5622 operand indOffset8(any_RegP reg, immL8 off)
 5623 %{
 5624   constraint(ALLOC_IN_RC(ptr_reg));
 5625   match(AddP reg off);
 5626 
 5627   format %{ "[$reg + $off (8-bit)]" %}
 5628   interface(MEMORY_INTER) %{
 5629     base($reg);
 5630     index(0x4);
 5631     scale(0x0);
 5632     disp($off);
 5633   %}
 5634 %}
 5635 
 5636 // Indirect Memory Plus Long Offset Operand
 5637 operand indOffset32(any_RegP reg, immL32 off)
 5638 %{
 5639   constraint(ALLOC_IN_RC(ptr_reg));
 5640   match(AddP reg off);
 5641 
 5642   format %{ "[$reg + $off (32-bit)]" %}
 5643   interface(MEMORY_INTER) %{
 5644     base($reg);
 5645     index(0x4);
 5646     scale(0x0);
 5647     disp($off);
 5648   %}
 5649 %}
 5650 
 5651 // Indirect Memory Plus Index Register Plus Offset Operand
 5652 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
 5653 %{
 5654   constraint(ALLOC_IN_RC(ptr_reg));
 5655   match(AddP (AddP reg lreg) off);
 5656 
 5657   op_cost(10);
 5658   format %{"[$reg + $off + $lreg]" %}
 5659   interface(MEMORY_INTER) %{
 5660     base($reg);
 5661     index($lreg);
 5662     scale(0x0);
 5663     disp($off);
 5664   %}
 5665 %}
 5666 
// Indirect Memory Plus Index Register Operand
 5668 operand indIndex(any_RegP reg, rRegL lreg)
 5669 %{
 5670   constraint(ALLOC_IN_RC(ptr_reg));
 5671   match(AddP reg lreg);
 5672 
 5673   op_cost(10);
 5674   format %{"[$reg + $lreg]" %}
 5675   interface(MEMORY_INTER) %{
 5676     base($reg);
 5677     index($lreg);
 5678     scale(0x0);
 5679     disp(0x0);
 5680   %}
 5681 %}
 5682 
 5683 // Indirect Memory Times Scale Plus Index Register
 5684 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
 5685 %{
 5686   constraint(ALLOC_IN_RC(ptr_reg));
 5687   match(AddP reg (LShiftL lreg scale));
 5688 
 5689   op_cost(10);
 5690   format %{"[$reg + $lreg << $scale]" %}
 5691   interface(MEMORY_INTER) %{
 5692     base($reg);
 5693     index($lreg);
 5694     scale($scale);
 5695     disp(0x0);
 5696   %}
 5697 %}
 5698 
 5699 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
 5700 %{
 5701   constraint(ALLOC_IN_RC(ptr_reg));
 5702   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5703   match(AddP reg (LShiftL (ConvI2L idx) scale));
 5704 
 5705   op_cost(10);
 5706   format %{"[$reg + pos $idx << $scale]" %}
 5707   interface(MEMORY_INTER) %{
 5708     base($reg);
 5709     index($idx);
 5710     scale($scale);
 5711     disp(0x0);
 5712   %}
 5713 %}
 5714 
 5715 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5716 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
 5717 %{
 5718   constraint(ALLOC_IN_RC(ptr_reg));
 5719   match(AddP (AddP reg (LShiftL lreg scale)) off);
 5720 
 5721   op_cost(10);
 5722   format %{"[$reg + $off + $lreg << $scale]" %}
 5723   interface(MEMORY_INTER) %{
 5724     base($reg);
 5725     index($lreg);
 5726     scale($scale);
 5727     disp($off);
 5728   %}
 5729 %}
 5730 
 5731 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5732 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
 5733 %{
 5734   constraint(ALLOC_IN_RC(ptr_reg));
 5735   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5736   match(AddP (AddP reg (ConvI2L idx)) off);
 5737 
 5738   op_cost(10);
 5739   format %{"[$reg + $off + $idx]" %}
 5740   interface(MEMORY_INTER) %{
 5741     base($reg);
 5742     index($idx);
 5743     scale(0x0);
 5744     disp($off);
 5745   %}
 5746 %}
 5747 
 5748 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5749 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 5750 %{
 5751   constraint(ALLOC_IN_RC(ptr_reg));
 5752   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5753   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
 5754 
 5755   op_cost(10);
 5756   format %{"[$reg + $off + $idx << $scale]" %}
 5757   interface(MEMORY_INTER) %{
 5758     base($reg);
 5759     index($idx);
 5760     scale($scale);
 5761     disp($off);
 5762   %}
 5763 %}
 5764 
 5765 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without a base,
// so we can't free r12 even with CompressedOops::base() == nullptr.
 5768 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
 5769   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
 5770   constraint(ALLOC_IN_RC(ptr_reg));
 5771   match(AddP (DecodeN reg) off);
 5772 
 5773   op_cost(10);
 5774   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5775   interface(MEMORY_INTER) %{
 5776     base(0xc); // R12
 5777     index($reg);
 5778     scale(0x3);
 5779     disp($off);
 5780   %}
 5781 %}
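
// Worked example (a sketch, assuming CompressedOops::shift() == 3 with the
// heap base kept in R12, which is what the operand above encodes): for a
// narrow oop n and a field offset off, the decoded address is
//
//   CompressedOops::base() + (n << 3) + off  ==>  [R12 + $reg * 8 + $off]
//
// so the DecodeN is folded into the addressing mode and no separate decode
// instruction has to be emitted.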
 5782 
 5783 // Indirect Memory Operand
 5784 operand indirectNarrow(rRegN reg)
 5785 %{
 5786   predicate(CompressedOops::shift() == 0);
 5787   constraint(ALLOC_IN_RC(ptr_reg));
 5788   match(DecodeN reg);
 5789 
 5790   format %{ "[$reg]" %}
 5791   interface(MEMORY_INTER) %{
 5792     base($reg);
 5793     index(0x4);
 5794     scale(0x0);
 5795     disp(0x0);
 5796   %}
 5797 %}
 5798 
 5799 // Indirect Memory Plus Short Offset Operand
 5800 operand indOffset8Narrow(rRegN reg, immL8 off)
 5801 %{
 5802   predicate(CompressedOops::shift() == 0);
 5803   constraint(ALLOC_IN_RC(ptr_reg));
 5804   match(AddP (DecodeN reg) off);
 5805 
 5806   format %{ "[$reg + $off (8-bit)]" %}
 5807   interface(MEMORY_INTER) %{
 5808     base($reg);
 5809     index(0x4);
 5810     scale(0x0);
 5811     disp($off);
 5812   %}
 5813 %}
 5814 
 5815 // Indirect Memory Plus Long Offset Operand
 5816 operand indOffset32Narrow(rRegN reg, immL32 off)
 5817 %{
 5818   predicate(CompressedOops::shift() == 0);
 5819   constraint(ALLOC_IN_RC(ptr_reg));
 5820   match(AddP (DecodeN reg) off);
 5821 
 5822   format %{ "[$reg + $off (32-bit)]" %}
 5823   interface(MEMORY_INTER) %{
 5824     base($reg);
 5825     index(0x4);
 5826     scale(0x0);
 5827     disp($off);
 5828   %}
 5829 %}
 5830 
 5831 // Indirect Memory Plus Index Register Plus Offset Operand
 5832 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
 5833 %{
 5834   predicate(CompressedOops::shift() == 0);
 5835   constraint(ALLOC_IN_RC(ptr_reg));
 5836   match(AddP (AddP (DecodeN reg) lreg) off);
 5837 
 5838   op_cost(10);
 5839   format %{"[$reg + $off + $lreg]" %}
 5840   interface(MEMORY_INTER) %{
 5841     base($reg);
 5842     index($lreg);
 5843     scale(0x0);
 5844     disp($off);
 5845   %}
 5846 %}
 5847 
// Indirect Memory Plus Index Register Operand
 5849 operand indIndexNarrow(rRegN reg, rRegL lreg)
 5850 %{
 5851   predicate(CompressedOops::shift() == 0);
 5852   constraint(ALLOC_IN_RC(ptr_reg));
 5853   match(AddP (DecodeN reg) lreg);
 5854 
 5855   op_cost(10);
 5856   format %{"[$reg + $lreg]" %}
 5857   interface(MEMORY_INTER) %{
 5858     base($reg);
 5859     index($lreg);
 5860     scale(0x0);
 5861     disp(0x0);
 5862   %}
 5863 %}
 5864 
 5865 // Indirect Memory Times Scale Plus Index Register
 5866 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
 5867 %{
 5868   predicate(CompressedOops::shift() == 0);
 5869   constraint(ALLOC_IN_RC(ptr_reg));
 5870   match(AddP (DecodeN reg) (LShiftL lreg scale));
 5871 
 5872   op_cost(10);
 5873   format %{"[$reg + $lreg << $scale]" %}
 5874   interface(MEMORY_INTER) %{
 5875     base($reg);
 5876     index($lreg);
 5877     scale($scale);
 5878     disp(0x0);
 5879   %}
 5880 %}
 5881 
 5882 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5883 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
 5884 %{
 5885   predicate(CompressedOops::shift() == 0);
 5886   constraint(ALLOC_IN_RC(ptr_reg));
 5887   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
 5888 
 5889   op_cost(10);
 5890   format %{"[$reg + $off + $lreg << $scale]" %}
 5891   interface(MEMORY_INTER) %{
 5892     base($reg);
 5893     index($lreg);
 5894     scale($scale);
 5895     disp($off);
 5896   %}
 5897 %}
 5898 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
 5900 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
 5901 %{
 5902   constraint(ALLOC_IN_RC(ptr_reg));
 5903   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
 5904   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
 5905 
 5906   op_cost(10);
 5907   format %{"[$reg + $off + $idx]" %}
 5908   interface(MEMORY_INTER) %{
 5909     base($reg);
 5910     index($idx);
 5911     scale(0x0);
 5912     disp($off);
 5913   %}
 5914 %}
 5915 
 5916 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5917 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 5918 %{
 5919   constraint(ALLOC_IN_RC(ptr_reg));
 5920   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
 5921   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
 5922 
 5923   op_cost(10);
 5924   format %{"[$reg + $off + $idx << $scale]" %}
 5925   interface(MEMORY_INTER) %{
 5926     base($reg);
 5927     index($idx);
 5928     scale($scale);
 5929     disp($off);
 5930   %}
 5931 %}
 5932 
 5933 //----------Special Memory Operands--------------------------------------------
 5934 // Stack Slot Operand - This operand is used for loading and storing temporary
 5935 //                      values on the stack where a match requires a value to
 5936 //                      flow through memory.
 5937 operand stackSlotP(sRegP reg)
 5938 %{
 5939   constraint(ALLOC_IN_RC(stack_slots));
 5940   // No match rule because this operand is only generated in matching
 5941 
 5942   format %{ "[$reg]" %}
 5943   interface(MEMORY_INTER) %{
 5944     base(0x4);   // RSP
 5945     index(0x4);  // No Index
 5946     scale(0x0);  // No Scale
 5947     disp($reg);  // Stack Offset
 5948   %}
 5949 %}
 5950 
 5951 operand stackSlotI(sRegI reg)
 5952 %{
 5953   constraint(ALLOC_IN_RC(stack_slots));
 5954   // No match rule because this operand is only generated in matching
 5955 
 5956   format %{ "[$reg]" %}
 5957   interface(MEMORY_INTER) %{
 5958     base(0x4);   // RSP
 5959     index(0x4);  // No Index
 5960     scale(0x0);  // No Scale
 5961     disp($reg);  // Stack Offset
 5962   %}
 5963 %}
 5964 
 5965 operand stackSlotF(sRegF reg)
 5966 %{
 5967   constraint(ALLOC_IN_RC(stack_slots));
 5968   // No match rule because this operand is only generated in matching
 5969 
 5970   format %{ "[$reg]" %}
 5971   interface(MEMORY_INTER) %{
 5972     base(0x4);   // RSP
 5973     index(0x4);  // No Index
 5974     scale(0x0);  // No Scale
 5975     disp($reg);  // Stack Offset
 5976   %}
 5977 %}
 5978 
 5979 operand stackSlotD(sRegD reg)
 5980 %{
 5981   constraint(ALLOC_IN_RC(stack_slots));
 5982   // No match rule because this operand is only generated in matching
 5983 
 5984   format %{ "[$reg]" %}
 5985   interface(MEMORY_INTER) %{
 5986     base(0x4);   // RSP
 5987     index(0x4);  // No Index
 5988     scale(0x0);  // No Scale
 5989     disp($reg);  // Stack Offset
 5990   %}
 5991 %}
 5992 operand stackSlotL(sRegL reg)
 5993 %{
 5994   constraint(ALLOC_IN_RC(stack_slots));
 5995   // No match rule because this operand is only generated in matching
 5996 
 5997   format %{ "[$reg]" %}
 5998   interface(MEMORY_INTER) %{
 5999     base(0x4);   // RSP
 6000     index(0x4);  // No Index
 6001     scale(0x0);  // No Scale
 6002     disp($reg);  // Stack Offset
 6003   %}
 6004 %}
 6005 
 6006 //----------Conditional Branch Operands----------------------------------------
 6007 // Comparison Op  - This is the operation of the comparison, and is limited to
 6008 //                  the following set of codes:
 6009 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6010 //
 6011 // Other attributes of the comparison, such as unsignedness, are specified
 6012 // by the comparison instruction that sets a condition code flags register.
 6013 // That result is represented by a flags operand whose subtype is appropriate
 6014 // to the unsignedness (etc.) of the comparison.
 6015 //
 6016 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6017 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6018 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 6019 
 6020 // Comparison Code
 6021 operand cmpOp()
 6022 %{
 6023   match(Bool);
 6024 
 6025   format %{ "" %}
 6026   interface(COND_INTER) %{
 6027     equal(0x4, "e");
 6028     not_equal(0x5, "ne");
 6029     less(0xC, "l");
 6030     greater_equal(0xD, "ge");
 6031     less_equal(0xE, "le");
 6032     greater(0xF, "g");
 6033     overflow(0x0, "o");
 6034     no_overflow(0x1, "no");
 6035   %}
 6036 %}
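
// The numeric values above are the x86 condition-code nibbles ("cc") that the
// encoder splices into the Jcc/SETcc/CMOVcc opcode bytes. For example
// (illustration only): equal(0x4) yields JE, encoded as 0x0F, 0x80 | cc =
// 0x0F 0x84 for a jump with a 32-bit displacement, and SETE as
// 0x0F, 0x90 | cc = 0x0F 0x94.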
 6037 
 6038 // Comparison Code, unsigned compare.  Used by FP also, with
 6039 // C2 (unordered) turned into GT or LT already.  The other bits
 6040 // C0 and C3 are turned into Carry & Zero flags.
 6041 operand cmpOpU()
 6042 %{
 6043   match(Bool);
 6044 
 6045   format %{ "" %}
 6046   interface(COND_INTER) %{
 6047     equal(0x4, "e");
 6048     not_equal(0x5, "ne");
 6049     less(0x2, "b");
 6050     greater_equal(0x3, "ae");
 6051     less_equal(0x6, "be");
 6052     greater(0x7, "a");
 6053     overflow(0x0, "o");
 6054     no_overflow(0x1, "no");
 6055   %}
 6056 %}
 6057 
 6058 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
 6062 operand cmpOpUCF() %{
 6063   match(Bool);
 6064   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 6065             n->as_Bool()->_test._test == BoolTest::ge ||
 6066             n->as_Bool()->_test._test == BoolTest::le ||
 6067             n->as_Bool()->_test._test == BoolTest::gt ||
 6068             n->in(1)->in(1) == n->in(1)->in(2));
 6069   format %{ "" %}
 6070   interface(COND_INTER) %{
 6071     equal(0xb, "np");
 6072     not_equal(0xa, "p");
 6073     less(0x2, "b");
 6074     greater_equal(0x3, "ae");
 6075     less_equal(0x6, "be");
 6076     greater(0x7, "a");
 6077     overflow(0x0, "o");
 6078     no_overflow(0x1, "no");
 6079   %}
 6080 %}
 6081 
 6082 
 6083 // Floating comparisons that can be fixed up with extra conditional jumps
 6084 operand cmpOpUCF2() %{
 6085   match(Bool);
 6086   predicate((n->as_Bool()->_test._test == BoolTest::ne ||
 6087              n->as_Bool()->_test._test == BoolTest::eq) &&
 6088             n->in(1)->in(1) != n->in(1)->in(2));
 6089   format %{ "" %}
 6090   interface(COND_INTER) %{
 6091     equal(0x4, "e");
 6092     not_equal(0x5, "ne");
 6093     less(0x2, "b");
 6094     greater_equal(0x3, "ae");
 6095     less_equal(0x6, "be");
 6096     greater(0x7, "a");
 6097     overflow(0x0, "o");
 6098     no_overflow(0x1, "no");
 6099   %}
 6100 %}
 6101 
// Operands for bound floating point register arguments
 6103 operand rxmm0() %{
 6104   constraint(ALLOC_IN_RC(xmm0_reg));
 6105   match(VecX);
  format %{ %}
 6107   interface(REG_INTER);
 6108 %}
 6109 
 6110 // Vectors
 6111 
 6112 // Dummy generic vector class. Should be used for all vector operands.
 6113 // Replaced with vec[SDXYZ] during post-selection pass.
 6114 operand vec() %{
 6115   constraint(ALLOC_IN_RC(dynamic));
 6116   match(VecX);
 6117   match(VecY);
 6118   match(VecZ);
 6119   match(VecS);
 6120   match(VecD);
 6121 
 6122   format %{ %}
 6123   interface(REG_INTER);
 6124 %}
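
// For example, a 128-bit vector add is first selected with generic 'vec'
// operands; once the required vector length is known, the post-selection pass
// (Matcher::specialize_generic_vector_operands) rewrites those operands to
// 'vecX'.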
 6125 
 6126 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6127 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6128 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6129 // runtime code generation via reg_class_dynamic.
 6130 operand legVec() %{
 6131   constraint(ALLOC_IN_RC(dynamic));
 6132   match(VecX);
 6133   match(VecY);
 6134   match(VecZ);
 6135   match(VecS);
 6136   match(VecD);
 6137 
 6138   format %{ %}
 6139   interface(REG_INTER);
 6140 %}
 6141 
 6142 // Replaces vec during post-selection cleanup. See above.
 6143 operand vecS() %{
 6144   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
 6145   match(VecS);
 6146 
 6147   format %{ %}
 6148   interface(REG_INTER);
 6149 %}
 6150 
 6151 // Replaces legVec during post-selection cleanup. See above.
 6152 operand legVecS() %{
 6153   constraint(ALLOC_IN_RC(vectors_reg_legacy));
 6154   match(VecS);
 6155 
 6156   format %{ %}
 6157   interface(REG_INTER);
 6158 %}
 6159 
 6160 // Replaces vec during post-selection cleanup. See above.
 6161 operand vecD() %{
 6162   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
 6163   match(VecD);
 6164 
 6165   format %{ %}
 6166   interface(REG_INTER);
 6167 %}
 6168 
 6169 // Replaces legVec during post-selection cleanup. See above.
 6170 operand legVecD() %{
 6171   constraint(ALLOC_IN_RC(vectord_reg_legacy));
 6172   match(VecD);
 6173 
 6174   format %{ %}
 6175   interface(REG_INTER);
 6176 %}
 6177 
 6178 // Replaces vec during post-selection cleanup. See above.
 6179 operand vecX() %{
 6180   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
 6181   match(VecX);
 6182 
 6183   format %{ %}
 6184   interface(REG_INTER);
 6185 %}
 6186 
 6187 // Replaces legVec during post-selection cleanup. See above.
 6188 operand legVecX() %{
 6189   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
 6190   match(VecX);
 6191 
 6192   format %{ %}
 6193   interface(REG_INTER);
 6194 %}
 6195 
 6196 // Replaces vec during post-selection cleanup. See above.
 6197 operand vecY() %{
 6198   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
 6199   match(VecY);
 6200 
 6201   format %{ %}
 6202   interface(REG_INTER);
 6203 %}
 6204 
 6205 // Replaces legVec during post-selection cleanup. See above.
 6206 operand legVecY() %{
 6207   constraint(ALLOC_IN_RC(vectory_reg_legacy));
 6208   match(VecY);
 6209 
 6210   format %{ %}
 6211   interface(REG_INTER);
 6212 %}
 6213 
 6214 // Replaces vec during post-selection cleanup. See above.
 6215 operand vecZ() %{
 6216   constraint(ALLOC_IN_RC(vectorz_reg));
 6217   match(VecZ);
 6218 
 6219   format %{ %}
 6220   interface(REG_INTER);
 6221 %}
 6222 
 6223 // Replaces legVec during post-selection cleanup. See above.
 6224 operand legVecZ() %{
 6225   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6226   match(VecZ);
 6227 
 6228   format %{ %}
 6229   interface(REG_INTER);
 6230 %}
 6231 
 6232 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 6234 // instruction definitions by not requiring the AD writer to specify separate
 6235 // instructions for every form of operand when the instruction accepts
 6236 // multiple operand types with the same basic encoding and format.  The classic
 6237 // case of this is memory operands.
 6238 
 6239 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6240                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6241                indCompressedOopOffset,
 6242                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6243                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6244                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
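
// For example, the single loadI rule defined later in this file,
//
//   instruct loadI(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadI mem));
//     ...
//   %}
//
// covers every addressing form listed above ([reg], [reg + off8],
// [reg + off32], [reg + off + idx << scale], the narrow-oop variants, and so
// on) without requiring a separate instruct per operand.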
 6245 
 6246 //----------PIPELINE-----------------------------------------------------------
 6247 // Rules which define the behavior of the target architectures pipeline.
 6248 pipeline %{
 6249 
 6250 //----------ATTRIBUTES---------------------------------------------------------
 6251 attributes %{
  variable_size_instructions;        // Variable size instructions
 6253   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction unit size is 1 byte
 6255   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6256   instruction_fetch_units = 1;       // of 16 bytes
 6257 %}
 6258 
 6259 //----------RESOURCES----------------------------------------------------------
 6260 // Resources are the functional units available to the machine
 6261 
 6262 // Generic P2/P3 pipeline
 6263 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6264 // 3 instructions decoded per cycle.
 6265 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
 6267 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6268            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6269            BR, FPU,
 6270            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
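
// For example (per the pipe classes below), an ALU operation with a memory
// source must claim D0 (the only decoder that handles big operands) plus one
// of MS0-MS2, whereas a simple reg-reg ALU operation can decode on any of
// D0-D2 and execute on any of ALU0-ALU2.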
 6271 
 6272 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6273 // Pipeline Description specifies the stages in the machine's pipeline
 6274 
 6275 // Generic P2/P3 pipeline
 6276 pipe_desc(S0, S1, S2, S3, S4, S5);
 6277 
 6278 //----------PIPELINE CLASSES---------------------------------------------------
 6279 // Pipeline Classes describe the stages in which input and output are
 6280 // referenced by the hardware pipeline.
 6281 
 6282 // Naming convention: ialu or fpu
 6283 // Then: _reg
 6284 // Then: _reg if there is a 2nd register
 6285 // Then: _long if it's a pair of instructions implementing a long
 6286 // Then: _fat if it requires the big decoder
 6287 //   Or: _mem if it requires the big decoder and a memory unit.
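
// Example reading of the convention: 'ialu_reg_mem' is an integer ALU
// operation with a register destination and a memory source (hence the big
// decoder plus a memory unit), while 'fpu_reg_reg' is an FPU operation on
// two registers.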
 6288 
 6289 // Integer ALU reg operation
 6290 pipe_class ialu_reg(rRegI dst)
 6291 %{
 6292     single_instruction;
 6293     dst    : S4(write);
 6294     dst    : S3(read);
 6295     DECODE : S0;        // any decoder
 6296     ALU    : S3;        // any alu
 6297 %}
 6298 
 6299 // Long ALU reg operation
 6300 pipe_class ialu_reg_long(rRegL dst)
 6301 %{
 6302     instruction_count(2);
 6303     dst    : S4(write);
 6304     dst    : S3(read);
 6305     DECODE : S0(2);     // any 2 decoders
 6306     ALU    : S3(2);     // both alus
 6307 %}
 6308 
 6309 // Integer ALU reg operation using big decoder
 6310 pipe_class ialu_reg_fat(rRegI dst)
 6311 %{
 6312     single_instruction;
 6313     dst    : S4(write);
 6314     dst    : S3(read);
 6315     D0     : S0;        // big decoder only
 6316     ALU    : S3;        // any alu
 6317 %}
 6318 
 6319 // Integer ALU reg-reg operation
 6320 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6321 %{
 6322     single_instruction;
 6323     dst    : S4(write);
 6324     src    : S3(read);
 6325     DECODE : S0;        // any decoder
 6326     ALU    : S3;        // any alu
 6327 %}
 6328 
 6329 // Integer ALU reg-reg operation
 6330 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6331 %{
 6332     single_instruction;
 6333     dst    : S4(write);
 6334     src    : S3(read);
 6335     D0     : S0;        // big decoder only
 6336     ALU    : S3;        // any alu
 6337 %}
 6338 
 6339 // Integer ALU reg-mem operation
 6340 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6341 %{
 6342     single_instruction;
 6343     dst    : S5(write);
 6344     mem    : S3(read);
 6345     D0     : S0;        // big decoder only
 6346     ALU    : S4;        // any alu
 6347     MEM    : S3;        // any mem
 6348 %}
 6349 
 6350 // Integer mem operation (prefetch)
 6351 pipe_class ialu_mem(memory mem)
 6352 %{
 6353     single_instruction;
 6354     mem    : S3(read);
 6355     D0     : S0;        // big decoder only
 6356     MEM    : S3;        // any mem
 6357 %}
 6358 
 6359 // Integer Store to Memory
 6360 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6361 %{
 6362     single_instruction;
 6363     mem    : S3(read);
 6364     src    : S5(read);
 6365     D0     : S0;        // big decoder only
 6366     ALU    : S4;        // any alu
 6367     MEM    : S3;
 6368 %}
 6369 
 6370 // // Long Store to Memory
 6371 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6372 // %{
 6373 //     instruction_count(2);
 6374 //     mem    : S3(read);
 6375 //     src    : S5(read);
 6376 //     D0     : S0(2);          // big decoder only; twice
 6377 //     ALU    : S4(2);     // any 2 alus
 6378 //     MEM    : S3(2);  // Both mems
 6379 // %}
 6380 
 6381 // Integer Store to Memory
 6382 pipe_class ialu_mem_imm(memory mem)
 6383 %{
 6384     single_instruction;
 6385     mem    : S3(read);
 6386     D0     : S0;        // big decoder only
 6387     ALU    : S4;        // any alu
 6388     MEM    : S3;
 6389 %}
 6390 
 6391 // Integer ALU0 reg-reg operation
 6392 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6393 %{
 6394     single_instruction;
 6395     dst    : S4(write);
 6396     src    : S3(read);
 6397     D0     : S0;        // Big decoder only
 6398     ALU0   : S3;        // only alu0
 6399 %}
 6400 
 6401 // Integer ALU0 reg-mem operation
 6402 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6403 %{
 6404     single_instruction;
 6405     dst    : S5(write);
 6406     mem    : S3(read);
 6407     D0     : S0;        // big decoder only
 6408     ALU0   : S4;        // ALU0 only
 6409     MEM    : S3;        // any mem
 6410 %}
 6411 
 6412 // Integer ALU reg-reg operation
 6413 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6414 %{
 6415     single_instruction;
 6416     cr     : S4(write);
 6417     src1   : S3(read);
 6418     src2   : S3(read);
 6419     DECODE : S0;        // any decoder
 6420     ALU    : S3;        // any alu
 6421 %}
 6422 
 6423 // Integer ALU reg-imm operation
 6424 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6425 %{
 6426     single_instruction;
 6427     cr     : S4(write);
 6428     src1   : S3(read);
 6429     DECODE : S0;        // any decoder
 6430     ALU    : S3;        // any alu
 6431 %}
 6432 
 6433 // Integer ALU reg-mem operation
 6434 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6435 %{
 6436     single_instruction;
 6437     cr     : S4(write);
 6438     src1   : S3(read);
 6439     src2   : S3(read);
 6440     D0     : S0;        // big decoder only
 6441     ALU    : S4;        // any alu
 6442     MEM    : S3;
 6443 %}
 6444 
 6445 // Conditional move reg-reg
 6446 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6447 %{
 6448     instruction_count(4);
 6449     y      : S4(read);
 6450     q      : S3(read);
 6451     p      : S3(read);
 6452     DECODE : S0(4);     // any decoder
 6453 %}
 6454 
 6455 // Conditional move reg-reg
 6456 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6457 %{
 6458     single_instruction;
 6459     dst    : S4(write);
 6460     src    : S3(read);
 6461     cr     : S3(read);
 6462     DECODE : S0;        // any decoder
 6463 %}
 6464 
 6465 // Conditional move reg-mem
 6466 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6467 %{
 6468     single_instruction;
 6469     dst    : S4(write);
 6470     src    : S3(read);
 6471     cr     : S3(read);
 6472     DECODE : S0;        // any decoder
 6473     MEM    : S3;
 6474 %}
 6475 
 6476 // Conditional move reg-reg long
 6477 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6478 %{
 6479     single_instruction;
 6480     dst    : S4(write);
 6481     src    : S3(read);
 6482     cr     : S3(read);
 6483     DECODE : S0(2);     // any 2 decoders
 6484 %}
 6485 
 6486 // Float reg-reg operation
 6487 pipe_class fpu_reg(regD dst)
 6488 %{
 6489     instruction_count(2);
 6490     dst    : S3(read);
 6491     DECODE : S0(2);     // any 2 decoders
 6492     FPU    : S3;
 6493 %}
 6494 
 6495 // Float reg-reg operation
 6496 pipe_class fpu_reg_reg(regD dst, regD src)
 6497 %{
 6498     instruction_count(2);
 6499     dst    : S4(write);
 6500     src    : S3(read);
 6501     DECODE : S0(2);     // any 2 decoders
 6502     FPU    : S3;
 6503 %}
 6504 
 6505 // Float reg-reg operation
 6506 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6507 %{
 6508     instruction_count(3);
 6509     dst    : S4(write);
 6510     src1   : S3(read);
 6511     src2   : S3(read);
 6512     DECODE : S0(3);     // any 3 decoders
 6513     FPU    : S3(2);
 6514 %}
 6515 
 6516 // Float reg-reg operation
 6517 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6518 %{
 6519     instruction_count(4);
 6520     dst    : S4(write);
 6521     src1   : S3(read);
 6522     src2   : S3(read);
 6523     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 6525     FPU    : S3(2);
 6526 %}
 6527 
 6528 // Float reg-reg operation
 6529 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6530 %{
 6531     instruction_count(4);
 6532     dst    : S4(write);
 6533     src1   : S3(read);
 6534     src2   : S3(read);
 6535     src3   : S3(read);
 6536     DECODE : S1(3);     // any 3 decoders
 6537     D0     : S0;        // Big decoder only
 6538     FPU    : S3(2);
 6539     MEM    : S3;
 6540 %}
 6541 
 6542 // Float reg-mem operation
 6543 pipe_class fpu_reg_mem(regD dst, memory mem)
 6544 %{
 6545     instruction_count(2);
 6546     dst    : S5(write);
 6547     mem    : S3(read);
 6548     D0     : S0;        // big decoder only
 6549     DECODE : S1;        // any decoder for FPU POP
 6550     FPU    : S4;
 6551     MEM    : S3;        // any mem
 6552 %}
 6553 
 6554 // Float reg-mem operation
 6555 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6556 %{
 6557     instruction_count(3);
 6558     dst    : S5(write);
 6559     src1   : S3(read);
 6560     mem    : S3(read);
 6561     D0     : S0;        // big decoder only
 6562     DECODE : S1(2);     // any decoder for FPU POP
 6563     FPU    : S4;
 6564     MEM    : S3;        // any mem
 6565 %}
 6566 
 6567 // Float mem-reg operation
 6568 pipe_class fpu_mem_reg(memory mem, regD src)
 6569 %{
 6570     instruction_count(2);
 6571     src    : S5(read);
 6572     mem    : S3(read);
 6573     DECODE : S0;        // any decoder for FPU PUSH
 6574     D0     : S1;        // big decoder only
 6575     FPU    : S4;
 6576     MEM    : S3;        // any mem
 6577 %}
 6578 
 6579 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6580 %{
 6581     instruction_count(3);
 6582     src1   : S3(read);
 6583     src2   : S3(read);
 6584     mem    : S3(read);
 6585     DECODE : S0(2);     // any decoder for FPU PUSH
 6586     D0     : S1;        // big decoder only
 6587     FPU    : S4;
 6588     MEM    : S3;        // any mem
 6589 %}
 6590 
 6591 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6592 %{
 6593     instruction_count(3);
 6594     src1   : S3(read);
 6595     src2   : S3(read);
 6596     mem    : S4(read);
 6597     DECODE : S0;        // any decoder for FPU PUSH
 6598     D0     : S0(2);     // big decoder only
 6599     FPU    : S4;
 6600     MEM    : S3(2);     // any mem
 6601 %}
 6602 
 6603 pipe_class fpu_mem_mem(memory dst, memory src1)
 6604 %{
 6605     instruction_count(2);
 6606     src1   : S3(read);
 6607     dst    : S4(read);
 6608     D0     : S0(2);     // big decoder only
 6609     MEM    : S3(2);     // any mem
 6610 %}
 6611 
 6612 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6613 %{
 6614     instruction_count(3);
 6615     src1   : S3(read);
 6616     src2   : S3(read);
 6617     dst    : S4(read);
 6618     D0     : S0(3);     // big decoder only
 6619     FPU    : S4;
 6620     MEM    : S3(3);     // any mem
 6621 %}
 6622 
 6623 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6624 %{
 6625     instruction_count(3);
 6626     src1   : S4(read);
 6627     mem    : S4(read);
 6628     DECODE : S0;        // any decoder for FPU PUSH
 6629     D0     : S0(2);     // big decoder only
 6630     FPU    : S4;
 6631     MEM    : S3(2);     // any mem
 6632 %}
 6633 
 6634 // Float load constant
 6635 pipe_class fpu_reg_con(regD dst)
 6636 %{
 6637     instruction_count(2);
 6638     dst    : S5(write);
 6639     D0     : S0;        // big decoder only for the load
 6640     DECODE : S1;        // any decoder for FPU POP
 6641     FPU    : S4;
 6642     MEM    : S3;        // any mem
 6643 %}
 6644 
 6645 // Float load constant
 6646 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6647 %{
 6648     instruction_count(3);
 6649     dst    : S5(write);
 6650     src    : S3(read);
 6651     D0     : S0;        // big decoder only for the load
 6652     DECODE : S1(2);     // any decoder for FPU POP
 6653     FPU    : S4;
 6654     MEM    : S3;        // any mem
 6655 %}
 6656 
 6657 // UnConditional branch
 6658 pipe_class pipe_jmp(label labl)
 6659 %{
 6660     single_instruction;
 6661     BR   : S3;
 6662 %}
 6663 
 6664 // Conditional branch
 6665 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6666 %{
 6667     single_instruction;
 6668     cr    : S1(read);
 6669     BR    : S3;
 6670 %}
 6671 
 6672 // Allocation idiom
 6673 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6674 %{
 6675     instruction_count(1); force_serialization;
 6676     fixed_latency(6);
 6677     heap_ptr : S3(read);
 6678     DECODE   : S0(3);
 6679     D0       : S2;
 6680     MEM      : S3;
 6681     ALU      : S3(2);
 6682     dst      : S5(write);
 6683     BR       : S5;
 6684 %}
 6685 
 6686 // Generic big/slow expanded idiom
 6687 pipe_class pipe_slow()
 6688 %{
 6689     instruction_count(10); multiple_bundles; force_serialization;
 6690     fixed_latency(100);
 6691     D0  : S0(2);
 6692     MEM : S3(2);
 6693 %}
 6694 
 6695 // The real do-nothing guy
 6696 pipe_class empty()
 6697 %{
 6698     instruction_count(0);
 6699 %}
 6700 
 6701 // Define the class for the Nop node
 6702 define
 6703 %{
 6704    MachNop = empty;
 6705 %}
 6706 
 6707 %}
 6708 
 6709 //----------INSTRUCTIONS-------------------------------------------------------
 6710 //
 6711 // match      -- States which machine-independent subtree may be replaced
 6712 //               by this instruction.
 6713 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6714 //               selection to identify a minimum cost tree of machine
 6715 //               instructions that matches a tree of machine-independent
 6716 //               instructions.
 6717 // format     -- A string providing the disassembly for this instruction.
 6718 //               The value of an instruction's operand may be inserted
 6719 //               by referring to it with a '$' prefix.
// opcode     -- Up to three instruction opcodes may be provided.  These are
//               referred to within an encode class as $primary, $secondary,
//               and $tertiary respectively.  The primary opcode is commonly used to
 6723 //               indicate the type of machine instruction, while secondary
 6724 //               and tertiary are often used for prefix options or addressing
 6725 //               modes.
 6726 // ins_encode -- A list of encode classes with parameters. The encode class
 6727 //               name must have been defined in an 'enc_class' specification
 6728 //               in the encode section of the architecture description.
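
// Putting these attributes together, a minimal rule looks like the following
// sketch (the real definitions appear below):
//
//   instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));  // ideal subtree this rule replaces
//     effect(KILL cr);                // addl clobbers the flags register
//     format %{ "addl    $dst, $src" %}
//     ins_encode %{ __ addl($dst$$Register, $src$$Register); %}
//     ins_pipe(ialu_reg_reg);
//   %}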
 6729 
 6730 // ============================================================================
 6731 
 6732 instruct ShouldNotReachHere() %{
 6733   match(Halt);
 6734   format %{ "stop\t# ShouldNotReachHere" %}
 6735   ins_encode %{
 6736     if (is_reachable()) {
 6737       const char* str = __ code_string(_halt_reason);
 6738       __ stop(str);
 6739     }
 6740   %}
 6741   ins_pipe(pipe_slow);
 6742 %}
 6743 
 6744 // ============================================================================
 6745 
 6746 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6747 // Load Float
 6748 instruct MoveF2VL(vlRegF dst, regF src) %{
 6749   match(Set dst src);
 6750   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6751   ins_encode %{
 6752     ShouldNotReachHere();
 6753   %}
 6754   ins_pipe( fpu_reg_reg );
 6755 %}
 6756 
 6757 // Load Float
 6758 instruct MoveF2LEG(legRegF dst, regF src) %{
 6759   match(Set dst src);
 6760   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6761   ins_encode %{
 6762     ShouldNotReachHere();
 6763   %}
 6764   ins_pipe( fpu_reg_reg );
 6765 %}
 6766 
 6767 // Load Float
 6768 instruct MoveVL2F(regF dst, vlRegF src) %{
 6769   match(Set dst src);
 6770   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6771   ins_encode %{
 6772     ShouldNotReachHere();
 6773   %}
 6774   ins_pipe( fpu_reg_reg );
 6775 %}
 6776 
 6777 // Load Float
 6778 instruct MoveLEG2F(regF dst, legRegF src) %{
 6779   match(Set dst src);
 6780   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6781   ins_encode %{
 6782     ShouldNotReachHere();
 6783   %}
 6784   ins_pipe( fpu_reg_reg );
 6785 %}
 6786 
 6787 // Load Double
 6788 instruct MoveD2VL(vlRegD dst, regD src) %{
 6789   match(Set dst src);
 6790   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6791   ins_encode %{
 6792     ShouldNotReachHere();
 6793   %}
 6794   ins_pipe( fpu_reg_reg );
 6795 %}
 6796 
 6797 // Load Double
 6798 instruct MoveD2LEG(legRegD dst, regD src) %{
 6799   match(Set dst src);
 6800   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6801   ins_encode %{
 6802     ShouldNotReachHere();
 6803   %}
 6804   ins_pipe( fpu_reg_reg );
 6805 %}
 6806 
 6807 // Load Double
 6808 instruct MoveVL2D(regD dst, vlRegD src) %{
 6809   match(Set dst src);
 6810   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6811   ins_encode %{
 6812     ShouldNotReachHere();
 6813   %}
 6814   ins_pipe( fpu_reg_reg );
 6815 %}
 6816 
 6817 // Load Double
 6818 instruct MoveLEG2D(regD dst, legRegD src) %{
 6819   match(Set dst src);
 6820   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6821   ins_encode %{
 6822     ShouldNotReachHere();
 6823   %}
 6824   ins_pipe( fpu_reg_reg );
 6825 %}
 6826 
 6827 //----------Load/Store/Move Instructions---------------------------------------
 6828 //----------Load Instructions--------------------------------------------------
 6829 
 6830 // Load Byte (8 bit signed)
 6831 instruct loadB(rRegI dst, memory mem)
 6832 %{
 6833   match(Set dst (LoadB mem));
 6834 
 6835   ins_cost(125);
 6836   format %{ "movsbl  $dst, $mem\t# byte" %}
 6837 
 6838   ins_encode %{
 6839     __ movsbl($dst$$Register, $mem$$Address);
 6840   %}
 6841 
 6842   ins_pipe(ialu_reg_mem);
 6843 %}
 6844 
 6845 // Load Byte (8 bit signed) into Long Register
 6846 instruct loadB2L(rRegL dst, memory mem)
 6847 %{
 6848   match(Set dst (ConvI2L (LoadB mem)));
 6849 
 6850   ins_cost(125);
 6851   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6852 
 6853   ins_encode %{
 6854     __ movsbq($dst$$Register, $mem$$Address);
 6855   %}
 6856 
 6857   ins_pipe(ialu_reg_mem);
 6858 %}
 6859 
 6860 // Load Unsigned Byte (8 bit UNsigned)
 6861 instruct loadUB(rRegI dst, memory mem)
 6862 %{
 6863   match(Set dst (LoadUB mem));
 6864 
 6865   ins_cost(125);
 6866   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6867 
 6868   ins_encode %{
 6869     __ movzbl($dst$$Register, $mem$$Address);
 6870   %}
 6871 
 6872   ins_pipe(ialu_reg_mem);
 6873 %}
 6874 
 6875 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 6876 instruct loadUB2L(rRegL dst, memory mem)
 6877 %{
 6878   match(Set dst (ConvI2L (LoadUB mem)));
 6879 
 6880   ins_cost(125);
 6881   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6882 
 6883   ins_encode %{
 6884     __ movzbq($dst$$Register, $mem$$Address);
 6885   %}
 6886 
 6887   ins_pipe(ialu_reg_mem);
 6888 %}
 6889 
 6890 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
 6891 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6892   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6893   effect(KILL cr);
 6894 
 6895   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6896             "andl    $dst, right_n_bits($mask, 8)" %}
 6897   ins_encode %{
 6898     Register Rdst = $dst$$Register;
 6899     __ movzbq(Rdst, $mem$$Address);
 6900     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6901   %}
 6902   ins_pipe(ialu_reg_mem);
 6903 %}
 6904 
 6905 // Load Short (16 bit signed)
 6906 instruct loadS(rRegI dst, memory mem)
 6907 %{
 6908   match(Set dst (LoadS mem));
 6909 
 6910   ins_cost(125);
 6911   format %{ "movswl $dst, $mem\t# short" %}
 6912 
 6913   ins_encode %{
 6914     __ movswl($dst$$Register, $mem$$Address);
 6915   %}
 6916 
 6917   ins_pipe(ialu_reg_mem);
 6918 %}
 6919 
 6920 // Load Short (16 bit signed) to Byte (8 bit signed)
 6921 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6922   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6923 
 6924   ins_cost(125);
 6925   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6926   ins_encode %{
 6927     __ movsbl($dst$$Register, $mem$$Address);
 6928   %}
 6929   ins_pipe(ialu_reg_mem);
 6930 %}
 6931 
 6932 // Load Short (16 bit signed) into Long Register
 6933 instruct loadS2L(rRegL dst, memory mem)
 6934 %{
 6935   match(Set dst (ConvI2L (LoadS mem)));
 6936 
 6937   ins_cost(125);
 6938   format %{ "movswq $dst, $mem\t# short -> long" %}
 6939 
 6940   ins_encode %{
 6941     __ movswq($dst$$Register, $mem$$Address);
 6942   %}
 6943 
 6944   ins_pipe(ialu_reg_mem);
 6945 %}
 6946 
 6947 // Load Unsigned Short/Char (16 bit UNsigned)
 6948 instruct loadUS(rRegI dst, memory mem)
 6949 %{
 6950   match(Set dst (LoadUS mem));
 6951 
 6952   ins_cost(125);
 6953   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 6954 
 6955   ins_encode %{
 6956     __ movzwl($dst$$Register, $mem$$Address);
 6957   %}
 6958 
 6959   ins_pipe(ialu_reg_mem);
 6960 %}
 6961 
 6962 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 6963 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6964   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 6965 
 6966   ins_cost(125);
 6967   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 6968   ins_encode %{
 6969     __ movsbl($dst$$Register, $mem$$Address);
 6970   %}
 6971   ins_pipe(ialu_reg_mem);
 6972 %}
 6973 
 6974 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 6975 instruct loadUS2L(rRegL dst, memory mem)
 6976 %{
 6977   match(Set dst (ConvI2L (LoadUS mem)));
 6978 
 6979   ins_cost(125);
 6980   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 6981 
 6982   ins_encode %{
 6983     __ movzwq($dst$$Register, $mem$$Address);
 6984   %}
 6985 
 6986   ins_pipe(ialu_reg_mem);
 6987 %}
 6988 
 6989 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 6990 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 6991   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 6992 
 6993   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 6994   ins_encode %{
 6995     __ movzbq($dst$$Register, $mem$$Address);
 6996   %}
 6997   ins_pipe(ialu_reg_mem);
 6998 %}
 6999 
 7000 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
 7001 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7002   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7003   effect(KILL cr);
 7004 
 7005   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7006             "andl    $dst, right_n_bits($mask, 16)" %}
 7007   ins_encode %{
 7008     Register Rdst = $dst$$Register;
 7009     __ movzwq(Rdst, $mem$$Address);
 7010     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7011   %}
 7012   ins_pipe(ialu_reg_mem);
 7013 %}
 7014 
 7015 // Load Integer
 7016 instruct loadI(rRegI dst, memory mem)
 7017 %{
 7018   match(Set dst (LoadI mem));
 7019 
 7020   ins_cost(125);
 7021   format %{ "movl    $dst, $mem\t# int" %}
 7022 
 7023   ins_encode %{
 7024     __ movl($dst$$Register, $mem$$Address);
 7025   %}
 7026 
 7027   ins_pipe(ialu_reg_mem);
 7028 %}
 7029 
 7030 // Load Integer (32 bit signed) to Byte (8 bit signed)
 7031 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7032   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7033 
 7034   ins_cost(125);
 7035   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7036   ins_encode %{
 7037     __ movsbl($dst$$Register, $mem$$Address);
 7038   %}
 7039   ins_pipe(ialu_reg_mem);
 7040 %}
 7041 
 7042 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 7043 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7044   match(Set dst (AndI (LoadI mem) mask));
 7045 
 7046   ins_cost(125);
 7047   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7048   ins_encode %{
 7049     __ movzbl($dst$$Register, $mem$$Address);
 7050   %}
 7051   ins_pipe(ialu_reg_mem);
 7052 %}
 7053 
 7054 // Load Integer (32 bit signed) to Short (16 bit signed)
 7055 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7056   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7057 
 7058   ins_cost(125);
 7059   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7060   ins_encode %{
 7061     __ movswl($dst$$Register, $mem$$Address);
 7062   %}
 7063   ins_pipe(ialu_reg_mem);
 7064 %}
 7065 
 7066 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 7067 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7068   match(Set dst (AndI (LoadI mem) mask));
 7069 
 7070   ins_cost(125);
 7071   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7072   ins_encode %{
 7073     __ movzwl($dst$$Register, $mem$$Address);
 7074   %}
 7075   ins_pipe(ialu_reg_mem);
 7076 %}
 7077 
 7078 // Load Integer into Long Register
 7079 instruct loadI2L(rRegL dst, memory mem)
 7080 %{
 7081   match(Set dst (ConvI2L (LoadI mem)));
 7082 
 7083   ins_cost(125);
 7084   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7085 
 7086   ins_encode %{
 7087     __ movslq($dst$$Register, $mem$$Address);
 7088   %}
 7089 
 7090   ins_pipe(ialu_reg_mem);
 7091 %}
 7092 
 7093 // Load Integer with mask 0xFF into Long Register
 7094 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7095   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7096 
 7097   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7098   ins_encode %{
 7099     __ movzbq($dst$$Register, $mem$$Address);
 7100   %}
 7101   ins_pipe(ialu_reg_mem);
 7102 %}
 7103 
 7104 // Load Integer with mask 0xFFFF into Long Register
 7105 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7106   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7107 
 7108   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7109   ins_encode %{
 7110     __ movzwq($dst$$Register, $mem$$Address);
 7111   %}
 7112   ins_pipe(ialu_reg_mem);
 7113 %}
 7114 
 7115 // Load Integer with a 31-bit mask into Long Register
 7116 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7117   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7118   effect(KILL cr);
 7119 
 7120   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7121             "andl    $dst, $mask" %}
 7122   ins_encode %{
 7123     Register Rdst = $dst$$Register;
 7124     __ movl(Rdst, $mem$$Address);
 7125     __ andl(Rdst, $mask$$constant);
 7126   %}
 7127   ins_pipe(ialu_reg_mem);
 7128 %}
 7129 
 7130 // Load Unsigned Integer into Long Register
 7131 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7132 %{
 7133   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7134 
 7135   ins_cost(125);
 7136   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7137 
 7138   ins_encode %{
 7139     __ movl($dst$$Register, $mem$$Address);
 7140   %}
 7141 
 7142   ins_pipe(ialu_reg_mem);
 7143 %}
 7144 
 7145 // Load Long
 7146 instruct loadL(rRegL dst, memory mem)
 7147 %{
 7148   match(Set dst (LoadL mem));
 7149 
 7150   ins_cost(125);
 7151   format %{ "movq    $dst, $mem\t# long" %}
 7152 
 7153   ins_encode %{
 7154     __ movq($dst$$Register, $mem$$Address);
 7155   %}
 7156 
 7157   ins_pipe(ialu_reg_mem); // XXX
 7158 %}
 7159 
 7160 // Load Range
 7161 instruct loadRange(rRegI dst, memory mem)
 7162 %{
 7163   match(Set dst (LoadRange mem));
 7164 
 7165   ins_cost(125); // XXX
 7166   format %{ "movl    $dst, $mem\t# range" %}
 7167   ins_encode %{
 7168     __ movl($dst$$Register, $mem$$Address);
 7169   %}
 7170   ins_pipe(ialu_reg_mem);
 7171 %}
 7172 
 7173 // Load Pointer
 7174 instruct loadP(rRegP dst, memory mem)
 7175 %{
 7176   match(Set dst (LoadP mem));
 7177   predicate(n->as_Load()->barrier_data() == 0);
 7178 
 7179   ins_cost(125); // XXX
 7180   format %{ "movq    $dst, $mem\t# ptr" %}
 7181   ins_encode %{
 7182     __ movq($dst$$Register, $mem$$Address);
 7183   %}
 7184   ins_pipe(ialu_reg_mem); // XXX
 7185 %}
 7186 
 7187 // Load Compressed Pointer
 7188 instruct loadN(rRegN dst, memory mem)
 7189 %{
  predicate(n->as_Load()->barrier_data() == 0);
  match(Set dst (LoadN mem));

  ins_cost(125); // XXX
  format %{ "movl    $dst, $mem\t# compressed ptr" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
 7199 %}
 7200 
 7201 
 7202 // Load Klass Pointer
 7203 instruct loadKlass(rRegP dst, memory mem)
 7204 %{
 7205   match(Set dst (LoadKlass mem));
 7206 
 7207   ins_cost(125); // XXX
 7208   format %{ "movq    $dst, $mem\t# class" %}
 7209   ins_encode %{
 7210     __ movq($dst$$Register, $mem$$Address);
 7211   %}
 7212   ins_pipe(ialu_reg_mem); // XXX
 7213 %}
 7214 
 7215 // Load narrow Klass Pointer
 7216 instruct loadNKlass(rRegN dst, memory mem)
 7217 %{
 7218   predicate(!UseCompactObjectHeaders);
 7219   match(Set dst (LoadNKlass mem));
 7220 
 7221   ins_cost(125); // XXX
 7222   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7223   ins_encode %{
 7224     __ movl($dst$$Register, $mem$$Address);
 7225   %}
 7226   ins_pipe(ialu_reg_mem); // XXX
 7227 %}
 7228 
 7229 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7230 %{
 7231   predicate(UseCompactObjectHeaders);
 7232   match(Set dst (LoadNKlass mem));
 7233   effect(KILL cr);
 7234   ins_cost(125);
 7235   format %{
 7236     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7237     "shrl    $dst, markWord::klass_shift_at_offset"
 7238   %}
 7239   ins_encode %{
 7240     if (UseAPX) {
 7241       __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
 7242     }
 7243     else {
 7244       __ movl($dst$$Register, $mem$$Address);
 7245       __ shrl($dst$$Register, markWord::klass_shift_at_offset);
 7246     }
 7247   %}
 7248   ins_pipe(ialu_reg_mem);
 7249 %}
 7250 
 7251 // Load Float
 7252 instruct loadF(regF dst, memory mem)
 7253 %{
 7254   match(Set dst (LoadF mem));
 7255 
 7256   ins_cost(145); // XXX
 7257   format %{ "movss   $dst, $mem\t# float" %}
 7258   ins_encode %{
 7259     __ movflt($dst$$XMMRegister, $mem$$Address);
 7260   %}
 7261   ins_pipe(pipe_slow); // XXX
 7262 %}
 7263 
 7264 // Load Double
 7265 instruct loadD_partial(regD dst, memory mem)
 7266 %{
 7267   predicate(!UseXmmLoadAndClearUpper);
 7268   match(Set dst (LoadD mem));
 7269 
 7270   ins_cost(145); // XXX
 7271   format %{ "movlpd  $dst, $mem\t# double" %}
 7272   ins_encode %{
 7273     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7274   %}
 7275   ins_pipe(pipe_slow); // XXX
 7276 %}
 7277 
 7278 instruct loadD(regD dst, memory mem)
 7279 %{
 7280   predicate(UseXmmLoadAndClearUpper);
 7281   match(Set dst (LoadD mem));
 7282 
 7283   ins_cost(145); // XXX
 7284   format %{ "movsd   $dst, $mem\t# double" %}
 7285   ins_encode %{
 7286     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7287   %}
 7288   ins_pipe(pipe_slow); // XXX
 7289 %}
 7290 
 7291 // max = java.lang.Math.max(float a, float b)
 7292 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
 7293   predicate(VM_Version::supports_avx10_2());
 7294   match(Set dst (MaxF a b));
 7295   format %{ "maxF $dst, $a, $b" %}
 7296   ins_encode %{
 7297     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7298   %}
 7299   ins_pipe( pipe_slow );
 7300 %}
 7301 
 7302 // max = java.lang.Math.max(float a, float b)
 7303 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7304   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7305   match(Set dst (MaxF a b));
 7306   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7307   format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7308   ins_encode %{
 7309     __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7310   %}
 7311   ins_pipe( pipe_slow );
 7312 %}
 7313 
 7314 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7315   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7316   match(Set dst (MaxF a b));
 7317   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7318 
 7319   format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
 7320   ins_encode %{
 7321     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7322                     false /*min*/, true /*single*/);
 7323   %}
 7324   ins_pipe( pipe_slow );
 7325 %}
 7326 
 7327 // max = java.lang.Math.max(double a, double b)
 7328 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
 7329   predicate(VM_Version::supports_avx10_2());
 7330   match(Set dst (MaxD a b));
 7331   format %{ "maxD $dst, $a, $b" %}
 7332   ins_encode %{
 7333     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
 7334   %}
 7335   ins_pipe( pipe_slow );
 7336 %}
 7337 
 7338 // max = java.lang.Math.max(double a, double b)
 7339 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7340   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7341   match(Set dst (MaxD a b));
 7342   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7343   format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7344   ins_encode %{
 7345     __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7346   %}
 7347   ins_pipe( pipe_slow );
 7348 %}
 7349 
 7350 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7351   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7352   match(Set dst (MaxD a b));
 7353   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7354 
 7355   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7356   ins_encode %{
 7357     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7358                     false /*min*/, false /*single*/);
 7359   %}
 7360   ins_pipe( pipe_slow );
 7361 %}
 7362 
// min = java.lang.Math.min(float a, float b)
 7364 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
 7365   predicate(VM_Version::supports_avx10_2());
 7366   match(Set dst (MinF a b));
 7367   format %{ "minF $dst, $a, $b" %}
 7368   ins_encode %{
 7369     __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7370   %}
 7371   ins_pipe( pipe_slow );
 7372 %}
 7373 
 7374 // min = java.lang.Math.min(float a, float b)
 7375 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7376   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7377   match(Set dst (MinF a b));
 7378   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7379   format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7380   ins_encode %{
 7381     __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7382   %}
 7383   ins_pipe( pipe_slow );
 7384 %}
 7385 
 7386 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7387   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7388   match(Set dst (MinF a b));
 7389   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7390 
 7391   format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7392   ins_encode %{
 7393     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7394                     true /*min*/, true /*single*/);
 7395   %}
 7396   ins_pipe( pipe_slow );
 7397 %}
 7398 
// min = java.lang.Math.min(double a, double b)
 7400 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
 7401   predicate(VM_Version::supports_avx10_2());
 7402   match(Set dst (MinD a b));
 7403   format %{ "minD $dst, $a, $b" %}
 7404   ins_encode %{
 7405     __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
 7406   %}
 7407   ins_pipe( pipe_slow );
 7408 %}
 7409 
 7410 // min = java.lang.Math.min(double a, double b)
 7411 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7412   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7413   match(Set dst (MinD a b));
 7414   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7415     format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7416   ins_encode %{
 7417     __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7418   %}
 7419   ins_pipe( pipe_slow );
 7420 %}
 7421 
 7422 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7423   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7424   match(Set dst (MinD a b));
 7425   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7426 
 7427   format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7428   ins_encode %{
 7429     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
 7430                     true /*min*/, false /*single*/);
 7431   %}
 7432   ins_pipe( pipe_slow );
 7433 %}
 7434 
 7435 // Load Effective Address
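// The lea forms match an address expression directly (Set dst mem), folding
// base, index, scale and displacement into a single leaq. leaq performs only
// the address arithmetic and does not touch the flags, so no KILL cr effect
// is needed.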
 7436 instruct leaP8(rRegP dst, indOffset8 mem)
 7437 %{
 7438   match(Set dst mem);
 7439 
 7440   ins_cost(110); // XXX
 7441   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7442   ins_encode %{
 7443     __ leaq($dst$$Register, $mem$$Address);
 7444   %}
 7445   ins_pipe(ialu_reg_reg_fat);
 7446 %}
 7447 
 7448 instruct leaP32(rRegP dst, indOffset32 mem)
 7449 %{
 7450   match(Set dst mem);
 7451 
 7452   ins_cost(110);
 7453   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7454   ins_encode %{
 7455     __ leaq($dst$$Register, $mem$$Address);
 7456   %}
 7457   ins_pipe(ialu_reg_reg_fat);
 7458 %}
 7459 
 7460 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7461 %{
 7462   match(Set dst mem);
 7463 
 7464   ins_cost(110);
 7465   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7466   ins_encode %{
 7467     __ leaq($dst$$Register, $mem$$Address);
 7468   %}
 7469   ins_pipe(ialu_reg_reg_fat);
 7470 %}
 7471 
 7472 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7473 %{
 7474   match(Set dst mem);
 7475 
 7476   ins_cost(110);
 7477   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7478   ins_encode %{
 7479     __ leaq($dst$$Register, $mem$$Address);
 7480   %}
 7481   ins_pipe(ialu_reg_reg_fat);
 7482 %}
 7483 
 7484 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7485 %{
 7486   match(Set dst mem);
 7487 
 7488   ins_cost(110);
 7489   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7490   ins_encode %{
 7491     __ leaq($dst$$Register, $mem$$Address);
 7492   %}
 7493   ins_pipe(ialu_reg_reg_fat);
 7494 %}
 7495 
 7496 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7497 %{
 7498   match(Set dst mem);
 7499 
 7500   ins_cost(110);
 7501   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7502   ins_encode %{
 7503     __ leaq($dst$$Register, $mem$$Address);
 7504   %}
 7505   ins_pipe(ialu_reg_reg_fat);
 7506 %}
 7507 
 7508 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7509 %{
 7510   match(Set dst mem);
 7511 
 7512   ins_cost(110);
 7513   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7514   ins_encode %{
 7515     __ leaq($dst$$Register, $mem$$Address);
 7516   %}
 7517   ins_pipe(ialu_reg_reg_fat);
 7518 %}
 7519 
 7520 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7521 %{
 7522   match(Set dst mem);
 7523 
 7524   ins_cost(110);
 7525   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7526   ins_encode %{
 7527     __ leaq($dst$$Register, $mem$$Address);
 7528   %}
 7529   ins_pipe(ialu_reg_reg_fat);
 7530 %}
 7531 
// Load Effective Address which uses a narrow (32-bit) oop
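// With a non-zero shift the decode of the narrow oop is folded into the
// scaled-index addressing mode; the *Narrow forms that follow require
// CompressedOops::shift() == 0, where a compressed oop is simply the raw
// 32-bit address and can be used directly.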
 7533 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7534 %{
 7535   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7536   match(Set dst mem);
 7537 
 7538   ins_cost(110);
 7539   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7540   ins_encode %{
 7541     __ leaq($dst$$Register, $mem$$Address);
 7542   %}
 7543   ins_pipe(ialu_reg_reg_fat);
 7544 %}
 7545 
 7546 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7547 %{
 7548   predicate(CompressedOops::shift() == 0);
 7549   match(Set dst mem);
 7550 
 7551   ins_cost(110); // XXX
 7552   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7553   ins_encode %{
 7554     __ leaq($dst$$Register, $mem$$Address);
 7555   %}
 7556   ins_pipe(ialu_reg_reg_fat);
 7557 %}
 7558 
 7559 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7560 %{
 7561   predicate(CompressedOops::shift() == 0);
 7562   match(Set dst mem);
 7563 
 7564   ins_cost(110);
 7565   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7566   ins_encode %{
 7567     __ leaq($dst$$Register, $mem$$Address);
 7568   %}
 7569   ins_pipe(ialu_reg_reg_fat);
 7570 %}
 7571 
 7572 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7573 %{
 7574   predicate(CompressedOops::shift() == 0);
 7575   match(Set dst mem);
 7576 
 7577   ins_cost(110);
 7578   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7579   ins_encode %{
 7580     __ leaq($dst$$Register, $mem$$Address);
 7581   %}
 7582   ins_pipe(ialu_reg_reg_fat);
 7583 %}
 7584 
 7585 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7586 %{
 7587   predicate(CompressedOops::shift() == 0);
 7588   match(Set dst mem);
 7589 
 7590   ins_cost(110);
 7591   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7592   ins_encode %{
 7593     __ leaq($dst$$Register, $mem$$Address);
 7594   %}
 7595   ins_pipe(ialu_reg_reg_fat);
 7596 %}
 7597 
 7598 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7599 %{
 7600   predicate(CompressedOops::shift() == 0);
 7601   match(Set dst mem);
 7602 
 7603   ins_cost(110);
 7604   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7605   ins_encode %{
 7606     __ leaq($dst$$Register, $mem$$Address);
 7607   %}
 7608   ins_pipe(ialu_reg_reg_fat);
 7609 %}
 7610 
 7611 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7612 %{
 7613   predicate(CompressedOops::shift() == 0);
 7614   match(Set dst mem);
 7615 
 7616   ins_cost(110);
 7617   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7618   ins_encode %{
 7619     __ leaq($dst$$Register, $mem$$Address);
 7620   %}
 7621   ins_pipe(ialu_reg_reg_fat);
 7622 %}
 7623 
 7624 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7625 %{
 7626   predicate(CompressedOops::shift() == 0);
 7627   match(Set dst mem);
 7628 
 7629   ins_cost(110);
 7630   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7631   ins_encode %{
 7632     __ leaq($dst$$Register, $mem$$Address);
 7633   %}
 7634   ins_pipe(ialu_reg_reg_fat);
 7635 %}
 7636 
 7637 instruct loadConI(rRegI dst, immI src)
 7638 %{
 7639   match(Set dst src);
 7640 
 7641   format %{ "movl    $dst, $src\t# int" %}
 7642   ins_encode %{
 7643     __ movl($dst$$Register, $src$$constant);
 7644   %}
 7645   ins_pipe(ialu_reg_fat); // XXX
 7646 %}
 7647 
 7648 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7649 %{
 7650   match(Set dst src);
 7651   effect(KILL cr);
 7652 
 7653   ins_cost(50);
 7654   format %{ "xorl    $dst, $dst\t# int" %}
 7655   ins_encode %{
 7656     __ xorl($dst$$Register, $dst$$Register);
 7657   %}
 7658   ins_pipe(ialu_reg);
 7659 %}
 7660 
 7661 instruct loadConL(rRegL dst, immL src)
 7662 %{
 7663   match(Set dst src);
 7664 
 7665   ins_cost(150);
 7666   format %{ "movq    $dst, $src\t# long" %}
 7667   ins_encode %{
 7668     __ mov64($dst$$Register, $src$$constant);
 7669   %}
 7670   ins_pipe(ialu_reg);
 7671 %}
 7672 
 7673 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7674 %{
 7675   match(Set dst src);
 7676   effect(KILL cr);
 7677 
 7678   ins_cost(50);
 7679   format %{ "xorl    $dst, $dst\t# long" %}
 7680   ins_encode %{
 7681     __ xorl($dst$$Register, $dst$$Register);
 7682   %}
 7683   ins_pipe(ialu_reg); // XXX
 7684 %}
 7685 
 7686 instruct loadConUL32(rRegL dst, immUL32 src)
 7687 %{
 7688   match(Set dst src);
 7689 
 7690   ins_cost(60);
 7691   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7692   ins_encode %{
 7693     __ movl($dst$$Register, $src$$constant);
 7694   %}
 7695   ins_pipe(ialu_reg);
 7696 %}
 7697 
 7698 instruct loadConL32(rRegL dst, immL32 src)
 7699 %{
 7700   match(Set dst src);
 7701 
 7702   ins_cost(70);
 7703   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7704   ins_encode %{
 7705     __ movq($dst$$Register, $src$$constant);
 7706   %}
 7707   ins_pipe(ialu_reg);
 7708 %}
 7709 
 7710 instruct loadConP(rRegP dst, immP con) %{
 7711   match(Set dst con);
 7712 
 7713   format %{ "movq    $dst, $con\t# ptr" %}
 7714   ins_encode %{
 7715     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7716   %}
 7717   ins_pipe(ialu_reg_fat); // XXX
 7718 %}
 7719 
 7720 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7721 %{
 7722   match(Set dst src);
 7723   effect(KILL cr);
 7724 
 7725   ins_cost(50);
 7726   format %{ "xorl    $dst, $dst\t# ptr" %}
 7727   ins_encode %{
 7728     __ xorl($dst$$Register, $dst$$Register);
 7729   %}
 7730   ins_pipe(ialu_reg);
 7731 %}
 7732 
 7733 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7734 %{
 7735   match(Set dst src);
 7736   effect(KILL cr);
 7737 
 7738   ins_cost(60);
 7739   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7740   ins_encode %{
 7741     __ movl($dst$$Register, $src$$constant);
 7742   %}
 7743   ins_pipe(ialu_reg);
 7744 %}
 7745 
 7746 instruct loadConF(regF dst, immF con) %{
 7747   match(Set dst con);
 7748   ins_cost(125);
 7749   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7750   ins_encode %{
 7751     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7752   %}
 7753   ins_pipe(pipe_slow);
 7754 %}
 7755 
 7756 instruct loadConH(regF dst, immH con) %{
 7757   match(Set dst con);
 7758   ins_cost(125);
 7759   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7760   ins_encode %{
 7761     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7762   %}
 7763   ins_pipe(pipe_slow);
 7764 %}
 7765 
 7766 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7767   match(Set dst src);
 7768   effect(KILL cr);
 7769   format %{ "xorq    $dst, $src\t# compressed null pointer" %}
 7770   ins_encode %{
 7771     __ xorq($dst$$Register, $dst$$Register);
 7772   %}
 7773   ins_pipe(ialu_reg);
 7774 %}
 7775 
 7776 instruct loadConN(rRegN dst, immN src) %{
 7777   match(Set dst src);
 7778 
 7779   ins_cost(125);
 7780   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7781   ins_encode %{
 7782     address con = (address)$src$$constant;
 7783     if (con == nullptr) {
 7784       ShouldNotReachHere();
 7785     } else {
 7786       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7787     }
 7788   %}
 7789   ins_pipe(ialu_reg_fat); // XXX
 7790 %}
 7791 
 7792 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7793   match(Set dst src);
 7794 
 7795   ins_cost(125);
 7796   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7797   ins_encode %{
 7798     address con = (address)$src$$constant;
 7799     if (con == nullptr) {
 7800       ShouldNotReachHere();
 7801     } else {
 7802       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7803     }
 7804   %}
 7805   ins_pipe(ialu_reg_fat); // XXX
 7806 %}
 7807 
 7808 instruct loadConF0(regF dst, immF0 src)
 7809 %{
 7810   match(Set dst src);
 7811   ins_cost(100);
 7812 
 7813   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7814   ins_encode %{
 7815     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7816   %}
 7817   ins_pipe(pipe_slow);
 7818 %}
 7819 
// Use the same format since predicate() cannot be used here.
 7821 instruct loadConD(regD dst, immD con) %{
 7822   match(Set dst con);
 7823   ins_cost(125);
 7824   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7825   ins_encode %{
 7826     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7827   %}
 7828   ins_pipe(pipe_slow);
 7829 %}
 7830 
 7831 instruct loadConD0(regD dst, immD0 src)
 7832 %{
 7833   match(Set dst src);
 7834   ins_cost(100);
 7835 
 7836   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7837   ins_encode %{
 7838     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7839   %}
 7840   ins_pipe(pipe_slow);
 7841 %}
 7842 
 7843 instruct loadSSI(rRegI dst, stackSlotI src)
 7844 %{
 7845   match(Set dst src);
 7846 
 7847   ins_cost(125);
 7848   format %{ "movl    $dst, $src\t# int stk" %}
 7849   ins_encode %{
 7850     __ movl($dst$$Register, $src$$Address);
 7851   %}
 7852   ins_pipe(ialu_reg_mem);
 7853 %}
 7854 
 7855 instruct loadSSL(rRegL dst, stackSlotL src)
 7856 %{
 7857   match(Set dst src);
 7858 
 7859   ins_cost(125);
 7860   format %{ "movq    $dst, $src\t# long stk" %}
 7861   ins_encode %{
 7862     __ movq($dst$$Register, $src$$Address);
 7863   %}
 7864   ins_pipe(ialu_reg_mem);
 7865 %}
 7866 
 7867 instruct loadSSP(rRegP dst, stackSlotP src)
 7868 %{
 7869   match(Set dst src);
 7870 
 7871   ins_cost(125);
 7872   format %{ "movq    $dst, $src\t# ptr stk" %}
 7873   ins_encode %{
 7874     __ movq($dst$$Register, $src$$Address);
 7875   %}
 7876   ins_pipe(ialu_reg_mem);
 7877 %}
 7878 
 7879 instruct loadSSF(regF dst, stackSlotF src)
 7880 %{
 7881   match(Set dst src);
 7882 
 7883   ins_cost(125);
 7884   format %{ "movss   $dst, $src\t# float stk" %}
 7885   ins_encode %{
 7886     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7887   %}
 7888   ins_pipe(pipe_slow); // XXX
 7889 %}
 7890 
// Use the same format since predicate() cannot be used here.
 7892 instruct loadSSD(regD dst, stackSlotD src)
 7893 %{
 7894   match(Set dst src);
 7895 
 7896   ins_cost(125);
 7897   format %{ "movsd   $dst, $src\t# double stk" %}
 7898   ins_encode  %{
 7899     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7900   %}
 7901   ins_pipe(pipe_slow); // XXX
 7902 %}
 7903 
 7904 // Prefetch instructions for allocation.
 7905 // Must be safe to execute with invalid address (cannot fault).
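// AllocatePrefetchInstr selects the variant: 0 = PREFETCHNTA, 1 = PREFETCHT0,
// 2 = PREFETCHT2, 3 = PREFETCHW.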
 7906 
 7907 instruct prefetchAlloc( memory mem ) %{
 7908   predicate(AllocatePrefetchInstr==3);
 7909   match(PrefetchAllocation mem);
 7910   ins_cost(125);
 7911 
 7912   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7913   ins_encode %{
 7914     __ prefetchw($mem$$Address);
 7915   %}
 7916   ins_pipe(ialu_mem);
 7917 %}
 7918 
 7919 instruct prefetchAllocNTA( memory mem ) %{
 7920   predicate(AllocatePrefetchInstr==0);
 7921   match(PrefetchAllocation mem);
 7922   ins_cost(125);
 7923 
 7924   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7925   ins_encode %{
 7926     __ prefetchnta($mem$$Address);
 7927   %}
 7928   ins_pipe(ialu_mem);
 7929 %}
 7930 
 7931 instruct prefetchAllocT0( memory mem ) %{
 7932   predicate(AllocatePrefetchInstr==1);
 7933   match(PrefetchAllocation mem);
 7934   ins_cost(125);
 7935 
 7936   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7937   ins_encode %{
 7938     __ prefetcht0($mem$$Address);
 7939   %}
 7940   ins_pipe(ialu_mem);
 7941 %}
 7942 
 7943 instruct prefetchAllocT2( memory mem ) %{
 7944   predicate(AllocatePrefetchInstr==2);
 7945   match(PrefetchAllocation mem);
 7946   ins_cost(125);
 7947 
 7948   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 7949   ins_encode %{
 7950     __ prefetcht2($mem$$Address);
 7951   %}
 7952   ins_pipe(ialu_mem);
 7953 %}
 7954 
 7955 //----------Store Instructions-------------------------------------------------
 7956 
 7957 // Store Byte
 7958 instruct storeB(memory mem, rRegI src)
 7959 %{
 7960   match(Set mem (StoreB mem src));
 7961 
 7962   ins_cost(125); // XXX
 7963   format %{ "movb    $mem, $src\t# byte" %}
 7964   ins_encode %{
 7965     __ movb($mem$$Address, $src$$Register);
 7966   %}
 7967   ins_pipe(ialu_mem_reg);
 7968 %}
 7969 
 7970 // Store Char/Short
 7971 instruct storeC(memory mem, rRegI src)
 7972 %{
 7973   match(Set mem (StoreC mem src));
 7974 
 7975   ins_cost(125); // XXX
 7976   format %{ "movw    $mem, $src\t# char/short" %}
 7977   ins_encode %{
 7978     __ movw($mem$$Address, $src$$Register);
 7979   %}
 7980   ins_pipe(ialu_mem_reg);
 7981 %}
 7982 
 7983 // Store Integer
 7984 instruct storeI(memory mem, rRegI src)
 7985 %{
 7986   match(Set mem (StoreI mem src));
 7987 
 7988   ins_cost(125); // XXX
 7989   format %{ "movl    $mem, $src\t# int" %}
 7990   ins_encode %{
 7991     __ movl($mem$$Address, $src$$Register);
 7992   %}
 7993   ins_pipe(ialu_mem_reg);
 7994 %}
 7995 
 7996 // Store Long
 7997 instruct storeL(memory mem, rRegL src)
 7998 %{
 7999   match(Set mem (StoreL mem src));
 8000 
 8001   ins_cost(125); // XXX
 8002   format %{ "movq    $mem, $src\t# long" %}
 8003   ins_encode %{
 8004     __ movq($mem$$Address, $src$$Register);
 8005   %}
 8006   ins_pipe(ialu_mem_reg); // XXX
 8007 %}
 8008 
 8009 // Store Pointer
 8010 instruct storeP(memory mem, any_RegP src)
 8011 %{
 8012   predicate(n->as_Store()->barrier_data() == 0);
 8013   match(Set mem (StoreP mem src));
 8014 
 8015   ins_cost(125); // XXX
 8016   format %{ "movq    $mem, $src\t# ptr" %}
 8017   ins_encode %{
 8018     __ movq($mem$$Address, $src$$Register);
 8019   %}
 8020   ins_pipe(ialu_mem_reg);
 8021 %}
 8022 
 8023 instruct storeImmP0(memory mem, immP0 zero)
 8024 %{
 8025   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8026   match(Set mem (StoreP mem zero));
 8027 
 8028   ins_cost(125); // XXX
 8029   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8030   ins_encode %{
 8031     __ movq($mem$$Address, r12);
 8032   %}
 8033   ins_pipe(ialu_mem_reg);
 8034 %}
 8035 
 8036 // Store Null Pointer, mark word, or other simple pointer constant.
 8037 instruct storeImmP(memory mem, immP31 src)
 8038 %{
 8039   predicate(n->as_Store()->barrier_data() == 0);
 8040   match(Set mem (StoreP mem src));
 8041 
 8042   ins_cost(150); // XXX
 8043   format %{ "movq    $mem, $src\t# ptr" %}
 8044   ins_encode %{
 8045     __ movq($mem$$Address, $src$$constant);
 8046   %}
 8047   ins_pipe(ialu_mem_imm);
 8048 %}
 8049 
 8050 // Store Compressed Pointer
 8051 instruct storeN(memory mem, rRegN src)
 8052 %{
 8053   predicate(n->as_Store()->barrier_data() == 0);
 8054   match(Set mem (StoreN mem src));
 8055 
 8056   ins_cost(125); // XXX
 8057   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8058   ins_encode %{
 8059     __ movl($mem$$Address, $src$$Register);
 8060   %}
 8061   ins_pipe(ialu_mem_reg);
 8062 %}
 8063 
 8064 instruct storeNKlass(memory mem, rRegN src)
 8065 %{
 8066   match(Set mem (StoreNKlass mem src));
 8067 
 8068   ins_cost(125); // XXX
 8069   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8070   ins_encode %{
 8071     __ movl($mem$$Address, $src$$Register);
 8072   %}
 8073   ins_pipe(ialu_mem_reg);
 8074 %}
 8075 
 8076 instruct storeImmN0(memory mem, immN0 zero)
 8077 %{
 8078   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8079   match(Set mem (StoreN mem zero));
 8080 
 8081   ins_cost(125); // XXX
 8082   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8083   ins_encode %{
 8084     __ movl($mem$$Address, r12);
 8085   %}
 8086   ins_pipe(ialu_mem_reg);
 8087 %}
 8088 
 8089 instruct storeImmN(memory mem, immN src)
 8090 %{
 8091   predicate(n->as_Store()->barrier_data() == 0);
 8092   match(Set mem (StoreN mem src));
 8093 
 8094   ins_cost(150); // XXX
 8095   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8096   ins_encode %{
 8097     address con = (address)$src$$constant;
 8098     if (con == nullptr) {
 8099       __ movl($mem$$Address, 0);
 8100     } else {
 8101       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8102     }
 8103   %}
 8104   ins_pipe(ialu_mem_imm);
 8105 %}
 8106 
 8107 instruct storeImmNKlass(memory mem, immNKlass src)
 8108 %{
 8109   match(Set mem (StoreNKlass mem src));
 8110 
 8111   ins_cost(150); // XXX
 8112   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8113   ins_encode %{
 8114     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8115   %}
 8116   ins_pipe(ialu_mem_imm);
 8117 %}
 8118 
 8119 // Store Integer Immediate
 8120 instruct storeImmI0(memory mem, immI_0 zero)
 8121 %{
 8122   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8123   match(Set mem (StoreI mem zero));
 8124 
 8125   ins_cost(125); // XXX
 8126   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8127   ins_encode %{
 8128     __ movl($mem$$Address, r12);
 8129   %}
 8130   ins_pipe(ialu_mem_reg);
 8131 %}
 8132 
 8133 instruct storeImmI(memory mem, immI src)
 8134 %{
 8135   match(Set mem (StoreI mem src));
 8136 
 8137   ins_cost(150);
 8138   format %{ "movl    $mem, $src\t# int" %}
 8139   ins_encode %{
 8140     __ movl($mem$$Address, $src$$constant);
 8141   %}
 8142   ins_pipe(ialu_mem_imm);
 8143 %}
 8144 
 8145 // Store Long Immediate
 8146 instruct storeImmL0(memory mem, immL0 zero)
 8147 %{
 8148   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8149   match(Set mem (StoreL mem zero));
 8150 
 8151   ins_cost(125); // XXX
 8152   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8153   ins_encode %{
 8154     __ movq($mem$$Address, r12);
 8155   %}
 8156   ins_pipe(ialu_mem_reg);
 8157 %}
 8158 
 8159 instruct storeImmL(memory mem, immL32 src)
 8160 %{
 8161   match(Set mem (StoreL mem src));
 8162 
 8163   ins_cost(150);
 8164   format %{ "movq    $mem, $src\t# long" %}
 8165   ins_encode %{
 8166     __ movq($mem$$Address, $src$$constant);
 8167   %}
 8168   ins_pipe(ialu_mem_imm);
 8169 %}
 8170 
 8171 // Store Short/Char Immediate
 8172 instruct storeImmC0(memory mem, immI_0 zero)
 8173 %{
 8174   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8175   match(Set mem (StoreC mem zero));
 8176 
 8177   ins_cost(125); // XXX
 8178   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8179   ins_encode %{
 8180     __ movw($mem$$Address, r12);
 8181   %}
 8182   ins_pipe(ialu_mem_reg);
 8183 %}
 8184 
 8185 instruct storeImmI16(memory mem, immI16 src)
 8186 %{
 8187   predicate(UseStoreImmI16);
 8188   match(Set mem (StoreC mem src));
 8189 
 8190   ins_cost(150);
 8191   format %{ "movw    $mem, $src\t# short/char" %}
 8192   ins_encode %{
 8193     __ movw($mem$$Address, $src$$constant);
 8194   %}
 8195   ins_pipe(ialu_mem_imm);
 8196 %}
 8197 
 8198 // Store Byte Immediate
 8199 instruct storeImmB0(memory mem, immI_0 zero)
 8200 %{
 8201   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8202   match(Set mem (StoreB mem zero));
 8203 
 8204   ins_cost(125); // XXX
 8205   format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8206   ins_encode %{
 8207     __ movb($mem$$Address, r12);
 8208   %}
 8209   ins_pipe(ialu_mem_reg);
 8210 %}
 8211 
 8212 instruct storeImmB(memory mem, immI8 src)
 8213 %{
 8214   match(Set mem (StoreB mem src));
 8215 
 8216   ins_cost(150); // XXX
 8217   format %{ "movb    $mem, $src\t# byte" %}
 8218   ins_encode %{
 8219     __ movb($mem$$Address, $src$$constant);
 8220   %}
 8221   ins_pipe(ialu_mem_imm);
 8222 %}
 8223 
 8224 // Store Float
 8225 instruct storeF(memory mem, regF src)
 8226 %{
 8227   match(Set mem (StoreF mem src));
 8228 
 8229   ins_cost(95); // XXX
 8230   format %{ "movss   $mem, $src\t# float" %}
 8231   ins_encode %{
 8232     __ movflt($mem$$Address, $src$$XMMRegister);
 8233   %}
 8234   ins_pipe(pipe_slow); // XXX
 8235 %}
 8236 
// Store immediate Float value (faster than a store from an XMM register)
 8238 instruct storeF0(memory mem, immF0 zero)
 8239 %{
 8240   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8241   match(Set mem (StoreF mem zero));
 8242 
 8243   ins_cost(25); // XXX
 8244   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8245   ins_encode %{
 8246     __ movl($mem$$Address, r12);
 8247   %}
 8248   ins_pipe(ialu_mem_reg);
 8249 %}
 8250 
 8251 instruct storeF_imm(memory mem, immF src)
 8252 %{
 8253   match(Set mem (StoreF mem src));
 8254 
 8255   ins_cost(50);
 8256   format %{ "movl    $mem, $src\t# float" %}
 8257   ins_encode %{
 8258     __ movl($mem$$Address, jint_cast($src$$constant));
 8259   %}
 8260   ins_pipe(ialu_mem_imm);
 8261 %}
 8262 
 8263 // Store Double
 8264 instruct storeD(memory mem, regD src)
 8265 %{
 8266   match(Set mem (StoreD mem src));
 8267 
 8268   ins_cost(95); // XXX
 8269   format %{ "movsd   $mem, $src\t# double" %}
 8270   ins_encode %{
 8271     __ movdbl($mem$$Address, $src$$XMMRegister);
 8272   %}
 8273   ins_pipe(pipe_slow); // XXX
 8274 %}
 8275 
// Store immediate double 0.0 (faster than a store from an XMM register)
 8277 instruct storeD0_imm(memory mem, immD0 src)
 8278 %{
 8279   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8280   match(Set mem (StoreD mem src));
 8281 
 8282   ins_cost(50);
 8283   format %{ "movq    $mem, $src\t# double 0." %}
 8284   ins_encode %{
 8285     __ movq($mem$$Address, $src$$constant);
 8286   %}
 8287   ins_pipe(ialu_mem_imm);
 8288 %}
 8289 
 8290 instruct storeD0(memory mem, immD0 zero)
 8291 %{
 8292   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8293   match(Set mem (StoreD mem zero));
 8294 
 8295   ins_cost(25); // XXX
 8296   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8297   ins_encode %{
 8298     __ movq($mem$$Address, r12);
 8299   %}
 8300   ins_pipe(ialu_mem_reg);
 8301 %}
 8302 
 8303 instruct storeSSI(stackSlotI dst, rRegI src)
 8304 %{
 8305   match(Set dst src);
 8306 
 8307   ins_cost(100);
 8308   format %{ "movl    $dst, $src\t# int stk" %}
 8309   ins_encode %{
 8310     __ movl($dst$$Address, $src$$Register);
 8311   %}
 8312   ins_pipe( ialu_mem_reg );
 8313 %}
 8314 
 8315 instruct storeSSL(stackSlotL dst, rRegL src)
 8316 %{
 8317   match(Set dst src);
 8318 
 8319   ins_cost(100);
 8320   format %{ "movq    $dst, $src\t# long stk" %}
 8321   ins_encode %{
 8322     __ movq($dst$$Address, $src$$Register);
 8323   %}
 8324   ins_pipe(ialu_mem_reg);
 8325 %}
 8326 
 8327 instruct storeSSP(stackSlotP dst, rRegP src)
 8328 %{
 8329   match(Set dst src);
 8330 
 8331   ins_cost(100);
 8332   format %{ "movq    $dst, $src\t# ptr stk" %}
 8333   ins_encode %{
 8334     __ movq($dst$$Address, $src$$Register);
 8335   %}
 8336   ins_pipe(ialu_mem_reg);
 8337 %}
 8338 
 8339 instruct storeSSF(stackSlotF dst, regF src)
 8340 %{
 8341   match(Set dst src);
 8342 
 8343   ins_cost(95); // XXX
 8344   format %{ "movss   $dst, $src\t# float stk" %}
 8345   ins_encode %{
 8346     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8347   %}
 8348   ins_pipe(pipe_slow); // XXX
 8349 %}
 8350 
 8351 instruct storeSSD(stackSlotD dst, regD src)
 8352 %{
 8353   match(Set dst src);
 8354 
 8355   ins_cost(95); // XXX
 8356   format %{ "movsd   $dst, $src\t# double stk" %}
 8357   ins_encode %{
 8358     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8359   %}
 8360   ins_pipe(pipe_slow); // XXX
 8361 %}
 8362 
 8363 instruct cacheWB(indirect addr)
 8364 %{
 8365   predicate(VM_Version::supports_data_cache_line_flush());
 8366   match(CacheWB addr);
 8367 
 8368   ins_cost(100);
 8369   format %{"cache wb $addr" %}
 8370   ins_encode %{
 8371     assert($addr->index_position() < 0, "should be");
 8372     assert($addr$$disp == 0, "should be");
 8373     __ cache_wb(Address($addr$$base$$Register, 0));
 8374   %}
 8375   ins_pipe(pipe_slow); // XXX
 8376 %}
 8377 
 8378 instruct cacheWBPreSync()
 8379 %{
 8380   predicate(VM_Version::supports_data_cache_line_flush());
 8381   match(CacheWBPreSync);
 8382 
 8383   ins_cost(100);
 8384   format %{"cache wb presync" %}
 8385   ins_encode %{
 8386     __ cache_wbsync(true);
 8387   %}
 8388   ins_pipe(pipe_slow); // XXX
 8389 %}
 8390 
 8391 instruct cacheWBPostSync()
 8392 %{
 8393   predicate(VM_Version::supports_data_cache_line_flush());
 8394   match(CacheWBPostSync);
 8395 
 8396   ins_cost(100);
 8397   format %{"cache wb postsync" %}
 8398   ins_encode %{
 8399     __ cache_wbsync(false);
 8400   %}
 8401   ins_pipe(pipe_slow); // XXX
 8402 %}
 8403 
 8404 //----------BSWAP Instructions-------------------------------------------------
 8405 instruct bytes_reverse_int(rRegI dst) %{
 8406   match(Set dst (ReverseBytesI dst));
 8407 
 8408   format %{ "bswapl  $dst" %}
 8409   ins_encode %{
 8410     __ bswapl($dst$$Register);
 8411   %}
 8412   ins_pipe( ialu_reg );
 8413 %}
 8414 
 8415 instruct bytes_reverse_long(rRegL dst) %{
 8416   match(Set dst (ReverseBytesL dst));
 8417 
 8418   format %{ "bswapq  $dst" %}
 8419   ins_encode %{
 8420     __ bswapq($dst$$Register);
 8421   %}
 8422   ins_pipe( ialu_reg);
 8423 %}
 8424 
 8425 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
 8426   match(Set dst (ReverseBytesUS dst));
 8427   effect(KILL cr);
 8428 
 8429   format %{ "bswapl  $dst\n\t"
 8430             "shrl    $dst,16\n\t" %}
 8431   ins_encode %{
 8432     __ bswapl($dst$$Register);
 8433     __ shrl($dst$$Register, 16);
 8434   %}
 8435   ins_pipe( ialu_reg );
 8436 %}
 8437 
 8438 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
 8439   match(Set dst (ReverseBytesS dst));
 8440   effect(KILL cr);
 8441 
 8442   format %{ "bswapl  $dst\n\t"
 8443             "sar     $dst,16\n\t" %}
 8444   ins_encode %{
 8445     __ bswapl($dst$$Register);
 8446     __ sarl($dst$$Register, 16);
 8447   %}
 8448   ins_pipe( ialu_reg );
 8449 %}
 8450 
 8451 //---------- Zeros Count Instructions ------------------------------------------
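
// lzcnt/tzcnt are used when available. The BSR/BSF fallbacks must fix up two
// differences: BSR returns the index of the highest set bit (so clz(x) is
// computed as 31 - bsr(x) via neg + add), and both leave the destination
// undefined for a zero input, hence the branch that substitutes -1 (BSR) or
// the bit width (BSF) in that case.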
 8452 
 8453 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8454   predicate(UseCountLeadingZerosInstruction);
 8455   match(Set dst (CountLeadingZerosI src));
 8456   effect(KILL cr);
 8457 
 8458   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8459   ins_encode %{
 8460     __ lzcntl($dst$$Register, $src$$Register);
 8461   %}
 8462   ins_pipe(ialu_reg);
 8463 %}
 8464 
 8465 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8466   predicate(UseCountLeadingZerosInstruction);
 8467   match(Set dst (CountLeadingZerosI (LoadI src)));
 8468   effect(KILL cr);
 8469   ins_cost(175);
 8470   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8471   ins_encode %{
 8472     __ lzcntl($dst$$Register, $src$$Address);
 8473   %}
 8474   ins_pipe(ialu_reg_mem);
 8475 %}
 8476 
 8477 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
 8478   predicate(!UseCountLeadingZerosInstruction);
 8479   match(Set dst (CountLeadingZerosI src));
 8480   effect(KILL cr);
 8481 
 8482   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8483             "jnz     skip\n\t"
 8484             "movl    $dst, -1\n"
 8485       "skip:\n\t"
 8486             "negl    $dst\n\t"
 8487             "addl    $dst, 31" %}
 8488   ins_encode %{
 8489     Register Rdst = $dst$$Register;
 8490     Register Rsrc = $src$$Register;
 8491     Label skip;
 8492     __ bsrl(Rdst, Rsrc);
 8493     __ jccb(Assembler::notZero, skip);
 8494     __ movl(Rdst, -1);
 8495     __ bind(skip);
 8496     __ negl(Rdst);
 8497     __ addl(Rdst, BitsPerInt - 1);
 8498   %}
 8499   ins_pipe(ialu_reg);
 8500 %}
 8501 
 8502 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8503   predicate(UseCountLeadingZerosInstruction);
 8504   match(Set dst (CountLeadingZerosL src));
 8505   effect(KILL cr);
 8506 
 8507   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8508   ins_encode %{
 8509     __ lzcntq($dst$$Register, $src$$Register);
 8510   %}
 8511   ins_pipe(ialu_reg);
 8512 %}
 8513 
 8514 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8515   predicate(UseCountLeadingZerosInstruction);
 8516   match(Set dst (CountLeadingZerosL (LoadL src)));
 8517   effect(KILL cr);
 8518   ins_cost(175);
 8519   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8520   ins_encode %{
 8521     __ lzcntq($dst$$Register, $src$$Address);
 8522   %}
 8523   ins_pipe(ialu_reg_mem);
 8524 %}
 8525 
 8526 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
 8527   predicate(!UseCountLeadingZerosInstruction);
 8528   match(Set dst (CountLeadingZerosL src));
 8529   effect(KILL cr);
 8530 
 8531   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8532             "jnz     skip\n\t"
 8533             "movl    $dst, -1\n"
 8534       "skip:\n\t"
 8535             "negl    $dst\n\t"
 8536             "addl    $dst, 63" %}
 8537   ins_encode %{
 8538     Register Rdst = $dst$$Register;
 8539     Register Rsrc = $src$$Register;
 8540     Label skip;
 8541     __ bsrq(Rdst, Rsrc);
 8542     __ jccb(Assembler::notZero, skip);
 8543     __ movl(Rdst, -1);
 8544     __ bind(skip);
 8545     __ negl(Rdst);
 8546     __ addl(Rdst, BitsPerLong - 1);
 8547   %}
 8548   ins_pipe(ialu_reg);
 8549 %}
 8550 
 8551 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8552   predicate(UseCountTrailingZerosInstruction);
 8553   match(Set dst (CountTrailingZerosI src));
 8554   effect(KILL cr);
 8555 
 8556   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8557   ins_encode %{
 8558     __ tzcntl($dst$$Register, $src$$Register);
 8559   %}
 8560   ins_pipe(ialu_reg);
 8561 %}
 8562 
 8563 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8564   predicate(UseCountTrailingZerosInstruction);
 8565   match(Set dst (CountTrailingZerosI (LoadI src)));
 8566   effect(KILL cr);
 8567   ins_cost(175);
 8568   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8569   ins_encode %{
 8570     __ tzcntl($dst$$Register, $src$$Address);
 8571   %}
 8572   ins_pipe(ialu_reg_mem);
 8573 %}
 8574 
 8575 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
 8576   predicate(!UseCountTrailingZerosInstruction);
 8577   match(Set dst (CountTrailingZerosI src));
 8578   effect(KILL cr);
 8579 
 8580   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8581             "jnz     done\n\t"
 8582             "movl    $dst, 32\n"
 8583       "done:" %}
 8584   ins_encode %{
 8585     Register Rdst = $dst$$Register;
 8586     Label done;
 8587     __ bsfl(Rdst, $src$$Register);
 8588     __ jccb(Assembler::notZero, done);
 8589     __ movl(Rdst, BitsPerInt);
 8590     __ bind(done);
 8591   %}
 8592   ins_pipe(ialu_reg);
 8593 %}
 8594 
 8595 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8596   predicate(UseCountTrailingZerosInstruction);
 8597   match(Set dst (CountTrailingZerosL src));
 8598   effect(KILL cr);
 8599 
 8600   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8601   ins_encode %{
 8602     __ tzcntq($dst$$Register, $src$$Register);
 8603   %}
 8604   ins_pipe(ialu_reg);
 8605 %}
 8606 
 8607 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
 8608   predicate(UseCountTrailingZerosInstruction);
 8609   match(Set dst (CountTrailingZerosL (LoadL src)));
 8610   effect(KILL cr);
 8611   ins_cost(175);
 8612   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8613   ins_encode %{
 8614     __ tzcntq($dst$$Register, $src$$Address);
 8615   %}
 8616   ins_pipe(ialu_reg_mem);
 8617 %}
 8618 
 8619 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
 8620   predicate(!UseCountTrailingZerosInstruction);
 8621   match(Set dst (CountTrailingZerosL src));
 8622   effect(KILL cr);
 8623 
 8624   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8625             "jnz     done\n\t"
 8626             "movl    $dst, 64\n"
 8627       "done:" %}
 8628   ins_encode %{
 8629     Register Rdst = $dst$$Register;
 8630     Label done;
 8631     __ bsfq(Rdst, $src$$Register);
 8632     __ jccb(Assembler::notZero, done);
 8633     __ movl(Rdst, BitsPerLong);
 8634     __ bind(done);
 8635   %}
 8636   ins_pipe(ialu_reg);
 8637 %}
 8638 
 8639 //--------------- Reverse Operation Instructions ----------------
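// When GFNI is available the per-byte bit reversal is done in an XMM register
// (presumably via GF2P8AFFINEQB with a bit-reversal matrix) combined with a
// byte swap; otherwise a scalar shift-and-mask sequence is used, which needs
// the extra general-purpose temporaries.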
 8640 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
 8641   predicate(!VM_Version::supports_gfni());
 8642   match(Set dst (ReverseI src));
 8643   effect(TEMP dst, TEMP rtmp, KILL cr);
 8644   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8645   ins_encode %{
 8646     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8647   %}
 8648   ins_pipe( ialu_reg );
 8649 %}
 8650 
 8651 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8652   predicate(VM_Version::supports_gfni());
 8653   match(Set dst (ReverseI src));
 8654   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8655   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8656   ins_encode %{
 8657     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8658   %}
 8659   ins_pipe( ialu_reg );
 8660 %}
 8661 
 8662 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 8663   predicate(!VM_Version::supports_gfni());
 8664   match(Set dst (ReverseL src));
 8665   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8666   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8667   ins_encode %{
 8668     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8669   %}
 8670   ins_pipe( ialu_reg );
 8671 %}
 8672 
 8673 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
 8674   predicate(VM_Version::supports_gfni());
 8675   match(Set dst (ReverseL src));
 8676   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8677   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8678   ins_encode %{
 8679     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8680   %}
 8681   ins_pipe( ialu_reg );
 8682 %}
 8683 
 8684 //---------- Population Count Instructions -------------------------------------
 8685 
 8686 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
 8687   predicate(UsePopCountInstruction);
 8688   match(Set dst (PopCountI src));
 8689   effect(KILL cr);
 8690 
 8691   format %{ "popcnt  $dst, $src" %}
 8692   ins_encode %{
 8693     __ popcntl($dst$$Register, $src$$Register);
 8694   %}
 8695   ins_pipe(ialu_reg);
 8696 %}
 8697 
 8698 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8699   predicate(UsePopCountInstruction);
 8700   match(Set dst (PopCountI (LoadI mem)));
 8701   effect(KILL cr);
 8702 
 8703   format %{ "popcnt  $dst, $mem" %}
 8704   ins_encode %{
 8705     __ popcntl($dst$$Register, $mem$$Address);
 8706   %}
 8707   ins_pipe(ialu_reg);
 8708 %}
 8709 
 8710 // Note: Long.bitCount(long) returns an int.
 8711 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
 8712   predicate(UsePopCountInstruction);
 8713   match(Set dst (PopCountL src));
 8714   effect(KILL cr);
 8715 
 8716   format %{ "popcnt  $dst, $src" %}
 8717   ins_encode %{
 8718     __ popcntq($dst$$Register, $src$$Register);
 8719   %}
 8720   ins_pipe(ialu_reg);
 8721 %}
 8722 
 8723 // Note: Long.bitCount(long) returns an int.
 8724 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
 8725   predicate(UsePopCountInstruction);
 8726   match(Set dst (PopCountL (LoadL mem)));
 8727   effect(KILL cr);
 8728 
 8729   format %{ "popcnt  $dst, $mem" %}
 8730   ins_encode %{
 8731     __ popcntq($dst$$Register, $mem$$Address);
 8732   %}
 8733   ins_pipe(ialu_reg);
 8734 %}
 8735 
 8736 
 8737 //----------MemBar Instructions-----------------------------------------------
 8738 // Memory barrier flavors
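// On x86 the hardware memory model (TSO) only allows stores to be reordered
// past later loads, so acquire, release and storestore barriers need no code
// and are matched by empty instructions. Only the StoreLoad barrier
// (MemBarVolatile) emits code, as a locked add to the stack, which is
// typically cheaper than mfence.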
 8739 
 8740 instruct membar_acquire()
 8741 %{
 8742   match(MemBarAcquire);
 8743   match(LoadFence);
 8744   ins_cost(0);
 8745 
 8746   size(0);
 8747   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8748   ins_encode();
 8749   ins_pipe(empty);
 8750 %}
 8751 
 8752 instruct membar_acquire_lock()
 8753 %{
 8754   match(MemBarAcquireLock);
 8755   ins_cost(0);
 8756 
 8757   size(0);
 8758   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8759   ins_encode();
 8760   ins_pipe(empty);
 8761 %}
 8762 
 8763 instruct membar_release()
 8764 %{
 8765   match(MemBarRelease);
 8766   match(StoreFence);
 8767   ins_cost(0);
 8768 
 8769   size(0);
 8770   format %{ "MEMBAR-release ! (empty encoding)" %}
 8771   ins_encode();
 8772   ins_pipe(empty);
 8773 %}
 8774 
 8775 instruct membar_release_lock()
 8776 %{
 8777   match(MemBarReleaseLock);
 8778   ins_cost(0);
 8779 
 8780   size(0);
 8781   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8782   ins_encode();
 8783   ins_pipe(empty);
 8784 %}
 8785 
 8786 instruct membar_volatile(rFlagsReg cr) %{
 8787   match(MemBarVolatile);
 8788   effect(KILL cr);
 8789   ins_cost(400);
 8790 
 8791   format %{
 8792     $$template
 8793     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8794   %}
 8795   ins_encode %{
 8796     __ membar(Assembler::StoreLoad);
 8797   %}
 8798   ins_pipe(pipe_slow);
 8799 %}
 8800 
 8801 instruct unnecessary_membar_volatile()
 8802 %{
 8803   match(MemBarVolatile);
 8804   predicate(Matcher::post_store_load_barrier(n));
 8805   ins_cost(0);
 8806 
 8807   size(0);
 8808   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8809   ins_encode();
 8810   ins_pipe(empty);
 8811 %}
 8812 
 8813 instruct membar_storestore() %{
 8814   match(MemBarStoreStore);
 8815   match(StoreStoreFence);
 8816   ins_cost(0);
 8817 
 8818   size(0);
 8819   format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
 8821   ins_pipe(empty);
 8822 %}
 8823 
 8824 //----------Move Instructions--------------------------------------------------
 8825 
 8826 instruct castX2P(rRegP dst, rRegL src)
 8827 %{
 8828   match(Set dst (CastX2P src));
 8829 
 8830   format %{ "movq    $dst, $src\t# long->ptr" %}
 8831   ins_encode %{
 8832     if ($dst$$reg != $src$$reg) {
 8833       __ movptr($dst$$Register, $src$$Register);
 8834     }
 8835   %}
 8836   ins_pipe(ialu_reg_reg); // XXX
 8837 %}
 8838 
 8839 instruct castP2X(rRegL dst, rRegP src)
 8840 %{
 8841   match(Set dst (CastP2X src));
 8842 
 8843   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8844   ins_encode %{
 8845     if ($dst$$reg != $src$$reg) {
 8846       __ movptr($dst$$Register, $src$$Register);
 8847     }
 8848   %}
 8849   ins_pipe(ialu_reg_reg); // XXX
 8850 %}
 8851 
 8852 // Convert oop into int for vectors alignment masking
 8853 instruct convP2I(rRegI dst, rRegP src)
 8854 %{
 8855   match(Set dst (ConvL2I (CastP2X src)));
 8856 
 8857   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8858   ins_encode %{
 8859     __ movl($dst$$Register, $src$$Register);
 8860   %}
 8861   ins_pipe(ialu_reg_reg); // XXX
 8862 %}
 8863 
 8864 // Convert compressed oop into int for vectors alignment masking
 8865 // in case of 32bit oops (heap < 4Gb).
 8866 instruct convN2I(rRegI dst, rRegN src)
 8867 %{
 8868   predicate(CompressedOops::shift() == 0);
 8869   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8870 
 8871   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8872   ins_encode %{
 8873     __ movl($dst$$Register, $src$$Register);
 8874   %}
 8875   ins_pipe(ialu_reg_reg); // XXX
 8876 %}
 8877 
 8878 // Convert oop pointer into compressed form
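// Encoding subtracts the heap base (if any) and shifts right by
// CompressedOops::shift(); decoding is the inverse. The _not_null variants
// can skip the null check that keeps a null oop null across the conversion.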
 8879 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
 8880   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8881   match(Set dst (EncodeP src));
 8882   effect(KILL cr);
 8883   format %{ "encode_heap_oop $dst,$src" %}
 8884   ins_encode %{
 8885     Register s = $src$$Register;
 8886     Register d = $dst$$Register;
 8887     if (s != d) {
 8888       __ movq(d, s);
 8889     }
 8890     __ encode_heap_oop(d);
 8891   %}
 8892   ins_pipe(ialu_reg_long);
 8893 %}
 8894 
 8895 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8896   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8897   match(Set dst (EncodeP src));
 8898   effect(KILL cr);
 8899   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8900   ins_encode %{
 8901     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8902   %}
 8903   ins_pipe(ialu_reg_long);
 8904 %}
 8905 
 8906 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
 8907   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8908             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8909   match(Set dst (DecodeN src));
 8910   effect(KILL cr);
 8911   format %{ "decode_heap_oop $dst,$src" %}
 8912   ins_encode %{
 8913     Register s = $src$$Register;
 8914     Register d = $dst$$Register;
 8915     if (s != d) {
 8916       __ movq(d, s);
 8917     }
 8918     __ decode_heap_oop(d);
 8919   %}
 8920   ins_pipe(ialu_reg_long);
 8921 %}
 8922 
 8923 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8924   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 8925             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 8926   match(Set dst (DecodeN src));
 8927   effect(KILL cr);
 8928   format %{ "decode_heap_oop_not_null $dst,$src" %}
 8929   ins_encode %{
 8930     Register s = $src$$Register;
 8931     Register d = $dst$$Register;
 8932     if (s != d) {
 8933       __ decode_heap_oop_not_null(d, s);
 8934     } else {
 8935       __ decode_heap_oop_not_null(d);
 8936     }
 8937   %}
 8938   ins_pipe(ialu_reg_long);
 8939 %}
 8940 
 8941 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 8942   match(Set dst (EncodePKlass src));
 8943   effect(TEMP dst, KILL cr);
 8944   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 8945   ins_encode %{
 8946     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8947   %}
 8948   ins_pipe(ialu_reg_long);
 8949 %}
 8950 
 8951 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 8952   match(Set dst (DecodeNKlass src));
 8953   effect(TEMP dst, KILL cr);
 8954   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 8955   ins_encode %{
 8956     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 8957   %}
 8958   ins_pipe(ialu_reg_long);
 8959 %}
 8960 
 8961 //----------Conditional Move---------------------------------------------------
 8962 // Jump
 8963 // dummy instruction for generating temp registers
 8964 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
 8965   match(Jump (LShiftL switch_val shift));
 8966   ins_cost(350);
 8967   predicate(false);
 8968   effect(TEMP dest);
 8969 
 8970   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8971             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 8972   ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs r10
    // to do that and the compiler treats r10 as an allocatable register.
    // So we build the address by hand.
 8976     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 8977     // ArrayAddress dispatch(table, index);
 8978     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 8979     __ lea($dest$$Register, $constantaddress);
 8980     __ jmp(dispatch);
 8981   %}
 8982   ins_pipe(pipe_jmp);
 8983 %}
 8984 
 8985 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
 8986   match(Jump (AddL (LShiftL switch_val shift) offset));
 8987   ins_cost(350);
 8988   effect(TEMP dest);
 8989 
 8990   format %{ "leaq    $dest, [$constantaddress]\n\t"
 8991             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 8992   ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs r10
    // to do that and the compiler treats r10 as an allocatable register.
    // So we build the address by hand.
 8996     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 8997     // ArrayAddress dispatch(table, index);
 8998     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 8999     __ lea($dest$$Register, $constantaddress);
 9000     __ jmp(dispatch);
 9001   %}
 9002   ins_pipe(pipe_jmp);
 9003 %}
 9004 
 9005 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
 9006   match(Jump switch_val);
 9007   ins_cost(350);
 9008   effect(TEMP dest);
 9009 
 9010   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9011             "jmp     [$dest + $switch_val]\n\t" %}
 9012   ins_encode %{
    // We could use jump(ArrayAddress), except that the macro assembler needs r10
    // to do that and the compiler treats r10 as an allocatable register.
    // So we build the address by hand.
 9016     // Address index(noreg, switch_reg, Address::times_1);
 9017     // ArrayAddress dispatch(table, index);
 9018     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9019     __ lea($dest$$Register, $constantaddress);
 9020     __ jmp(dispatch);
 9021   %}
 9022   ins_pipe(pipe_jmp);
 9023 %}
 9024 
 9025 // Conditional move
 9026 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9027 %{
 9028   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9029   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9030 
 9031   ins_cost(100); // XXX
 9032   format %{ "setbn$cop $dst\t# signed, int" %}
 9033   ins_encode %{
 9034     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9035     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9036   %}
 9037   ins_pipe(ialu_reg);
 9038 %}
 9039 
 9040 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9041 %{
 9042   predicate(!UseAPX);
 9043   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9044 
 9045   ins_cost(200); // XXX
 9046   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9047   ins_encode %{
 9048     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9049   %}
 9050   ins_pipe(pipe_cmov_reg);
 9051 %}
 9052 
 9053 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9054 %{
 9055   predicate(UseAPX);
 9056   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9057 
 9058   ins_cost(200);
 9059   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9060   ins_encode %{
 9061     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9062   %}
 9063   ins_pipe(pipe_cmov_reg);
 9064 %}
 9065 
 9066 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9067 %{
 9068   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9069   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9070 
 9071   ins_cost(100); // XXX
 9072   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9073   ins_encode %{
 9074     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9075     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9076   %}
 9077   ins_pipe(ialu_reg);
 9078 %}
 9079 
 9080 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
 9081   predicate(!UseAPX);
 9082   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9083 
 9084   ins_cost(200); // XXX
 9085   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9086   ins_encode %{
 9087     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9088   %}
 9089   ins_pipe(pipe_cmov_reg);
 9090 %}
 9091 
 9092 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
 9093   predicate(UseAPX);
 9094   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9095 
 9096   ins_cost(200);
 9097   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9098   ins_encode %{
 9099     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9100   %}
 9101   ins_pipe(pipe_cmov_reg);
 9102 %}
 9103 
 9104 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9105 %{
 9106   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9107   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9108 
 9109   ins_cost(100); // XXX
 9110   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9111   ins_encode %{
 9112     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9113     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9114   %}
 9115   ins_pipe(ialu_reg);
 9116 %}
 9117 
 9118 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9119   predicate(!UseAPX);
 9120   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9121   ins_cost(200);
 9122   expand %{
 9123     cmovI_regU(cop, cr, dst, src);
 9124   %}
 9125 %}
 9126 
 9127 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
 9128   predicate(UseAPX);
 9129   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9130   ins_cost(200);
 9131   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9132   ins_encode %{
 9133     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9134   %}
 9135   ins_pipe(pipe_cmov_reg);
 9136 %}
 9137 
 9138 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9139   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9140   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9141 
 9142   ins_cost(200); // XXX
 9143   format %{ "cmovpl  $dst, $src\n\t"
 9144             "cmovnel $dst, $src" %}
 9145   ins_encode %{
 9146     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9147     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9148   %}
 9149   ins_pipe(pipe_cmov_reg);
 9150 %}
 9151 
 9152 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9153   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9154   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9155   effect(TEMP dst);
 9156 
 9157   ins_cost(200);
 9158   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9159             "cmovnel  $dst, $src2" %}
 9160   ins_encode %{
 9161     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9162     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9163   %}
 9164   ins_pipe(pipe_cmov_reg);
 9165 %}
 9166 
 9167 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9168 // inputs of the CMove
 9169 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9170   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9171   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9172   effect(TEMP dst);
 9173 
 9174   ins_cost(200); // XXX
 9175   format %{ "cmovpl  $dst, $src\n\t"
 9176             "cmovnel $dst, $src" %}
 9177   ins_encode %{
 9178     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9179     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9180   %}
 9181   ins_pipe(pipe_cmov_reg);
 9182 %}
 9183 
// We need this special handling only for eq / ne comparisons, since NaN == NaN
// is false and the parity flag is set if either operand is a NaN.
 9186 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
 9187   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9188   match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
 9189   effect(TEMP dst);
 9190 
 9191   ins_cost(200);
 9192   format %{ "ecmovpl  $dst, $src1, $src2\n\t"
 9193             "cmovnel  $dst, $src2" %}
 9194   ins_encode %{
 9195     __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9196     __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9197   %}
 9198   ins_pipe(pipe_cmov_reg);
 9199 %}
 9200 
 9201 // Conditional move
 9202 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9203   predicate(!UseAPX);
 9204   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9205 
 9206   ins_cost(250); // XXX
 9207   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9208   ins_encode %{
 9209     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9210   %}
 9211   ins_pipe(pipe_cmov_mem);
 9212 %}
 9213 
 9214 // Conditional move
 9215 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9216 %{
 9217   predicate(UseAPX);
 9218   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9219 
 9220   ins_cost(250);
 9221   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9222   ins_encode %{
 9223     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9224   %}
 9225   ins_pipe(pipe_cmov_mem);
 9226 %}
 9227 
 9228 // Conditional move
 9229 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9230 %{
 9231   predicate(!UseAPX);
 9232   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9233 
 9234   ins_cost(250); // XXX
 9235   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9236   ins_encode %{
 9237     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9238   %}
 9239   ins_pipe(pipe_cmov_mem);
 9240 %}
 9241 
 9242 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
 9243   predicate(!UseAPX);
 9244   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9245   ins_cost(250);
 9246   expand %{
 9247     cmovI_memU(cop, cr, dst, src);
 9248   %}
 9249 %}
 9250 
 9251 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9252 %{
 9253   predicate(UseAPX);
 9254   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9255 
 9256   ins_cost(250);
 9257   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9258   ins_encode %{
 9259     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9260   %}
 9261   ins_pipe(pipe_cmov_mem);
 9262 %}
 9263 
 9264 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
 9265 %{
 9266   predicate(UseAPX);
 9267   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9268   ins_cost(250);
 9269   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9270   ins_encode %{
 9271     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9272   %}
 9273   ins_pipe(pipe_cmov_mem);
 9274 %}
 9275 
 9276 // Conditional move
 9277 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9278 %{
 9279   predicate(!UseAPX);
 9280   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9281 
 9282   ins_cost(200); // XXX
 9283   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9284   ins_encode %{
 9285     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9286   %}
 9287   ins_pipe(pipe_cmov_reg);
 9288 %}
 9289 
 9290 // Conditional move ndd
 9291 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9292 %{
 9293   predicate(UseAPX);
 9294   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9295 
 9296   ins_cost(200);
 9297   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9298   ins_encode %{
 9299     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9300   %}
 9301   ins_pipe(pipe_cmov_reg);
 9302 %}
 9303 
 9304 // Conditional move
 9305 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9306 %{
 9307   predicate(!UseAPX);
 9308   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9309 
 9310   ins_cost(200); // XXX
 9311   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9312   ins_encode %{
 9313     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9314   %}
 9315   ins_pipe(pipe_cmov_reg);
 9316 %}
 9317 
 9318 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9319   predicate(!UseAPX);
 9320   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9321   ins_cost(200);
 9322   expand %{
 9323     cmovN_regU(cop, cr, dst, src);
 9324   %}
 9325 %}
 9326 
 9327 // Conditional move ndd
 9328 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9329 %{
 9330   predicate(UseAPX);
 9331   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9332 
 9333   ins_cost(200);
 9334   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9335   ins_encode %{
 9336     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9337   %}
 9338   ins_pipe(pipe_cmov_reg);
 9339 %}
 9340 
 9341 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
 9342   predicate(UseAPX);
 9343   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9344   ins_cost(200);
 9345   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9346   ins_encode %{
 9347     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9348   %}
 9349   ins_pipe(pipe_cmov_reg);
 9350 %}
 9351 
 9352 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9353   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9354   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9355 
 9356   ins_cost(200); // XXX
 9357   format %{ "cmovpl  $dst, $src\n\t"
 9358             "cmovnel $dst, $src" %}
 9359   ins_encode %{
 9360     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9361     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9362   %}
 9363   ins_pipe(pipe_cmov_reg);
 9364 %}
 9365 
 9366 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9367 // inputs of the CMove
 9368 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9369   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9370   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9371 
 9372   ins_cost(200); // XXX
 9373   format %{ "cmovpl  $dst, $src\n\t"
 9374             "cmovnel $dst, $src" %}
 9375   ins_encode %{
 9376     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9377     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9378   %}
 9379   ins_pipe(pipe_cmov_reg);
 9380 %}
 9381 
 9382 // Conditional move
 9383 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9384 %{
 9385   predicate(!UseAPX);
 9386   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9387 
 9388   ins_cost(200); // XXX
 9389   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9390   ins_encode %{
 9391     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9392   %}
 9393   ins_pipe(pipe_cmov_reg);  // XXX
 9394 %}
 9395 
 9396 // Conditional move ndd
 9397 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9398 %{
 9399   predicate(UseAPX);
 9400   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9401 
 9402   ins_cost(200);
 9403   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9404   ins_encode %{
 9405     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9406   %}
 9407   ins_pipe(pipe_cmov_reg);
 9408 %}
 9409 
 9410 // Conditional move
 9411 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9412 %{
 9413   predicate(!UseAPX);
 9414   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9415 
 9416   ins_cost(200); // XXX
 9417   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9418   ins_encode %{
 9419     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9420   %}
 9421   ins_pipe(pipe_cmov_reg); // XXX
 9422 %}
 9423 
 9424 // Conditional move ndd
 9425 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9426 %{
 9427   predicate(UseAPX);
 9428   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9429 
 9430   ins_cost(200);
 9431   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9432   ins_encode %{
 9433     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9434   %}
 9435   ins_pipe(pipe_cmov_reg);
 9436 %}
 9437 
 9438 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9439   predicate(!UseAPX);
 9440   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9441   ins_cost(200);
 9442   expand %{
 9443     cmovP_regU(cop, cr, dst, src);
 9444   %}
 9445 %}
 9446 
 9447 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
 9448   predicate(UseAPX);
 9449   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9450   ins_cost(200);
 9451   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9452   ins_encode %{
 9453     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9454   %}
 9455   ins_pipe(pipe_cmov_reg);
 9456 %}
 9457 
 9458 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9459   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9460   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9461 
 9462   ins_cost(200); // XXX
 9463   format %{ "cmovpq  $dst, $src\n\t"
 9464             "cmovneq $dst, $src" %}
 9465   ins_encode %{
 9466     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9467     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9468   %}
 9469   ins_pipe(pipe_cmov_reg);
 9470 %}
 9471 
 9472 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9473   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9474   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9475   effect(TEMP dst);
 9476 
 9477   ins_cost(200);
 9478   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9479             "cmovneq  $dst, $src2" %}
 9480   ins_encode %{
 9481     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9482     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9483   %}
 9484   ins_pipe(pipe_cmov_reg);
 9485 %}
 9486 
 9487 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9488 // inputs of the CMove
 9489 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9490   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9491   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9492 
 9493   ins_cost(200); // XXX
 9494   format %{ "cmovpq  $dst, $src\n\t"
 9495             "cmovneq $dst, $src" %}
 9496   ins_encode %{
 9497     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9498     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9499   %}
 9500   ins_pipe(pipe_cmov_reg);
 9501 %}
 9502 
 9503 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
 9504   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9505   match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
 9506   effect(TEMP dst);
 9507 
 9508   ins_cost(200);
 9509   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9510             "cmovneq  $dst, $src2" %}
 9511   ins_encode %{
 9512     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9513     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9514   %}
 9515   ins_pipe(pipe_cmov_reg);
 9516 %}
 9517 
 9518 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9519 %{
 9520   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9521   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9522 
 9523   ins_cost(100); // XXX
 9524   format %{ "setbn$cop $dst\t# signed, long" %}
 9525   ins_encode %{
 9526     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9527     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9528   %}
 9529   ins_pipe(ialu_reg);
 9530 %}
 9531 
 9532 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9533 %{
 9534   predicate(!UseAPX);
 9535   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9536 
 9537   ins_cost(200); // XXX
 9538   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9539   ins_encode %{
 9540     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9541   %}
 9542   ins_pipe(pipe_cmov_reg);  // XXX
 9543 %}
 9544 
 9545 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9546 %{
 9547   predicate(UseAPX);
 9548   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9549 
 9550   ins_cost(200);
 9551   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9552   ins_encode %{
 9553     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9554   %}
 9555   ins_pipe(pipe_cmov_reg);
 9556 %}
 9557 
 9558 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9559 %{
 9560   predicate(!UseAPX);
 9561   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9562 
 9563   ins_cost(200); // XXX
 9564   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9565   ins_encode %{
 9566     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9567   %}
 9568   ins_pipe(pipe_cmov_mem);  // XXX
 9569 %}
 9570 
 9571 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9572 %{
 9573   predicate(UseAPX);
 9574   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9575 
 9576   ins_cost(200);
 9577   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9578   ins_encode %{
 9579     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9580   %}
 9581   ins_pipe(pipe_cmov_mem);
 9582 %}
 9583 
 9584 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9585 %{
 9586   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9587   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9588 
 9589   ins_cost(100); // XXX
 9590   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9591   ins_encode %{
 9592     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9593     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9594   %}
 9595   ins_pipe(ialu_reg);
 9596 %}
 9597 
 9598 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9599 %{
 9600   predicate(!UseAPX);
 9601   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9602 
 9603   ins_cost(200); // XXX
 9604   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9605   ins_encode %{
 9606     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9607   %}
 9608   ins_pipe(pipe_cmov_reg); // XXX
 9609 %}
 9610 
 9611 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9612 %{
 9613   predicate(UseAPX);
 9614   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9615 
 9616   ins_cost(200);
 9617   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9618   ins_encode %{
 9619     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9620   %}
 9621   ins_pipe(pipe_cmov_reg);
 9622 %}
 9623 
 9624 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9625 %{
 9626   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9627   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9628 
 9629   ins_cost(100); // XXX
 9630   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9631   ins_encode %{
 9632     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9633     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9634   %}
 9635   ins_pipe(ialu_reg);
 9636 %}
 9637 
 9638 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9639   predicate(!UseAPX);
 9640   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9641   ins_cost(200);
 9642   expand %{
 9643     cmovL_regU(cop, cr, dst, src);
 9644   %}
 9645 %}
 9646 
 9647 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
 9648 %{
 9649   predicate(UseAPX);
 9650   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9651   ins_cost(200);
 9652   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9653   ins_encode %{
 9654     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9655   %}
 9656   ins_pipe(pipe_cmov_reg);
 9657 %}
 9658 
 9659 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9660   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9661   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9662 
 9663   ins_cost(200); // XXX
 9664   format %{ "cmovpq  $dst, $src\n\t"
 9665             "cmovneq $dst, $src" %}
 9666   ins_encode %{
 9667     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9668     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9669   %}
 9670   ins_pipe(pipe_cmov_reg);
 9671 %}
 9672 
 9673 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9674   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9675   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9676   effect(TEMP dst);
 9677 
 9678   ins_cost(200);
 9679   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9680             "cmovneq  $dst, $src2" %}
 9681   ins_encode %{
 9682     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9683     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9684   %}
 9685   ins_pipe(pipe_cmov_reg);
 9686 %}
 9687 
 9688 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9689 // inputs of the CMove
 9690 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9691   predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9692   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9693 
 9694   ins_cost(200); // XXX
 9695   format %{ "cmovpq  $dst, $src\n\t"
 9696             "cmovneq $dst, $src" %}
 9697   ins_encode %{
 9698     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9699     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9700   %}
 9701   ins_pipe(pipe_cmov_reg);
 9702 %}
 9703 
 9704 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
 9705   predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9706   match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
 9707   effect(TEMP dst);
 9708 
 9709   ins_cost(200);
 9710   format %{ "ecmovpq  $dst, $src1, $src2\n\t"
 9711             "cmovneq $dst, $src2" %}
 9712   ins_encode %{
 9713     __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
 9714     __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
 9715   %}
 9716   ins_pipe(pipe_cmov_reg);
 9717 %}
 9718 
 9719 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9720 %{
 9721   predicate(!UseAPX);
 9722   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9723 
 9724   ins_cost(200); // XXX
 9725   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9726   ins_encode %{
 9727     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9728   %}
 9729   ins_pipe(pipe_cmov_mem); // XXX
 9730 %}
 9731 
 9732 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
 9733   predicate(!UseAPX);
 9734   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9735   ins_cost(200);
 9736   expand %{
 9737     cmovL_memU(cop, cr, dst, src);
 9738   %}
 9739 %}
 9740 
 9741 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9742 %{
 9743   predicate(UseAPX);
 9744   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9745 
 9746   ins_cost(200);
 9747   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9748   ins_encode %{
 9749     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9750   %}
 9751   ins_pipe(pipe_cmov_mem);
 9752 %}
 9753 
 9754 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
 9755 %{
 9756   predicate(UseAPX);
 9757   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9758   ins_cost(200);
 9759   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9760   ins_encode %{
 9761     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9762   %}
 9763   ins_pipe(pipe_cmov_mem);
 9764 %}
 9765 
 9766 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9767 %{
 9768   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9769 
 9770   ins_cost(200); // XXX
 9771   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9772             "movss     $dst, $src\n"
 9773     "skip:" %}
 9774   ins_encode %{
 9775     Label Lskip;
 9776     // Invert sense of branch from sense of CMOV
 9777     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9778     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9779     __ bind(Lskip);
 9780   %}
 9781   ins_pipe(pipe_slow);
 9782 %}
 9783 
 9784 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9785 %{
 9786   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9787 
 9788   ins_cost(200); // XXX
 9789   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9790             "movss     $dst, $src\n"
 9791     "skip:" %}
 9792   ins_encode %{
 9793     Label Lskip;
 9794     // Invert sense of branch from sense of CMOV
 9795     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9796     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9797     __ bind(Lskip);
 9798   %}
 9799   ins_pipe(pipe_slow);
 9800 %}
 9801 
 9802 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
 9803   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9804   ins_cost(200);
 9805   expand %{
 9806     cmovF_regU(cop, cr, dst, src);
 9807   %}
 9808 %}
 9809 
 9810 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9811 %{
 9812   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9813 
 9814   ins_cost(200); // XXX
 9815   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9816             "movsd     $dst, $src\n"
 9817     "skip:" %}
 9818   ins_encode %{
 9819     Label Lskip;
 9820     // Invert sense of branch from sense of CMOV
 9821     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9822     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9823     __ bind(Lskip);
 9824   %}
 9825   ins_pipe(pipe_slow);
 9826 %}
 9827 
 9828 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9829 %{
 9830   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9831 
 9832   ins_cost(200); // XXX
 9833   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9834             "movsd     $dst, $src\n"
 9835     "skip:" %}
 9836   ins_encode %{
 9837     Label Lskip;
 9838     // Invert sense of branch from sense of CMOV
 9839     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9840     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9841     __ bind(Lskip);
 9842   %}
 9843   ins_pipe(pipe_slow);
 9844 %}
 9845 
 9846 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
 9847   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9848   ins_cost(200);
 9849   expand %{
 9850     cmovD_regU(cop, cr, dst, src);
 9851   %}
 9852 %}
 9853 
 9854 //----------Arithmetic Instructions--------------------------------------------
 9855 //----------Addition Instructions----------------------------------------------
 9856 
 9857 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9858 %{
 9859   predicate(!UseAPX);
 9860   match(Set dst (AddI dst src));
 9861   effect(KILL cr);
 9862   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9863   format %{ "addl    $dst, $src\t# int" %}
 9864   ins_encode %{
 9865     __ addl($dst$$Register, $src$$Register);
 9866   %}
 9867   ins_pipe(ialu_reg_reg);
 9868 %}
 9869 
 9870 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9871 %{
 9872   predicate(UseAPX);
 9873   match(Set dst (AddI src1 src2));
 9874   effect(KILL cr);
 9875   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
 9876 
 9877   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9878   ins_encode %{
 9879     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9880   %}
 9881   ins_pipe(ialu_reg_reg);
 9882 %}
 9883 
 9884 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9885 %{
 9886   predicate(!UseAPX);
 9887   match(Set dst (AddI dst src));
 9888   effect(KILL cr);
 9889   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9890 
 9891   format %{ "addl    $dst, $src\t# int" %}
 9892   ins_encode %{
 9893     __ addl($dst$$Register, $src$$constant);
 9894   %}
 9895   ins_pipe( ialu_reg );
 9896 %}
 9897 
 9898 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9899 %{
 9900   predicate(UseAPX);
 9901   match(Set dst (AddI src1 src2));
 9902   effect(KILL cr);
 9903   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
 9904 
 9905   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9906   ins_encode %{
 9907     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9908   %}
 9909   ins_pipe( ialu_reg );
 9910 %}
 9911 
 9912 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9913 %{
 9914   predicate(UseAPX);
 9915   match(Set dst (AddI (LoadI src1) src2));
 9916   effect(KILL cr);
 9917   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9918 
 9919   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9920   ins_encode %{
 9921     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9922   %}
 9923   ins_pipe( ialu_reg );
 9924 %}
 9925 
 9926 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9927 %{
 9928   predicate(!UseAPX);
 9929   match(Set dst (AddI dst (LoadI src)));
 9930   effect(KILL cr);
 9931   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9932 
 9933   ins_cost(150); // XXX
 9934   format %{ "addl    $dst, $src\t# int" %}
 9935   ins_encode %{
 9936     __ addl($dst$$Register, $src$$Address);
 9937   %}
 9938   ins_pipe(ialu_reg_mem);
 9939 %}
 9940 
 9941 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
 9942 %{
 9943   predicate(UseAPX);
 9944   match(Set dst (AddI src1 (LoadI src2)));
 9945   effect(KILL cr);
 9946   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
 9947 
 9948   ins_cost(150);
 9949   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9950   ins_encode %{
 9951     __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
 9952   %}
 9953   ins_pipe(ialu_reg_mem);
 9954 %}
 9955 
 9956 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9957 %{
 9958   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9959   effect(KILL cr);
 9960   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9961 
 9962   ins_cost(150); // XXX
 9963   format %{ "addl    $dst, $src\t# int" %}
 9964   ins_encode %{
 9965     __ addl($dst$$Address, $src$$Register);
 9966   %}
 9967   ins_pipe(ialu_mem_reg);
 9968 %}
 9969 
 9970 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9971 %{
 9972   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9973   effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
 9978   format %{ "addl    $dst, $src\t# int" %}
 9979   ins_encode %{
 9980     __ addl($dst$$Address, $src$$constant);
 9981   %}
 9982   ins_pipe(ialu_mem_imm);
 9983 %}
 9984 
 9985 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 9986 %{
 9987   predicate(!UseAPX && UseIncDec);
 9988   match(Set dst (AddI dst src));
 9989   effect(KILL cr);
 9990 
 9991   format %{ "incl    $dst\t# int" %}
 9992   ins_encode %{
 9993     __ incrementl($dst$$Register);
 9994   %}
 9995   ins_pipe(ialu_reg);
 9996 %}
 9997 
 9998 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
 9999 %{
10000   predicate(UseAPX && UseIncDec);
10001   match(Set dst (AddI src val));
10002   effect(KILL cr);
10003   flag(PD::Flag_ndd_demotable);
10004 
10005   format %{ "eincl    $dst, $src\t# int ndd" %}
10006   ins_encode %{
10007     __ eincl($dst$$Register, $src$$Register, false);
10008   %}
10009   ins_pipe(ialu_reg);
10010 %}
10011 
10012 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10013 %{
10014   predicate(UseAPX && UseIncDec);
10015   match(Set dst (AddI (LoadI src) val));
10016   effect(KILL cr);
10017 
10018   format %{ "eincl    $dst, $src\t# int ndd" %}
10019   ins_encode %{
10020     __ eincl($dst$$Register, $src$$Address, false);
10021   %}
10022   ins_pipe(ialu_reg);
10023 %}
10024 
10025 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10026 %{
10027   predicate(UseIncDec);
10028   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10029   effect(KILL cr);
10030 
10031   ins_cost(125); // XXX
10032   format %{ "incl    $dst\t# int" %}
10033   ins_encode %{
10034     __ incrementl($dst$$Address);
10035   %}
10036   ins_pipe(ialu_mem_imm);
10037 %}
10038 
10039 // XXX why does that use AddI
10040 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10041 %{
10042   predicate(!UseAPX && UseIncDec);
10043   match(Set dst (AddI dst src));
10044   effect(KILL cr);
10045 
10046   format %{ "decl    $dst\t# int" %}
10047   ins_encode %{
10048     __ decrementl($dst$$Register);
10049   %}
10050   ins_pipe(ialu_reg);
10051 %}
10052 
10053 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10054 %{
10055   predicate(UseAPX && UseIncDec);
10056   match(Set dst (AddI src val));
10057   effect(KILL cr);
10058   flag(PD::Flag_ndd_demotable);
10059 
10060   format %{ "edecl    $dst, $src\t# int ndd" %}
10061   ins_encode %{
10062     __ edecl($dst$$Register, $src$$Register, false);
10063   %}
10064   ins_pipe(ialu_reg);
10065 %}
10066 
10067 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10068 %{
10069   predicate(UseAPX && UseIncDec);
10070   match(Set dst (AddI (LoadI src) val));
10071   effect(KILL cr);
10072 
10073   format %{ "edecl    $dst, $src\t# int ndd" %}
10074   ins_encode %{
10075     __ edecl($dst$$Register, $src$$Address, false);
10076   %}
10077   ins_pipe(ialu_reg);
10078 %}
10079 
10080 // XXX why does that use AddI
10081 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10082 %{
10083   predicate(UseIncDec);
10084   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10085   effect(KILL cr);
10086 
10087   ins_cost(125); // XXX
10088   format %{ "decl    $dst\t# int" %}
10089   ins_encode %{
10090     __ decrementl($dst$$Address);
10091   %}
10092   ins_pipe(ialu_mem_imm);
10093 %}
10094 
10095 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10096 %{
10097   predicate(VM_Version::supports_fast_2op_lea());
10098   match(Set dst (AddI (LShiftI index scale) disp));
10099 
10100   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10101   ins_encode %{
10102     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10103     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10104   %}
10105   ins_pipe(ialu_reg_reg);
10106 %}
10107 
10108 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10109 %{
10110   predicate(VM_Version::supports_fast_3op_lea());
10111   match(Set dst (AddI (AddI base index) disp));
10112 
10113   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10114   ins_encode %{
10115     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10116   %}
10117   ins_pipe(ialu_reg_reg);
10118 %}
10119 
10120 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10121 %{
10122   predicate(VM_Version::supports_fast_2op_lea());
10123   match(Set dst (AddI base (LShiftI index scale)));
10124 
10125   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10126   ins_encode %{
10127     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10128     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10129   %}
10130   ins_pipe(ialu_reg_reg);
10131 %}
10132 
10133 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10134 %{
10135   predicate(VM_Version::supports_fast_3op_lea());
10136   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10137 
10138   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10139   ins_encode %{
10140     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10141     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10142   %}
10143   ins_pipe(ialu_reg_reg);
10144 %}
10145 
10146 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10147 %{
10148   predicate(!UseAPX);
10149   match(Set dst (AddL dst src));
10150   effect(KILL cr);
10151   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10152 
10153   format %{ "addq    $dst, $src\t# long" %}
10154   ins_encode %{
10155     __ addq($dst$$Register, $src$$Register);
10156   %}
10157   ins_pipe(ialu_reg_reg);
10158 %}
10159 
10160 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10161 %{
10162   predicate(UseAPX);
10163   match(Set dst (AddL src1 src2));
10164   effect(KILL cr);
10165   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
10166 
10167   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10168   ins_encode %{
10169     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10170   %}
10171   ins_pipe(ialu_reg_reg);
10172 %}
10173 
10174 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10175 %{
10176   predicate(!UseAPX);
10177   match(Set dst (AddL dst src));
10178   effect(KILL cr);
10179   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10180 
10181   format %{ "addq    $dst, $src\t# long" %}
10182   ins_encode %{
10183     __ addq($dst$$Register, $src$$constant);
10184   %}
10185   ins_pipe( ialu_reg );
10186 %}
10187 
10188 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10189 %{
10190   predicate(UseAPX);
10191   match(Set dst (AddL src1 src2));
10192   effect(KILL cr);
10193   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
10194 
10195   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10196   ins_encode %{
10197     __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10198   %}
10199   ins_pipe( ialu_reg );
10200 %}
10201 
10202 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10203 %{
10204   predicate(UseAPX);
10205   match(Set dst (AddL (LoadL src1) src2));
10206   effect(KILL cr);
10207   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10208 
10209   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10210   ins_encode %{
10211     __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10212   %}
10213   ins_pipe( ialu_reg );
10214 %}
10215 
10216 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10217 %{
10218   predicate(!UseAPX);
10219   match(Set dst (AddL dst (LoadL src)));
10220   effect(KILL cr);
10221   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10222 
10223   ins_cost(150); // XXX
10224   format %{ "addq    $dst, $src\t# long" %}
10225   ins_encode %{
10226     __ addq($dst$$Register, $src$$Address);
10227   %}
10228   ins_pipe(ialu_reg_mem);
10229 %}
10230 
10231 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10232 %{
10233   predicate(UseAPX);
10234   match(Set dst (AddL src1 (LoadL src2)));
10235   effect(KILL cr);
10236   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_commutative);
10237 
10238   ins_cost(150);
10239   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10240   ins_encode %{
10241     __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10242   %}
10243   ins_pipe(ialu_reg_mem);
10244 %}
10245 
10246 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10247 %{
10248   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10249   effect(KILL cr);
10250   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10251 
10252   ins_cost(150); // XXX
10253   format %{ "addq    $dst, $src\t# long" %}
10254   ins_encode %{
10255     __ addq($dst$$Address, $src$$Register);
10256   %}
10257   ins_pipe(ialu_mem_reg);
10258 %}
10259 
10260 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10261 %{
10262   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10263   effect(KILL cr);
10264   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10265 
10266   ins_cost(125); // XXX
10267   format %{ "addq    $dst, $src\t# long" %}
10268   ins_encode %{
10269     __ addq($dst$$Address, $src$$constant);
10270   %}
10271   ins_pipe(ialu_mem_imm);
10272 %}
10273 
10274 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10275 %{
10276   predicate(!UseAPX && UseIncDec);
10277   match(Set dst (AddL dst src));
10278   effect(KILL cr);
10279 
10280   format %{ "incq    $dst\t# long" %}
10281   ins_encode %{
10282     __ incrementq($dst$$Register);
10283   %}
10284   ins_pipe(ialu_reg);
10285 %}
10286 
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10288 %{
10289   predicate(UseAPX && UseIncDec);
10290   match(Set dst (AddL src val));
10291   effect(KILL cr);
10292   flag(PD::Flag_ndd_demotable);
10293 
10294   format %{ "eincq    $dst, $src\t# long ndd" %}
10295   ins_encode %{
10296     __ eincq($dst$$Register, $src$$Register, false);
10297   %}
10298   ins_pipe(ialu_reg);
10299 %}
10300 
10301 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10302 %{
10303   predicate(UseAPX && UseIncDec);
10304   match(Set dst (AddL (LoadL src) val));
10305   effect(KILL cr);
10306 
10307   format %{ "eincq    $dst, $src\t# long ndd" %}
10308   ins_encode %{
10309     __ eincq($dst$$Register, $src$$Address, false);
10310   %}
10311   ins_pipe(ialu_reg);
10312 %}
10313 
10314 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10315 %{
10316   predicate(UseIncDec);
10317   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10318   effect(KILL cr);
10319 
10320   ins_cost(125); // XXX
10321   format %{ "incq    $dst\t# long" %}
10322   ins_encode %{
10323     __ incrementq($dst$$Address);
10324   %}
10325   ins_pipe(ialu_mem_imm);
10326 %}
10327 
10328 // XXX why does that use AddL
10329 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10330 %{
10331   predicate(!UseAPX && UseIncDec);
10332   match(Set dst (AddL dst src));
10333   effect(KILL cr);
10334 
10335   format %{ "decq    $dst\t# long" %}
10336   ins_encode %{
10337     __ decrementq($dst$$Register);
10338   %}
10339   ins_pipe(ialu_reg);
10340 %}
10341 
10342 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10343 %{
10344   predicate(UseAPX && UseIncDec);
10345   match(Set dst (AddL src val));
10346   effect(KILL cr);
10347   flag(PD::Flag_ndd_demotable);
10348 
10349   format %{ "edecq    $dst, $src\t# long ndd" %}
10350   ins_encode %{
10351     __ edecq($dst$$Register, $src$$Register, false);
10352   %}
10353   ins_pipe(ialu_reg);
10354 %}
10355 
10356 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10357 %{
10358   predicate(UseAPX && UseIncDec);
10359   match(Set dst (AddL (LoadL src) val));
10360   effect(KILL cr);
10361 
10362   format %{ "edecq    $dst, $src\t# long ndd" %}
10363   ins_encode %{
10364     __ edecq($dst$$Register, $src$$Address, false);
10365   %}
10366   ins_pipe(ialu_reg);
10367 %}
10368 
10369 // XXX why does that use AddL
10370 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10371 %{
10372   predicate(UseIncDec);
10373   match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10374   effect(KILL cr);
10375 
10376   ins_cost(125); // XXX
10377   format %{ "decq    $dst\t# long" %}
10378   ins_encode %{
10379     __ decrementq($dst$$Address);
10380   %}
10381   ins_pipe(ialu_mem_imm);
10382 %}
10383 
10384 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10385 %{
10386   predicate(VM_Version::supports_fast_2op_lea());
10387   match(Set dst (AddL (LShiftL index scale) disp));
10388 
10389   format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10390   ins_encode %{
10391     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10392     __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10393   %}
10394   ins_pipe(ialu_reg_reg);
10395 %}
10396 
10397 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10398 %{
10399   predicate(VM_Version::supports_fast_3op_lea());
10400   match(Set dst (AddL (AddL base index) disp));
10401 
10402   format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10403   ins_encode %{
10404     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10405   %}
10406   ins_pipe(ialu_reg_reg);
10407 %}
10408 
10409 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10410 %{
10411   predicate(VM_Version::supports_fast_2op_lea());
10412   match(Set dst (AddL base (LShiftL index scale)));
10413 
10414   format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10415   ins_encode %{
10416     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10417     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10418   %}
10419   ins_pipe(ialu_reg_reg);
10420 %}
10421 
10422 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10423 %{
10424   predicate(VM_Version::supports_fast_3op_lea());
10425   match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10426 
10427   format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10428   ins_encode %{
10429     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10430     __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10431   %}
10432   ins_pipe(ialu_reg_reg);
10433 %}
10434 
10435 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10436 %{
10437   match(Set dst (AddP dst src));
10438   effect(KILL cr);
10439   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10440 
10441   format %{ "addq    $dst, $src\t# ptr" %}
10442   ins_encode %{
10443     __ addq($dst$$Register, $src$$Register);
10444   %}
10445   ins_pipe(ialu_reg_reg);
10446 %}
10447 
10448 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10449 %{
10450   match(Set dst (AddP dst src));
10451   effect(KILL cr);
10452   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10453 
10454   format %{ "addq    $dst, $src\t# ptr" %}
10455   ins_encode %{
10456     __ addq($dst$$Register, $src$$constant);
10457   %}
10458   ins_pipe( ialu_reg );
10459 %}
10460 
10461 // XXX addP mem ops ????
10462 
10463 instruct checkCastPP(rRegP dst)
10464 %{
10465   match(Set dst (CheckCastPP dst));
10466 
10467   size(0);
10468   format %{ "# checkcastPP of $dst" %}
10469   ins_encode(/* empty encoding */);
10470   ins_pipe(empty);
10471 %}
10472 
10473 instruct castPP(rRegP dst)
10474 %{
10475   match(Set dst (CastPP dst));
10476 
10477   size(0);
10478   format %{ "# castPP of $dst" %}
10479   ins_encode(/* empty encoding */);
10480   ins_pipe(empty);
10481 %}
10482 
10483 instruct castII(rRegI dst)
10484 %{
10485   predicate(VerifyConstraintCasts == 0);
10486   match(Set dst (CastII dst));
10487 
10488   size(0);
10489   format %{ "# castII of $dst" %}
10490   ins_encode(/* empty encoding */);
10491   ins_cost(0);
10492   ins_pipe(empty);
10493 %}
10494 
10495 instruct castII_checked(rRegI dst, rFlagsReg cr)
10496 %{
10497   predicate(VerifyConstraintCasts > 0);
10498   match(Set dst (CastII dst));
10499 
10500   effect(KILL cr);
10501   format %{ "# cast_checked_II $dst" %}
10502   ins_encode %{
10503     __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10504   %}
10505   ins_pipe(pipe_slow);
10506 %}
10507 
10508 instruct castLL(rRegL dst)
10509 %{
10510   predicate(VerifyConstraintCasts == 0);
10511   match(Set dst (CastLL dst));
10512 
10513   size(0);
10514   format %{ "# castLL of $dst" %}
10515   ins_encode(/* empty encoding */);
10516   ins_cost(0);
10517   ins_pipe(empty);
10518 %}
10519 
10520 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10521 %{
10522   predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10523   match(Set dst (CastLL dst));
10524 
10525   effect(KILL cr);
10526   format %{ "# cast_checked_LL $dst" %}
10527   ins_encode %{
10528     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10529   %}
10530   ins_pipe(pipe_slow);
10531 %}
10532 
10533 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10534 %{
10535   predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10536   match(Set dst (CastLL dst));
10537 
10538   effect(KILL cr, TEMP tmp);
10539   format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10540   ins_encode %{
10541     __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10542   %}
10543   ins_pipe(pipe_slow);
10544 %}
10545 
10546 instruct castFF(regF dst)
10547 %{
10548   match(Set dst (CastFF dst));
10549 
10550   size(0);
10551   format %{ "# castFF of $dst" %}
10552   ins_encode(/* empty encoding */);
10553   ins_cost(0);
10554   ins_pipe(empty);
10555 %}
10556 
10557 instruct castHH(regF dst)
10558 %{
10559   match(Set dst (CastHH dst));
10560 
10561   size(0);
10562   format %{ "# castHH of $dst" %}
10563   ins_encode(/* empty encoding */);
10564   ins_cost(0);
10565   ins_pipe(empty);
10566 %}
10567 
10568 instruct castDD(regD dst)
10569 %{
10570   match(Set dst (CastDD dst));
10571 
10572   size(0);
10573   format %{ "# castDD of $dst" %}
10574   ins_encode(/* empty encoding */);
10575   ins_cost(0);
10576   ins_pipe(empty);
10577 %}
10578 
10579 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
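// All CompareAndSwap forms below share one shape: a locked cmpxchg whose
// atomic semantics are, in effect,
//
//   if (*mem_ptr == rax) { *mem_ptr = newval; ZF = 1; }
//   else                 { rax = *mem_ptr;    ZF = 0; }
//
// followed by setcc to materialize ZF as the 0/1 result in $res. The
// expected value is pinned to rax because cmpxchg implicitly uses it, hence
// the rax_Reg* operand and the KILL oldval effect.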
10580 instruct compareAndSwapP(rRegI res,
10581                          memory mem_ptr,
10582                          rax_RegP oldval, rRegP newval,
10583                          rFlagsReg cr)
10584 %{
10585   predicate(n->as_LoadStore()->barrier_data() == 0);
10586   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10587   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10588   effect(KILL cr, KILL oldval);
10589 
10590   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10591             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10592             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10593   ins_encode %{
10594     __ lock();
10595     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10596     __ setcc(Assembler::equal, $res$$Register);
10597   %}
10598   ins_pipe( pipe_cmpxchg );
10599 %}
10600 
10601 instruct compareAndSwapL(rRegI res,
10602                          memory mem_ptr,
10603                          rax_RegL oldval, rRegL newval,
10604                          rFlagsReg cr)
10605 %{
10606   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10607   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10608   effect(KILL cr, KILL oldval);
10609 
10610   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10611             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10612             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10613   ins_encode %{
10614     __ lock();
10615     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10616     __ setcc(Assembler::equal, $res$$Register);
10617   %}
10618   ins_pipe( pipe_cmpxchg );
10619 %}
10620 
10621 instruct compareAndSwapI(rRegI res,
10622                          memory mem_ptr,
10623                          rax_RegI oldval, rRegI newval,
10624                          rFlagsReg cr)
10625 %{
10626   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10627   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10628   effect(KILL cr, KILL oldval);
10629 
10630   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10631             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10632             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10633   ins_encode %{
10634     __ lock();
10635     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10636     __ setcc(Assembler::equal, $res$$Register);
10637   %}
10638   ins_pipe( pipe_cmpxchg );
10639 %}
10640 
10641 instruct compareAndSwapB(rRegI res,
10642                          memory mem_ptr,
10643                          rax_RegI oldval, rRegI newval,
10644                          rFlagsReg cr)
10645 %{
10646   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10647   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10648   effect(KILL cr, KILL oldval);
10649 
10650   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10651             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10652             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10653   ins_encode %{
10654     __ lock();
10655     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10656     __ setcc(Assembler::equal, $res$$Register);
10657   %}
10658   ins_pipe( pipe_cmpxchg );
10659 %}
10660 
10661 instruct compareAndSwapS(rRegI res,
10662                          memory mem_ptr,
10663                          rax_RegI oldval, rRegI newval,
10664                          rFlagsReg cr)
10665 %{
10666   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10667   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10668   effect(KILL cr, KILL oldval);
10669 
10670   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10671             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10672             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10673   ins_encode %{
10674     __ lock();
10675     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10676     __ setcc(Assembler::equal, $res$$Register);
10677   %}
10678   ins_pipe( pipe_cmpxchg );
10679 %}
10680 
10681 instruct compareAndSwapN(rRegI res,
10682                           memory mem_ptr,
10683                           rax_RegN oldval, rRegN newval,
10684                           rFlagsReg cr) %{
10685   predicate(n->as_LoadStore()->barrier_data() == 0);
10686   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10687   match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10688   effect(KILL cr, KILL oldval);
10689 
10690   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10691             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10692             "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10693   ins_encode %{
10694     __ lock();
10695     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10696     __ setcc(Assembler::equal, $res$$Register);
10697   %}
10698   ins_pipe( pipe_cmpxchg );
10699 %}
10700 
10701 instruct compareAndExchangeB(
10702                          memory mem_ptr,
10703                          rax_RegI oldval, rRegI newval,
10704                          rFlagsReg cr)
10705 %{
10706   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10707   effect(KILL cr);
10708 
10709   format %{ "cmpxchgb $mem_ptr,$newval\t# "
10710             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10711   ins_encode %{
10712     __ lock();
10713     __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10714   %}
10715   ins_pipe( pipe_cmpxchg );
10716 %}
10717 
10718 instruct compareAndExchangeS(
10719                          memory mem_ptr,
10720                          rax_RegI oldval, rRegI newval,
10721                          rFlagsReg cr)
10722 %{
10723   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10724   effect(KILL cr);
10725 
10726   format %{ "cmpxchgw $mem_ptr,$newval\t# "
10727             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10728   ins_encode %{
10729     __ lock();
10730     __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10731   %}
10732   ins_pipe( pipe_cmpxchg );
10733 %}
10734 
10735 instruct compareAndExchangeI(
10736                          memory mem_ptr,
10737                          rax_RegI oldval, rRegI newval,
10738                          rFlagsReg cr)
10739 %{
10740   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10741   effect(KILL cr);
10742 
10743   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10744             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10745   ins_encode %{
10746     __ lock();
10747     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10748   %}
10749   ins_pipe( pipe_cmpxchg );
10750 %}
10751 
10752 instruct compareAndExchangeL(
10753                          memory mem_ptr,
10754                          rax_RegL oldval, rRegL newval,
10755                          rFlagsReg cr)
10756 %{
10757   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10758   effect(KILL cr);
10759 
10760   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10761             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
10762   ins_encode %{
10763     __ lock();
10764     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10765   %}
10766   ins_pipe( pipe_cmpxchg );
10767 %}
10768 
10769 instruct compareAndExchangeN(
10770                           memory mem_ptr,
10771                           rax_RegN oldval, rRegN newval,
10772                           rFlagsReg cr) %{
10773   predicate(n->as_LoadStore()->barrier_data() == 0);
10774   match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10775   effect(KILL cr);
10776 
10777   format %{ "cmpxchgl $mem_ptr,$newval\t# "
10778             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10779   ins_encode %{
10780     __ lock();
10781     __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10782   %}
10783   ins_pipe( pipe_cmpxchg );
10784 %}
10785 
10786 instruct compareAndExchangeP(
10787                          memory mem_ptr,
10788                          rax_RegP oldval, rRegP newval,
10789                          rFlagsReg cr)
10790 %{
10791   predicate(n->as_LoadStore()->barrier_data() == 0);
10792   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10793   effect(KILL cr);
10794 
10795   format %{ "cmpxchgq $mem_ptr,$newval\t# "
10796             "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10797   ins_encode %{
10798     __ lock();
10799     __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10800   %}
10801   ins_pipe( pipe_cmpxchg );
10802 %}
10803 
10804 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10805   predicate(n->as_LoadStore()->result_not_used());
10806   match(Set dummy (GetAndAddB mem add));
10807   effect(KILL cr);
10808   format %{ "addb_lock   $mem, $add" %}
10809   ins_encode %{
10810     __ lock();
10811     __ addb($mem$$Address, $add$$Register);
10812   %}
10813   ins_pipe(pipe_cmpxchg);
10814 %}
10815 
10816 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10817   predicate(n->as_LoadStore()->result_not_used());
10818   match(Set dummy (GetAndAddB mem add));
10819   effect(KILL cr);
10820   format %{ "addb_lock   $mem, $add" %}
10821   ins_encode %{
10822     __ lock();
10823     __ addb($mem$$Address, $add$$constant);
10824   %}
10825   ins_pipe(pipe_cmpxchg);
10826 %}
10827 
10828 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10829   predicate(!n->as_LoadStore()->result_not_used());
10830   match(Set newval (GetAndAddB mem newval));
10831   effect(KILL cr);
10832   format %{ "xaddb_lock  $mem, $newval" %}
10833   ins_encode %{
10834     __ lock();
10835     __ xaddb($mem$$Address, $newval$$Register);
10836   %}
10837   ins_pipe(pipe_cmpxchg);
10838 %}
10839 
10840 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10841   predicate(n->as_LoadStore()->result_not_used());
10842   match(Set dummy (GetAndAddS mem add));
10843   effect(KILL cr);
10844   format %{ "addw_lock   $mem, $add" %}
10845   ins_encode %{
10846     __ lock();
10847     __ addw($mem$$Address, $add$$Register);
10848   %}
10849   ins_pipe(pipe_cmpxchg);
10850 %}
10851 
10852 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10853   predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10854   match(Set dummy (GetAndAddS mem add));
10855   effect(KILL cr);
10856   format %{ "addw_lock   $mem, $add" %}
10857   ins_encode %{
10858     __ lock();
10859     __ addw($mem$$Address, $add$$constant);
10860   %}
10861   ins_pipe(pipe_cmpxchg);
10862 %}
10863 
10864 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10865   predicate(!n->as_LoadStore()->result_not_used());
10866   match(Set newval (GetAndAddS mem newval));
10867   effect(KILL cr);
10868   format %{ "xaddw_lock  $mem, $newval" %}
10869   ins_encode %{
10870     __ lock();
10871     __ xaddw($mem$$Address, $newval$$Register);
10872   %}
10873   ins_pipe(pipe_cmpxchg);
10874 %}
10875 
10876 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10877   predicate(n->as_LoadStore()->result_not_used());
10878   match(Set dummy (GetAndAddI mem add));
10879   effect(KILL cr);
10880   format %{ "addl_lock   $mem, $add" %}
10881   ins_encode %{
10882     __ lock();
10883     __ addl($mem$$Address, $add$$Register);
10884   %}
10885   ins_pipe(pipe_cmpxchg);
10886 %}
10887 
10888 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10889   predicate(n->as_LoadStore()->result_not_used());
10890   match(Set dummy (GetAndAddI mem add));
10891   effect(KILL cr);
10892   format %{ "addl_lock   $mem, $add" %}
10893   ins_encode %{
10894     __ lock();
10895     __ addl($mem$$Address, $add$$constant);
10896   %}
10897   ins_pipe(pipe_cmpxchg);
10898 %}
10899 
10900 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10901   predicate(!n->as_LoadStore()->result_not_used());
10902   match(Set newval (GetAndAddI mem newval));
10903   effect(KILL cr);
10904   format %{ "xaddl_lock  $mem, $newval" %}
10905   ins_encode %{
10906     __ lock();
10907     __ xaddl($mem$$Address, $newval$$Register);
10908   %}
10909   ins_pipe(pipe_cmpxchg);
10910 %}
10911 
10912 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10913   predicate(n->as_LoadStore()->result_not_used());
10914   match(Set dummy (GetAndAddL mem add));
10915   effect(KILL cr);
10916   format %{ "addq_lock   $mem, $add" %}
10917   ins_encode %{
10918     __ lock();
10919     __ addq($mem$$Address, $add$$Register);
10920   %}
10921   ins_pipe(pipe_cmpxchg);
10922 %}
10923 
10924 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10925   predicate(n->as_LoadStore()->result_not_used());
10926   match(Set dummy (GetAndAddL mem add));
10927   effect(KILL cr);
10928   format %{ "addq_lock   $mem, $add" %}
10929   ins_encode %{
10930     __ lock();
10931     __ addq($mem$$Address, $add$$constant);
10932   %}
10933   ins_pipe(pipe_cmpxchg);
10934 %}
10935 
10936 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10937   predicate(!n->as_LoadStore()->result_not_used());
10938   match(Set newval (GetAndAddL mem newval));
10939   effect(KILL cr);
10940   format %{ "xaddq_lock  $mem, $newval" %}
10941   ins_encode %{
10942     __ lock();
10943     __ xaddq($mem$$Address, $newval$$Register);
10944   %}
10945   ins_pipe(pipe_cmpxchg);
10946 %}
10947 
10948 instruct xchgB( memory mem, rRegI newval) %{
10949   match(Set newval (GetAndSetB mem newval));
10950   format %{ "XCHGB  $newval,[$mem]" %}
10951   ins_encode %{
10952     __ xchgb($newval$$Register, $mem$$Address);
10953   %}
10954   ins_pipe( pipe_cmpxchg );
10955 %}
10956 
10957 instruct xchgS( memory mem, rRegI newval) %{
10958   match(Set newval (GetAndSetS mem newval));
10959   format %{ "XCHGW  $newval,[$mem]" %}
10960   ins_encode %{
10961     __ xchgw($newval$$Register, $mem$$Address);
10962   %}
10963   ins_pipe( pipe_cmpxchg );
10964 %}
10965 
10966 instruct xchgI( memory mem, rRegI newval) %{
10967   match(Set newval (GetAndSetI mem newval));
10968   format %{ "XCHGL  $newval,[$mem]" %}
10969   ins_encode %{
10970     __ xchgl($newval$$Register, $mem$$Address);
10971   %}
10972   ins_pipe( pipe_cmpxchg );
10973 %}
10974 
10975 instruct xchgL( memory mem, rRegL newval) %{
10976   match(Set newval (GetAndSetL mem newval));
10977   format %{ "XCHGL  $newval,[$mem]" %}
10978   ins_encode %{
10979     __ xchgq($newval$$Register, $mem$$Address);
10980   %}
10981   ins_pipe( pipe_cmpxchg );
10982 %}
10983 
instruct xchgP( memory mem, rRegP newval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetP mem newval));
10987   format %{ "XCHGQ  $newval,[$mem]" %}
10988   ins_encode %{
10989     __ xchgq($newval$$Register, $mem$$Address);
10990   %}
10991   ins_pipe( pipe_cmpxchg );
10992 %}
10993 
10994 instruct xchgN( memory mem, rRegN newval) %{
10995   predicate(n->as_LoadStore()->barrier_data() == 0);
10996   match(Set newval (GetAndSetN mem newval));
10997   format %{ "XCHGL  $newval,$mem]" %}
10998   ins_encode %{
10999     __ xchgl($newval$$Register, $mem$$Address);
11000   %}
11001   ins_pipe( pipe_cmpxchg );
11002 %}
11003 
11004 //----------Abs Instructions-------------------------------------------
11005 
11006 // Integer Absolute Instructions
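// Both Abs rules use the branchless xor/sub/cmov idiom: dst is zeroed, src
// is subtracted (dst = -src, setting flags), and cmov(less) keeps the
// original src when the negation went negative, i.e. when src was positive.
// A sketch in C terms (two's complement; Integer.MIN_VALUE maps to itself,
// matching Java's Math.abs):
//
//   int absI(int src) { int dst = -src; return (dst < 0) ? src : dst; }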
11007 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11008 %{
11009   match(Set dst (AbsI src));
11010   effect(TEMP dst, KILL cr);
11011   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11012             "subl    $dst, $src\n\t"
11013             "cmovll  $dst, $src" %}
11014   ins_encode %{
11015     __ xorl($dst$$Register, $dst$$Register);
11016     __ subl($dst$$Register, $src$$Register);
11017     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11018   %}
11019 
11020   ins_pipe(ialu_reg_reg);
11021 %}
11022 
11023 // Long Absolute Instructions
11024 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11025 %{
11026   match(Set dst (AbsL src));
11027   effect(TEMP dst, KILL cr);
11028   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11029             "subq    $dst, $src\n\t"
11030             "cmovlq  $dst, $src" %}
11031   ins_encode %{
11032     __ xorl($dst$$Register, $dst$$Register);
11033     __ subq($dst$$Register, $src$$Register);
11034     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11035   %}
11036 
11037   ins_pipe(ialu_reg_reg);
11038 %}
11039 
11040 //----------Subtraction Instructions-------------------------------------------
11041 
11042 // Integer Subtraction Instructions
11043 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11044 %{
11045   predicate(!UseAPX);
11046   match(Set dst (SubI dst src));
11047   effect(KILL cr);
11048   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11049 
11050   format %{ "subl    $dst, $src\t# int" %}
11051   ins_encode %{
11052     __ subl($dst$$Register, $src$$Register);
11053   %}
11054   ins_pipe(ialu_reg_reg);
11055 %}
11056 
11057 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11058 %{
11059   predicate(UseAPX);
11060   match(Set dst (SubI src1 src2));
11061   effect(KILL cr);
11062   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11063 
11064   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11065   ins_encode %{
11066     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11067   %}
11068   ins_pipe(ialu_reg_reg);
11069 %}
11070 
11071 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11072 %{
11073   predicate(UseAPX);
11074   match(Set dst (SubI src1 src2));
11075   effect(KILL cr);
11076   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11077 
11078   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11079   ins_encode %{
11080     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11081   %}
11082   ins_pipe(ialu_reg_reg);
11083 %}
11084 
11085 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11086 %{
11087   predicate(UseAPX);
11088   match(Set dst (SubI (LoadI src1) src2));
11089   effect(KILL cr);
11090   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11091 
11092   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11093   ins_encode %{
11094     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11095   %}
11096   ins_pipe(ialu_reg_reg);
11097 %}
11098 
11099 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11100 %{
11101   predicate(!UseAPX);
11102   match(Set dst (SubI dst (LoadI src)));
11103   effect(KILL cr);
11104   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11105 
11106   ins_cost(150);
11107   format %{ "subl    $dst, $src\t# int" %}
11108   ins_encode %{
11109     __ subl($dst$$Register, $src$$Address);
11110   %}
11111   ins_pipe(ialu_reg_mem);
11112 %}
11113 
11114 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11115 %{
11116   predicate(UseAPX);
11117   match(Set dst (SubI src1 (LoadI src2)));
11118   effect(KILL cr);
11119   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11120 
11121   ins_cost(150);
11122   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11123   ins_encode %{
11124     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11125   %}
11126   ins_pipe(ialu_reg_mem);
11127 %}
11128 
11129 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11130 %{
11131   predicate(UseAPX);
11132   match(Set dst (SubI (LoadI src1) src2));
11133   effect(KILL cr);
11134   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11135 
11136   ins_cost(150);
11137   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11138   ins_encode %{
11139     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11140   %}
11141   ins_pipe(ialu_reg_mem);
11142 %}
11143 
11144 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11145 %{
11146   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11147   effect(KILL cr);
11148   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11149 
11150   ins_cost(150);
11151   format %{ "subl    $dst, $src\t# int" %}
11152   ins_encode %{
11153     __ subl($dst$$Address, $src$$Register);
11154   %}
11155   ins_pipe(ialu_mem_reg);
11156 %}
11157 
11158 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11159 %{
11160   predicate(!UseAPX);
11161   match(Set dst (SubL dst src));
11162   effect(KILL cr);
11163   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11164 
11165   format %{ "subq    $dst, $src\t# long" %}
11166   ins_encode %{
11167     __ subq($dst$$Register, $src$$Register);
11168   %}
11169   ins_pipe(ialu_reg_reg);
11170 %}
11171 
11172 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11173 %{
11174   predicate(UseAPX);
11175   match(Set dst (SubL src1 src2));
11176   effect(KILL cr);
11177   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11178 
11179   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11180   ins_encode %{
11181     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11182   %}
11183   ins_pipe(ialu_reg_reg);
11184 %}
11185 
11186 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11187 %{
11188   predicate(UseAPX);
11189   match(Set dst (SubL src1 src2));
11190   effect(KILL cr);
11191   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11192 
11193   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11194   ins_encode %{
11195     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11196   %}
11197   ins_pipe(ialu_reg_reg);
11198 %}
11199 
11200 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11201 %{
11202   predicate(UseAPX);
11203   match(Set dst (SubL (LoadL src1) src2));
11204   effect(KILL cr);
11205   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11206 
11207   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11208   ins_encode %{
11209     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11210   %}
11211   ins_pipe(ialu_reg_reg);
11212 %}
11213 
11214 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11215 %{
11216   predicate(!UseAPX);
11217   match(Set dst (SubL dst (LoadL src)));
11218   effect(KILL cr);
11219   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11220 
11221   ins_cost(150);
11222   format %{ "subq    $dst, $src\t# long" %}
11223   ins_encode %{
11224     __ subq($dst$$Register, $src$$Address);
11225   %}
11226   ins_pipe(ialu_reg_mem);
11227 %}
11228 
11229 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11230 %{
11231   predicate(UseAPX);
11232   match(Set dst (SubL src1 (LoadL src2)));
11233   effect(KILL cr);
11234   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11235 
11236   ins_cost(150);
11237   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11238   ins_encode %{
11239     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11240   %}
11241   ins_pipe(ialu_reg_mem);
11242 %}
11243 
11244 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11245 %{
11246   predicate(UseAPX);
11247   match(Set dst (SubL (LoadL src1) src2));
11248   effect(KILL cr);
11249   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11250 
11251   ins_cost(150);
11252   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11253   ins_encode %{
11254     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11255   %}
11256   ins_pipe(ialu_reg_mem);
11257 %}
11258 
11259 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11260 %{
11261   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11262   effect(KILL cr);
11263   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11264 
11265   ins_cost(150);
11266   format %{ "subq    $dst, $src\t# long" %}
11267   ins_encode %{
11268     __ subq($dst$$Address, $src$$Register);
11269   %}
11270   ins_pipe(ialu_mem_reg);
11271 %}
11272 
11273 // Subtract from a pointer
11274 // XXX hmpf???
11275 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11276 %{
11277   match(Set dst (AddP dst (SubI zero src)));
11278   effect(KILL cr);
11279 
11280   format %{ "subq    $dst, $src\t# ptr - int" %}
11281   ins_encode %{
11282     __ subq($dst$$Register, $src$$Register);
11283   %}
11284   ins_pipe(ialu_reg_reg);
11285 %}
11286 
11287 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11288 %{
11289   predicate(!UseAPX);
11290   match(Set dst (SubI zero dst));
11291   effect(KILL cr);
11292   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11293 
11294   format %{ "negl    $dst\t# int" %}
11295   ins_encode %{
11296     __ negl($dst$$Register);
11297   %}
11298   ins_pipe(ialu_reg);
11299 %}
11300 
11301 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11302 %{
11303   predicate(UseAPX);
11304   match(Set dst (SubI zero src));
11305   effect(KILL cr);
11306   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11307 
11308   format %{ "enegl    $dst, $src\t# int ndd" %}
11309   ins_encode %{
11310     __ enegl($dst$$Register, $src$$Register, false);
11311   %}
11312   ins_pipe(ialu_reg);
11313 %}
11314 
11315 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11316 %{
11317   predicate(!UseAPX);
11318   match(Set dst (NegI dst));
11319   effect(KILL cr);
11320   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11321 
11322   format %{ "negl    $dst\t# int" %}
11323   ins_encode %{
11324     __ negl($dst$$Register);
11325   %}
11326   ins_pipe(ialu_reg);
11327 %}
11328 
11329 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11330 %{
11331   predicate(UseAPX);
11332   match(Set dst (NegI src));
11333   effect(KILL cr);
11334   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11335 
11336   format %{ "enegl    $dst, $src\t# int ndd" %}
11337   ins_encode %{
11338     __ enegl($dst$$Register, $src$$Register, false);
11339   %}
11340   ins_pipe(ialu_reg);
11341 %}
11342 
11343 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11344 %{
11345   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11346   effect(KILL cr);
11347   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11348 
11349   format %{ "negl    $dst\t# int" %}
11350   ins_encode %{
11351     __ negl($dst$$Address);
11352   %}
11353   ins_pipe(ialu_reg);
11354 %}
11355 
11356 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11357 %{
11358   predicate(!UseAPX);
11359   match(Set dst (SubL zero dst));
11360   effect(KILL cr);
11361   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11362 
11363   format %{ "negq    $dst\t# long" %}
11364   ins_encode %{
11365     __ negq($dst$$Register);
11366   %}
11367   ins_pipe(ialu_reg);
11368 %}
11369 
11370 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11371 %{
11372   predicate(UseAPX);
11373   match(Set dst (SubL zero src));
11374   effect(KILL cr);
11375   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11376 
11377   format %{ "enegq    $dst, $src\t# long ndd" %}
11378   ins_encode %{
11379     __ enegq($dst$$Register, $src$$Register, false);
11380   %}
11381   ins_pipe(ialu_reg);
11382 %}
11383 
11384 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11385 %{
11386   predicate(!UseAPX);
11387   match(Set dst (NegL dst));
11388   effect(KILL cr);
11389   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11390 
11391   format %{ "negq    $dst\t# int" %}
11392   ins_encode %{
11393     __ negq($dst$$Register);
11394   %}
11395   ins_pipe(ialu_reg);
11396 %}
11397 
11398 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11399 %{
11400   predicate(UseAPX);
11401   match(Set dst (NegL src));
11402   effect(KILL cr);
11403   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable);
11404 
11405   format %{ "enegq    $dst, $src\t# long ndd" %}
11406   ins_encode %{
11407     __ enegq($dst$$Register, $src$$Register, false);
11408   %}
11409   ins_pipe(ialu_reg);
11410 %}
11411 
11412 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11413 %{
11414   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11415   effect(KILL cr);
11416   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11417 
11418   format %{ "negq    $dst\t# long" %}
11419   ins_encode %{
11420     __ negq($dst$$Address);
11421   %}
11422   ins_pipe(ialu_reg);
11423 %}
11424 
11425 //----------Multiplication/Division Instructions-------------------------------
11426 // Integer Multiplication Instructions
11427 // Multiply Register
11428 
11429 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11430 %{
11431   predicate(!UseAPX);
11432   match(Set dst (MulI dst src));
11433   effect(KILL cr);
11434 
11435   ins_cost(300);
11436   format %{ "imull   $dst, $src\t# int" %}
11437   ins_encode %{
11438     __ imull($dst$$Register, $src$$Register);
11439   %}
11440   ins_pipe(ialu_reg_reg_alu0);
11441 %}
11442 
11443 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11444 %{
11445   predicate(UseAPX);
11446   match(Set dst (MulI src1 src2));
11447   effect(KILL cr);
11448   flag(PD::Flag_ndd_demotable_commutative);
11449 
11450   ins_cost(300);
11451   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11452   ins_encode %{
11453     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11454   %}
11455   ins_pipe(ialu_reg_reg_alu0);
11456 %}
11457 
11458 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11459 %{
11460   match(Set dst (MulI src imm));
11461   effect(KILL cr);
11462 
11463   ins_cost(300);
11464   format %{ "imull   $dst, $src, $imm\t# int" %}
11465   ins_encode %{
11466     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11467   %}
11468   ins_pipe(ialu_reg_reg_alu0);
11469 %}
11470 
11471 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11472 %{
11473   predicate(!UseAPX);
11474   match(Set dst (MulI dst (LoadI src)));
11475   effect(KILL cr);
11476 
11477   ins_cost(350);
11478   format %{ "imull   $dst, $src\t# int" %}
11479   ins_encode %{
11480     __ imull($dst$$Register, $src$$Address);
11481   %}
11482   ins_pipe(ialu_reg_mem_alu0);
11483 %}
11484 
11485 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11486 %{
11487   predicate(UseAPX);
11488   match(Set dst (MulI src1 (LoadI src2)));
11489   effect(KILL cr);
11490   flag(PD::Flag_ndd_demotable);
11491 
11492   ins_cost(350);
11493   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11494   ins_encode %{
11495     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11496   %}
11497   ins_pipe(ialu_reg_mem_alu0);
11498 %}
11499 
11500 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11501 %{
11502   match(Set dst (MulI (LoadI src) imm));
11503   effect(KILL cr);
11504 
11505   ins_cost(300);
11506   format %{ "imull   $dst, $src, $imm\t# int" %}
11507   ins_encode %{
11508     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11509   %}
11510   ins_pipe(ialu_reg_mem_alu0);
11511 %}
11512 
11513 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11514 %{
11515   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11516   effect(KILL cr, KILL src2);
11517 
  expand %{
    mulI_rReg(dst, src1, cr);
    mulI_rReg(src2, src3, cr);
    addI_rReg(dst, src2, cr);
  %}
11521 %}
11522 
11523 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11524 %{
11525   predicate(!UseAPX);
11526   match(Set dst (MulL dst src));
11527   effect(KILL cr);
11528 
11529   ins_cost(300);
11530   format %{ "imulq   $dst, $src\t# long" %}
11531   ins_encode %{
11532     __ imulq($dst$$Register, $src$$Register);
11533   %}
11534   ins_pipe(ialu_reg_reg_alu0);
11535 %}
11536 
11537 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11538 %{
11539   predicate(UseAPX);
11540   match(Set dst (MulL src1 src2));
11541   effect(KILL cr);
11542   flag(PD::Flag_ndd_demotable_commutative);
11543 
11544   ins_cost(300);
11545   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11546   ins_encode %{
11547     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11548   %}
11549   ins_pipe(ialu_reg_reg_alu0);
11550 %}
11551 
11552 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11553 %{
11554   match(Set dst (MulL src imm));
11555   effect(KILL cr);
11556 
11557   ins_cost(300);
11558   format %{ "imulq   $dst, $src, $imm\t# long" %}
11559   ins_encode %{
11560     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11561   %}
11562   ins_pipe(ialu_reg_reg_alu0);
11563 %}
11564 
11565 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11566 %{
11567   predicate(!UseAPX);
11568   match(Set dst (MulL dst (LoadL src)));
11569   effect(KILL cr);
11570 
11571   ins_cost(350);
11572   format %{ "imulq   $dst, $src\t# long" %}
11573   ins_encode %{
11574     __ imulq($dst$$Register, $src$$Address);
11575   %}
11576   ins_pipe(ialu_reg_mem_alu0);
11577 %}
11578 
11579 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11580 %{
11581   predicate(UseAPX);
11582   match(Set dst (MulL src1 (LoadL src2)));
11583   effect(KILL cr);
11584   flag(PD::Flag_ndd_demotable_commutative);
11585 
11586   ins_cost(350);
11587   format %{ "eimulq   $dst, $src1, $src2 \t# long" %}
11588   ins_encode %{
11589     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11590   %}
11591   ins_pipe(ialu_reg_mem_alu0);
11592 %}
11593 
11594 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11595 %{
11596   match(Set dst (MulL (LoadL src) imm));
11597   effect(KILL cr);
11598 
11599   ins_cost(300);
11600   format %{ "imulq   $dst, $src, $imm\t# long" %}
11601   ins_encode %{
11602     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11603   %}
11604   ins_pipe(ialu_reg_mem_alu0);
11605 %}
11606 
11607 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11608 %{
11609   match(Set dst (MulHiL src rax));
11610   effect(USE_KILL rax, KILL cr);
11611 
11612   ins_cost(300);
11613   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11614   ins_encode %{
11615     __ imulq($src$$Register);
11616   %}
11617   ins_pipe(ialu_reg_reg_alu0);
11618 %}
11619 
11620 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11621 %{
11622   match(Set dst (UMulHiL src rax));
11623   effect(USE_KILL rax, KILL cr);
11624 
11625   ins_cost(300);
11626   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11627   ins_encode %{
11628     __ mulq($src$$Register);
11629   %}
11630   ins_pipe(ialu_reg_reg_alu0);
11631 %}
11632 
11633 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11634                    rFlagsReg cr)
11635 %{
11636   match(Set rax (DivI rax div));
11637   effect(KILL rdx, KILL cr);
11638 
11639   ins_cost(30*100+10*100); // XXX
11640   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11641             "jne,s   normal\n\t"
11642             "xorl    rdx, rdx\n\t"
11643             "cmpl    $div, -1\n\t"
11644             "je,s    done\n"
11645     "normal: cdql\n\t"
11646             "idivl   $div\n"
11647     "done:"        %}
11648   ins_encode(cdql_enc(div));
11649   ins_pipe(ialu_reg_reg_alu0);
11650 %}
11651 
11652 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11653                    rFlagsReg cr)
11654 %{
11655   match(Set rax (DivL rax div));
11656   effect(KILL rdx, KILL cr);
11657 
11658   ins_cost(30*100+10*100); // XXX
11659   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11660             "cmpq    rax, rdx\n\t"
11661             "jne,s   normal\n\t"
11662             "xorl    rdx, rdx\n\t"
11663             "cmpq    $div, -1\n\t"
11664             "je,s    done\n"
11665     "normal: cdqq\n\t"
11666             "idivq   $div\n"
11667     "done:"        %}
11668   ins_encode(cdqq_enc(div));
11669   ins_pipe(ialu_reg_reg_alu0);
11670 %}
11671 
11672 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11673 %{
11674   match(Set rax (UDivI rax div));
11675   effect(KILL rdx, KILL cr);
11676 
11677   ins_cost(300);
11678   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11679   ins_encode %{
11680     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11681   %}
11682   ins_pipe(ialu_reg_reg_alu0);
11683 %}
11684 
11685 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11686 %{
11687   match(Set rax (UDivL rax div));
11688   effect(KILL rdx, KILL cr);
11689 
11690   ins_cost(300);
11691   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11692   ins_encode %{
11693      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11694   %}
11695   ins_pipe(ialu_reg_reg_alu0);
11696 %}
11697 
11698 // Integer DIVMOD with Register, both quotient and mod results
11699 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11700                              rFlagsReg cr)
11701 %{
11702   match(DivModI rax div);
11703   effect(KILL cr);
11704 
11705   ins_cost(30*100+10*100); // XXX
11706   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11707             "jne,s   normal\n\t"
11708             "xorl    rdx, rdx\n\t"
11709             "cmpl    $div, -1\n\t"
11710             "je,s    done\n"
11711     "normal: cdql\n\t"
11712             "idivl   $div\n"
11713     "done:"        %}
11714   ins_encode(cdql_enc(div));
11715   ins_pipe(pipe_slow);
11716 %}
11717 
11718 // Long DIVMOD with Register, both quotient and mod results
11719 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11720                              rFlagsReg cr)
11721 %{
11722   match(DivModL rax div);
11723   effect(KILL cr);
11724 
11725   ins_cost(30*100+10*100); // XXX
11726   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11727             "cmpq    rax, rdx\n\t"
11728             "jne,s   normal\n\t"
11729             "xorl    rdx, rdx\n\t"
11730             "cmpq    $div, -1\n\t"
11731             "je,s    done\n"
11732     "normal: cdqq\n\t"
11733             "idivq   $div\n"
11734     "done:"        %}
11735   ins_encode(cdqq_enc(div));
11736   ins_pipe(pipe_slow);
11737 %}
11738 
11739 // Unsigned integer DIVMOD with Register, both quotient and mod results
11740 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11741                               no_rax_rdx_RegI div, rFlagsReg cr)
11742 %{
11743   match(UDivModI rax div);
11744   effect(TEMP tmp, KILL cr);
11745 
11746   ins_cost(300);
11747   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11748             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11749           %}
11750   ins_encode %{
11751     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11752   %}
11753   ins_pipe(pipe_slow);
11754 %}
11755 
11756 // Unsigned long DIVMOD with Register, both quotient and mod results
11757 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11758                               no_rax_rdx_RegL div, rFlagsReg cr)
11759 %{
11760   match(UDivModL rax div);
11761   effect(TEMP tmp, KILL cr);
11762 
11763   ins_cost(300);
11764   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11765             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11766           %}
11767   ins_encode %{
11768     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11769   %}
11770   ins_pipe(pipe_slow);
11771 %}
11772 
11773 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11774                    rFlagsReg cr)
11775 %{
11776   match(Set rdx (ModI rax div));
11777   effect(KILL rax, KILL cr);
11778 
11779   ins_cost(300); // XXX
11780   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11781             "jne,s   normal\n\t"
11782             "xorl    rdx, rdx\n\t"
11783             "cmpl    $div, -1\n\t"
11784             "je,s    done\n"
11785     "normal: cdql\n\t"
11786             "idivl   $div\n"
11787     "done:"        %}
11788   ins_encode(cdql_enc(div));
11789   ins_pipe(ialu_reg_reg_alu0);
11790 %}
11791 
11792 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11793                    rFlagsReg cr)
11794 %{
11795   match(Set rdx (ModL rax div));
11796   effect(KILL rax, KILL cr);
11797 
11798   ins_cost(300); // XXX
11799   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11800             "cmpq    rax, rdx\n\t"
11801             "jne,s   normal\n\t"
11802             "xorl    rdx, rdx\n\t"
11803             "cmpq    $div, -1\n\t"
11804             "je,s    done\n"
11805     "normal: cdqq\n\t"
11806             "idivq   $div\n"
11807     "done:"        %}
11808   ins_encode(cdqq_enc(div));
11809   ins_pipe(ialu_reg_reg_alu0);
11810 %}
11811 
11812 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11813 %{
11814   match(Set rdx (UModI rax div));
11815   effect(KILL rax, KILL cr);
11816 
11817   ins_cost(300);
11818   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11819   ins_encode %{
11820     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11821   %}
11822   ins_pipe(ialu_reg_reg_alu0);
11823 %}
11824 
11825 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11826 %{
11827   match(Set rdx (UModL rax div));
11828   effect(KILL rax, KILL cr);
11829 
11830   ins_cost(300);
11831   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11832   ins_encode %{
11833     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11834   %}
11835   ins_pipe(ialu_reg_reg_alu0);
11836 %}
11837 
11838 // Integer Shift Instructions
11839 // Shift Left by one, two, three
11840 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11841 %{
11842   predicate(!UseAPX);
11843   match(Set dst (LShiftI dst shift));
11844   effect(KILL cr);
11845 
11846   format %{ "sall    $dst, $shift" %}
11847   ins_encode %{
11848     __ sall($dst$$Register, $shift$$constant);
11849   %}
11850   ins_pipe(ialu_reg);
11851 %}
11852 
11853 // Shift Left by one, two, three
11854 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11855 %{
11856   predicate(UseAPX);
11857   match(Set dst (LShiftI src shift));
11858   effect(KILL cr);
11859   flag(PD::Flag_ndd_demotable);
11860 
11861   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
11862   ins_encode %{
11863     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11864   %}
11865   ins_pipe(ialu_reg);
11866 %}
11867 
11868 // Shift Left by 8-bit immediate
11869 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11870 %{
11871   predicate(!UseAPX);
11872   match(Set dst (LShiftI dst shift));
11873   effect(KILL cr);
11874 
11875   format %{ "sall    $dst, $shift" %}
11876   ins_encode %{
11877     __ sall($dst$$Register, $shift$$constant);
11878   %}
11879   ins_pipe(ialu_reg);
11880 %}
11881 
11882 // Shift Left by 8-bit immediate
11883 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11884 %{
11885   predicate(UseAPX);
11886   match(Set dst (LShiftI src shift));
11887   effect(KILL cr);
11888   flag(PD::Flag_ndd_demotable);
11889 
11890   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11891   ins_encode %{
11892     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11893   %}
11894   ins_pipe(ialu_reg);
11895 %}
11896 
11897 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11898 %{
11899   predicate(UseAPX);
11900   match(Set dst (LShiftI (LoadI src) shift));
11901   effect(KILL cr);
11902 
11903   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11904   ins_encode %{
11905     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11906   %}
11907   ins_pipe(ialu_reg);
11908 %}
11909 
11910 // Shift Left by 8-bit immediate
11911 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11912 %{
11913   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11914   effect(KILL cr);
11915 
11916   format %{ "sall    $dst, $shift" %}
11917   ins_encode %{
11918     __ sall($dst$$Address, $shift$$constant);
11919   %}
11920   ins_pipe(ialu_mem_imm);
11921 %}
11922 
11923 // Shift Left by variable
11924 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11925 %{
11926   predicate(!VM_Version::supports_bmi2());
11927   match(Set dst (LShiftI dst shift));
11928   effect(KILL cr);
11929 
11930   format %{ "sall    $dst, $shift" %}
11931   ins_encode %{
11932     __ sall($dst$$Register);
11933   %}
11934   ins_pipe(ialu_reg_reg);
11935 %}
11936 
11937 // Shift Left by variable
11938 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11939 %{
11940   predicate(!VM_Version::supports_bmi2());
11941   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11942   effect(KILL cr);
11943 
11944   format %{ "sall    $dst, $shift" %}
11945   ins_encode %{
11946     __ sall($dst$$Address);
11947   %}
11948   ins_pipe(ialu_mem_reg);
11949 %}
11950 
11951 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11952 %{
11953   predicate(VM_Version::supports_bmi2());
11954   match(Set dst (LShiftI src shift));
11955 
11956   format %{ "shlxl   $dst, $src, $shift" %}
11957   ins_encode %{
11958     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11959   %}
11960   ins_pipe(ialu_reg_reg);
11961 %}
11962 
11963 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11964 %{
11965   predicate(VM_Version::supports_bmi2());
11966   match(Set dst (LShiftI (LoadI src) shift));
11967   ins_cost(175);
11968   format %{ "shlxl   $dst, $src, $shift" %}
11969   ins_encode %{
11970     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11971   %}
11972   ins_pipe(ialu_reg_mem);
11973 %}
11974 
11975 // Arithmetic Shift Right by 8-bit immediate
11976 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11977 %{
11978   predicate(!UseAPX);
11979   match(Set dst (RShiftI dst shift));
11980   effect(KILL cr);
11981 
11982   format %{ "sarl    $dst, $shift" %}
11983   ins_encode %{
11984     __ sarl($dst$$Register, $shift$$constant);
11985   %}
11986   ins_pipe(ialu_mem_imm);
11987 %}
11988 
11989 // Arithmetic Shift Right by 8-bit immediate
11990 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11991 %{
11992   predicate(UseAPX);
11993   match(Set dst (RShiftI src shift));
11994   effect(KILL cr);
11995   flag(PD::Flag_ndd_demotable);
11996 
11997   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
11998   ins_encode %{
11999     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12000   %}
12001   ins_pipe(ialu_mem_imm);
12002 %}
12003 
12004 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12005 %{
12006   predicate(UseAPX);
12007   match(Set dst (RShiftI (LoadI src) shift));
12008   effect(KILL cr);
12009 
12010   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12011   ins_encode %{
12012     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12013   %}
12014   ins_pipe(ialu_mem_imm);
12015 %}
12016 
12017 // Arithmetic Shift Right by 8-bit immediate
12018 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12019 %{
12020   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12021   effect(KILL cr);
12022 
12023   format %{ "sarl    $dst, $shift" %}
12024   ins_encode %{
12025     __ sarl($dst$$Address, $shift$$constant);
12026   %}
12027   ins_pipe(ialu_mem_imm);
12028 %}
12029 
12030 // Arithmetic Shift Right by variable
12031 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12032 %{
12033   predicate(!VM_Version::supports_bmi2());
12034   match(Set dst (RShiftI dst shift));
12035   effect(KILL cr);
12036 
12037   format %{ "sarl    $dst, $shift" %}
12038   ins_encode %{
12039     __ sarl($dst$$Register);
12040   %}
12041   ins_pipe(ialu_reg_reg);
12042 %}
12043 
12044 // Arithmetic Shift Right by variable
12045 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12046 %{
12047   predicate(!VM_Version::supports_bmi2());
12048   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12049   effect(KILL cr);
12050 
12051   format %{ "sarl    $dst, $shift" %}
12052   ins_encode %{
12053     __ sarl($dst$$Address);
12054   %}
12055   ins_pipe(ialu_mem_reg);
12056 %}
12057 
12058 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12059 %{
12060   predicate(VM_Version::supports_bmi2());
12061   match(Set dst (RShiftI src shift));
12062 
12063   format %{ "sarxl   $dst, $src, $shift" %}
12064   ins_encode %{
12065     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12066   %}
12067   ins_pipe(ialu_reg_reg);
12068 %}
12069 
12070 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12071 %{
12072   predicate(VM_Version::supports_bmi2());
12073   match(Set dst (RShiftI (LoadI src) shift));
12074   ins_cost(175);
12075   format %{ "sarxl   $dst, $src, $shift" %}
12076   ins_encode %{
12077     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12078   %}
12079   ins_pipe(ialu_reg_mem);
12080 %}
12081 
12082 // Logical Shift Right by 8-bit immediate
12083 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12084 %{
12085   predicate(!UseAPX);
12086   match(Set dst (URShiftI dst shift));
12087   effect(KILL cr);
12088 
12089   format %{ "shrl    $dst, $shift" %}
12090   ins_encode %{
12091     __ shrl($dst$$Register, $shift$$constant);
12092   %}
12093   ins_pipe(ialu_reg);
12094 %}
12095 
12096 // Logical Shift Right by 8-bit immediate
12097 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12098 %{
12099   predicate(UseAPX);
12100   match(Set dst (URShiftI src shift));
12101   effect(KILL cr);
12102   flag(PD::Flag_ndd_demotable);
12103 
12104   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12105   ins_encode %{
12106     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12107   %}
12108   ins_pipe(ialu_reg);
12109 %}
12110 
12111 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12112 %{
12113   predicate(UseAPX);
12114   match(Set dst (URShiftI (LoadI src) shift));
12115   effect(KILL cr);
12116 
12117   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12118   ins_encode %{
12119     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12120   %}
12121   ins_pipe(ialu_reg);
12122 %}
12123 
12124 // Logical Shift Right by 8-bit immediate
12125 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12126 %{
12127   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12128   effect(KILL cr);
12129 
12130   format %{ "shrl    $dst, $shift" %}
12131   ins_encode %{
12132     __ shrl($dst$$Address, $shift$$constant);
12133   %}
12134   ins_pipe(ialu_mem_imm);
12135 %}
12136 
12137 // Logical Shift Right by variable
12138 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12139 %{
12140   predicate(!VM_Version::supports_bmi2());
12141   match(Set dst (URShiftI dst shift));
12142   effect(KILL cr);
12143 
12144   format %{ "shrl    $dst, $shift" %}
12145   ins_encode %{
12146     __ shrl($dst$$Register);
12147   %}
12148   ins_pipe(ialu_reg_reg);
12149 %}
12150 
12151 // Logical Shift Right by variable
12152 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12153 %{
12154   predicate(!VM_Version::supports_bmi2());
12155   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12156   effect(KILL cr);
12157 
12158   format %{ "shrl    $dst, $shift" %}
12159   ins_encode %{
12160     __ shrl($dst$$Address);
12161   %}
12162   ins_pipe(ialu_mem_reg);
12163 %}
12164 
12165 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12166 %{
12167   predicate(VM_Version::supports_bmi2());
12168   match(Set dst (URShiftI src shift));
12169 
12170   format %{ "shrxl   $dst, $src, $shift" %}
12171   ins_encode %{
12172     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12173   %}
12174   ins_pipe(ialu_reg_reg);
12175 %}
12176 
12177 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12178 %{
12179   predicate(VM_Version::supports_bmi2());
12180   match(Set dst (URShiftI (LoadI src) shift));
12181   ins_cost(175);
12182   format %{ "shrxl   $dst, $src, $shift" %}
12183   ins_encode %{
12184     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12185   %}
12186   ins_pipe(ialu_reg_mem);
12187 %}
12188 
12189 // Long Shift Instructions
12190 // Shift Left by one, two, three
12191 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12192 %{
12193   predicate(!UseAPX);
12194   match(Set dst (LShiftL dst shift));
12195   effect(KILL cr);
12196 
12197   format %{ "salq    $dst, $shift" %}
12198   ins_encode %{
12199     __ salq($dst$$Register, $shift$$constant);
12200   %}
12201   ins_pipe(ialu_reg);
12202 %}
12203 
12204 // Shift Left by one, two, three
12205 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12206 %{
12207   predicate(UseAPX);
12208   match(Set dst (LShiftL src shift));
12209   effect(KILL cr);
12210   flag(PD::Flag_ndd_demotable);
12211 
12212   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12213   ins_encode %{
12214     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12215   %}
12216   ins_pipe(ialu_reg);
12217 %}
12218 
12219 // Shift Left by 8-bit immediate
12220 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12221 %{
12222   predicate(!UseAPX);
12223   match(Set dst (LShiftL dst shift));
12224   effect(KILL cr);
12225 
12226   format %{ "salq    $dst, $shift" %}
12227   ins_encode %{
12228     __ salq($dst$$Register, $shift$$constant);
12229   %}
12230   ins_pipe(ialu_reg);
12231 %}
12232 
12233 // Shift Left by 8-bit immediate
12234 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12235 %{
12236   predicate(UseAPX);
12237   match(Set dst (LShiftL src shift));
12238   effect(KILL cr);
12239   flag(PD::Flag_ndd_demotable);
12240 
12241   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12242   ins_encode %{
12243     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12244   %}
12245   ins_pipe(ialu_reg);
12246 %}
12247 
12248 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12249 %{
12250   predicate(UseAPX);
12251   match(Set dst (LShiftL (LoadL src) shift));
12252   effect(KILL cr);
12253 
12254   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12255   ins_encode %{
12256     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12257   %}
12258   ins_pipe(ialu_reg);
12259 %}
12260 
12261 // Shift Left by 8-bit immediate
12262 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12263 %{
12264   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12265   effect(KILL cr);
12266 
12267   format %{ "salq    $dst, $shift" %}
12268   ins_encode %{
12269     __ salq($dst$$Address, $shift$$constant);
12270   %}
12271   ins_pipe(ialu_mem_imm);
12272 %}
12273 
12274 // Shift Left by variable
12275 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12276 %{
12277   predicate(!VM_Version::supports_bmi2());
12278   match(Set dst (LShiftL dst shift));
12279   effect(KILL cr);
12280 
12281   format %{ "salq    $dst, $shift" %}
12282   ins_encode %{
12283     __ salq($dst$$Register);
12284   %}
12285   ins_pipe(ialu_reg_reg);
12286 %}
12287 
12288 // Shift Left by variable
12289 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12290 %{
12291   predicate(!VM_Version::supports_bmi2());
12292   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12293   effect(KILL cr);
12294 
12295   format %{ "salq    $dst, $shift" %}
12296   ins_encode %{
12297     __ salq($dst$$Address);
12298   %}
12299   ins_pipe(ialu_mem_reg);
12300 %}
12301 
12302 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12303 %{
12304   predicate(VM_Version::supports_bmi2());
12305   match(Set dst (LShiftL src shift));
12306 
12307   format %{ "shlxq   $dst, $src, $shift" %}
12308   ins_encode %{
12309     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12310   %}
12311   ins_pipe(ialu_reg_reg);
12312 %}
12313 
12314 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12315 %{
12316   predicate(VM_Version::supports_bmi2());
12317   match(Set dst (LShiftL (LoadL src) shift));
12318   ins_cost(175);
12319   format %{ "shlxq   $dst, $src, $shift" %}
12320   ins_encode %{
12321     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12322   %}
12323   ins_pipe(ialu_reg_mem);
12324 %}
12325 
12326 // Arithmetic Shift Right by 8-bit immediate
12327 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12328 %{
12329   predicate(!UseAPX);
12330   match(Set dst (RShiftL dst shift));
12331   effect(KILL cr);
12332 
12333   format %{ "sarq    $dst, $shift" %}
12334   ins_encode %{
12335     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12336   %}
12337   ins_pipe(ialu_mem_imm);
12338 %}
12339 
12340 // Arithmetic Shift Right by 8-bit immediate
12341 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12342 %{
12343   predicate(UseAPX);
12344   match(Set dst (RShiftL src shift));
12345   effect(KILL cr);
12346   flag(PD::Flag_ndd_demotable);
12347 
12348   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12349   ins_encode %{
12350     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12351   %}
12352   ins_pipe(ialu_mem_imm);
12353 %}
12354 
12355 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12356 %{
12357   predicate(UseAPX);
12358   match(Set dst (RShiftL (LoadL src) shift));
12359   effect(KILL cr);
12360 
12361   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12362   ins_encode %{
12363     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12364   %}
12365   ins_pipe(ialu_mem_imm);
12366 %}
12367 
12368 // Arithmetic Shift Right by 8-bit immediate
12369 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12370 %{
12371   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12372   effect(KILL cr);
12373 
12374   format %{ "sarq    $dst, $shift" %}
12375   ins_encode %{
12376     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12377   %}
12378   ins_pipe(ialu_mem_imm);
12379 %}
12380 
12381 // Arithmetic Shift Right by variable
12382 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12383 %{
12384   predicate(!VM_Version::supports_bmi2());
12385   match(Set dst (RShiftL dst shift));
12386   effect(KILL cr);
12387 
12388   format %{ "sarq    $dst, $shift" %}
12389   ins_encode %{
12390     __ sarq($dst$$Register);
12391   %}
12392   ins_pipe(ialu_reg_reg);
12393 %}
12394 
12395 // Arithmetic Shift Right by variable
12396 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12397 %{
12398   predicate(!VM_Version::supports_bmi2());
12399   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12400   effect(KILL cr);
12401 
12402   format %{ "sarq    $dst, $shift" %}
12403   ins_encode %{
12404     __ sarq($dst$$Address);
12405   %}
12406   ins_pipe(ialu_mem_reg);
12407 %}
12408 
12409 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12410 %{
12411   predicate(VM_Version::supports_bmi2());
12412   match(Set dst (RShiftL src shift));
12413 
12414   format %{ "sarxq   $dst, $src, $shift" %}
12415   ins_encode %{
12416     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12417   %}
12418   ins_pipe(ialu_reg_reg);
12419 %}
12420 
12421 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12422 %{
12423   predicate(VM_Version::supports_bmi2());
12424   match(Set dst (RShiftL (LoadL src) shift));
12425   ins_cost(175);
12426   format %{ "sarxq   $dst, $src, $shift" %}
12427   ins_encode %{
12428     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12429   %}
12430   ins_pipe(ialu_reg_mem);
12431 %}
12432 
12433 // Logical Shift Right by 8-bit immediate
12434 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12435 %{
12436   predicate(!UseAPX);
12437   match(Set dst (URShiftL dst shift));
12438   effect(KILL cr);
12439 
12440   format %{ "shrq    $dst, $shift" %}
12441   ins_encode %{
12442     __ shrq($dst$$Register, $shift$$constant);
12443   %}
12444   ins_pipe(ialu_reg);
12445 %}
12446 
12447 // Logical Shift Right by 8-bit immediate
12448 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12449 %{
12450   predicate(UseAPX);
12451   match(Set dst (URShiftL src shift));
12452   effect(KILL cr);
12453   flag(PD::Flag_ndd_demotable);
12454 
12455   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12456   ins_encode %{
12457     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12458   %}
12459   ins_pipe(ialu_reg);
12460 %}
12461 
12462 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12463 %{
12464   predicate(UseAPX);
12465   match(Set dst (URShiftL (LoadL src) shift));
12466   effect(KILL cr);
12467 
12468   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12469   ins_encode %{
12470     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12471   %}
12472   ins_pipe(ialu_reg);
12473 %}
12474 
12475 // Logical Shift Right by 8-bit immediate
12476 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12477 %{
12478   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12479   effect(KILL cr);
12480 
12481   format %{ "shrq    $dst, $shift" %}
12482   ins_encode %{
12483     __ shrq($dst$$Address, $shift$$constant);
12484   %}
12485   ins_pipe(ialu_mem_imm);
12486 %}
12487 
12488 // Logical Shift Right by variable
12489 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12490 %{
12491   predicate(!VM_Version::supports_bmi2());
12492   match(Set dst (URShiftL dst shift));
12493   effect(KILL cr);
12494 
12495   format %{ "shrq    $dst, $shift" %}
12496   ins_encode %{
12497     __ shrq($dst$$Register);
12498   %}
12499   ins_pipe(ialu_reg_reg);
12500 %}
12501 
12502 // Logical Shift Right by variable
12503 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12504 %{
12505   predicate(!VM_Version::supports_bmi2());
12506   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12507   effect(KILL cr);
12508 
12509   format %{ "shrq    $dst, $shift" %}
12510   ins_encode %{
12511     __ shrq($dst$$Address);
12512   %}
12513   ins_pipe(ialu_mem_reg);
12514 %}
12515 
12516 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12517 %{
12518   predicate(VM_Version::supports_bmi2());
12519   match(Set dst (URShiftL src shift));
12520 
12521   format %{ "shrxq   $dst, $src, $shift" %}
12522   ins_encode %{
12523     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12524   %}
12525   ins_pipe(ialu_reg_reg);
12526 %}
12527 
12528 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12529 %{
12530   predicate(VM_Version::supports_bmi2());
12531   match(Set dst (URShiftL (LoadL src) shift));
12532   ins_cost(175);
12533   format %{ "shrxq   $dst, $src, $shift" %}
12534   ins_encode %{
12535     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12536   %}
12537   ins_pipe(ialu_reg_mem);
12538 %}
12539 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
12541 // This idiom is used by the compiler for the i2b bytecode.
12542 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12543 %{
12544   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12545 
12546   format %{ "movsbl  $dst, $src\t# i2b" %}
12547   ins_encode %{
12548     __ movsbl($dst$$Register, $src$$Register);
12549   %}
12550   ins_pipe(ialu_reg_reg);
12551 %}
12552 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
12555 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12556 %{
12557   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12558 
12559   format %{ "movswl  $dst, $src\t# i2s" %}
12560   ins_encode %{
12561     __ movswl($dst$$Register, $src$$Register);
12562   %}
12563   ins_pipe(ialu_reg_reg);
12564 %}
12565 
12566 // ROL/ROR instructions
12567 
12568 // Rotate left by constant.
12569 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12570 %{
12571   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12572   match(Set dst (RotateLeft dst shift));
12573   effect(KILL cr);
12574   format %{ "roll    $dst, $shift" %}
12575   ins_encode %{
12576     __ roll($dst$$Register, $shift$$constant);
12577   %}
12578   ins_pipe(ialu_reg);
12579 %}
12580 
12581 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12582 %{
12583   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12584   match(Set dst (RotateLeft src shift));
12585   format %{ "rolxl   $dst, $src, $shift" %}
12586   ins_encode %{
12587     int shift = 32 - ($shift$$constant & 31);
12588     __ rorxl($dst$$Register, $src$$Register, shift);
12589   %}
12590   ins_pipe(ialu_reg_reg);
12591 %}
12592 
12593 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12594 %{
12595   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12596   match(Set dst (RotateLeft (LoadI src) shift));
12597   ins_cost(175);
12598   format %{ "rolxl   $dst, $src, $shift" %}
12599   ins_encode %{
12600     int shift = 32 - ($shift$$constant & 31);
12601     __ rorxl($dst$$Register, $src$$Address, shift);
12602   %}
12603   ins_pipe(ialu_reg_mem);
12604 %}
12605 
12606 // Rotate Left by variable
12607 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12608 %{
12609   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12610   match(Set dst (RotateLeft dst shift));
12611   effect(KILL cr);
12612   format %{ "roll    $dst, $shift" %}
12613   ins_encode %{
12614     __ roll($dst$$Register);
12615   %}
12616   ins_pipe(ialu_reg_reg);
12617 %}
12618 
12619 // Rotate Left by variable
12620 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12621 %{
12622   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12623   match(Set dst (RotateLeft src shift));
12624   effect(KILL cr);
12625   flag(PD::Flag_ndd_demotable);
12626 
12627   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12628   ins_encode %{
12629     __ eroll($dst$$Register, $src$$Register, false);
12630   %}
12631   ins_pipe(ialu_reg_reg);
12632 %}
12633 
12634 // Rotate Right by constant.
12635 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12636 %{
12637   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12638   match(Set dst (RotateRight dst shift));
12639   effect(KILL cr);
12640   format %{ "rorl    $dst, $shift" %}
12641   ins_encode %{
12642     __ rorl($dst$$Register, $shift$$constant);
12643   %}
12644   ins_pipe(ialu_reg);
12645 %}
12646 
12647 // Rotate Right by constant.
12648 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12649 %{
12650   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12651   match(Set dst (RotateRight src shift));
12652   format %{ "rorxl   $dst, $src, $shift" %}
12653   ins_encode %{
12654     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12655   %}
12656   ins_pipe(ialu_reg_reg);
12657 %}
12658 
12659 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12660 %{
12661   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12662   match(Set dst (RotateRight (LoadI src) shift));
12663   ins_cost(175);
12664   format %{ "rorxl   $dst, $src, $shift" %}
12665   ins_encode %{
12666     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12667   %}
12668   ins_pipe(ialu_reg_mem);
12669 %}
12670 
12671 // Rotate Right by variable
12672 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12673 %{
12674   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12675   match(Set dst (RotateRight dst shift));
12676   effect(KILL cr);
12677   format %{ "rorl    $dst, $shift" %}
12678   ins_encode %{
12679     __ rorl($dst$$Register);
12680   %}
12681   ins_pipe(ialu_reg_reg);
12682 %}
12683 
12684 // Rotate Right by variable
12685 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12686 %{
12687   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12688   match(Set dst (RotateRight src shift));
12689   effect(KILL cr);
12690   flag(PD::Flag_ndd_demotable);
12691 
12692   format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
12693   ins_encode %{
12694     __ erorl($dst$$Register, $src$$Register, false);
12695   %}
12696   ins_pipe(ialu_reg_reg);
12697 %}
12698 
12699 // Rotate Left by constant.
12700 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12701 %{
12702   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12703   match(Set dst (RotateLeft dst shift));
12704   effect(KILL cr);
12705   format %{ "rolq    $dst, $shift" %}
12706   ins_encode %{
12707     __ rolq($dst$$Register, $shift$$constant);
12708   %}
12709   ins_pipe(ialu_reg);
12710 %}
12711 
12712 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12713 %{
12714   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12715   match(Set dst (RotateLeft src shift));
12716   format %{ "rolxq   $dst, $src, $shift" %}
12717   ins_encode %{
12718     int shift = 64 - ($shift$$constant & 63);
12719     __ rorxq($dst$$Register, $src$$Register, shift);
12720   %}
12721   ins_pipe(ialu_reg_reg);
12722 %}
12723 
12724 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12725 %{
12726   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12727   match(Set dst (RotateLeft (LoadL src) shift));
12728   ins_cost(175);
12729   format %{ "rolxq   $dst, $src, $shift" %}
12730   ins_encode %{
12731     int shift = 64 - ($shift$$constant & 63);
12732     __ rorxq($dst$$Register, $src$$Address, shift);
12733   %}
12734   ins_pipe(ialu_reg_mem);
12735 %}
12736 
12737 // Rotate Left by variable
12738 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12739 %{
12740   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12741   match(Set dst (RotateLeft dst shift));
12742   effect(KILL cr);
12743 
12744   format %{ "rolq    $dst, $shift" %}
12745   ins_encode %{
12746     __ rolq($dst$$Register);
12747   %}
12748   ins_pipe(ialu_reg_reg);
12749 %}
12750 
12751 // Rotate Left by variable
12752 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12753 %{
12754   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12755   match(Set dst (RotateLeft src shift));
12756   effect(KILL cr);
12757   flag(PD::Flag_ndd_demotable);
12758 
12759   format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
12760   ins_encode %{
12761     __ erolq($dst$$Register, $src$$Register, false);
12762   %}
12763   ins_pipe(ialu_reg_reg);
12764 %}
12765 
12766 // Rotate Right by constant.
12767 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12768 %{
12769   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12770   match(Set dst (RotateRight dst shift));
12771   effect(KILL cr);
12772   format %{ "rorq    $dst, $shift" %}
12773   ins_encode %{
12774     __ rorq($dst$$Register, $shift$$constant);
12775   %}
12776   ins_pipe(ialu_reg);
12777 %}
12778 
12779 // Rotate Right by constant
12780 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12781 %{
12782   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12783   match(Set dst (RotateRight src shift));
12784   format %{ "rorxq   $dst, $src, $shift" %}
12785   ins_encode %{
12786     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12787   %}
12788   ins_pipe(ialu_reg_reg);
12789 %}
12790 
12791 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12792 %{
12793   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12794   match(Set dst (RotateRight (LoadL src) shift));
12795   ins_cost(175);
12796   format %{ "rorxq   $dst, $src, $shift" %}
12797   ins_encode %{
12798     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12799   %}
12800   ins_pipe(ialu_reg_mem);
12801 %}
12802 
12803 // Rotate Right by variable
12804 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12805 %{
12806   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12807   match(Set dst (RotateRight dst shift));
12808   effect(KILL cr);
12809   format %{ "rorq    $dst, $shift" %}
12810   ins_encode %{
12811     __ rorq($dst$$Register);
12812   %}
12813   ins_pipe(ialu_reg_reg);
12814 %}
12815 
12816 // Rotate Right by variable
12817 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12818 %{
12819   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12820   match(Set dst (RotateRight src shift));
12821   effect(KILL cr);
12822   flag(PD::Flag_ndd_demotable);
12823 
12824   format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
12825   ins_encode %{
12826     __ erorq($dst$$Register, $src$$Register, false);
12827   %}
12828   ins_pipe(ialu_reg_reg);
12829 %}
12830 
12831 //----------------------------- CompressBits/ExpandBits ------------------------
12832 
12833 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12834   predicate(n->bottom_type()->isa_long());
12835   match(Set dst (CompressBits src mask));
12836   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12837   ins_encode %{
12838     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12839   %}
12840   ins_pipe( pipe_slow );
12841 %}
12842 
12843 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12844   predicate(n->bottom_type()->isa_long());
12845   match(Set dst (ExpandBits src mask));
12846   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12847   ins_encode %{
12848     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12849   %}
12850   ins_pipe( pipe_slow );
12851 %}
12852 
12853 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12854   predicate(n->bottom_type()->isa_long());
12855   match(Set dst (CompressBits src (LoadL mask)));
12856   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12857   ins_encode %{
12858     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12859   %}
12860   ins_pipe( pipe_slow );
12861 %}
12862 
12863 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12864   predicate(n->bottom_type()->isa_long());
12865   match(Set dst (ExpandBits src (LoadL mask)));
12866   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12867   ins_encode %{
12868     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12869   %}
12870   ins_pipe( pipe_slow );
12871 %}
12872 
12873 
12874 // Logical Instructions
12875 
12876 // Integer Logical Instructions
12877 
12878 // And Instructions
12879 // And Register with Register
12880 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12881 %{
12882   predicate(!UseAPX);
12883   match(Set dst (AndI dst src));
12884   effect(KILL cr);
12885   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12886 
12887   format %{ "andl    $dst, $src\t# int" %}
12888   ins_encode %{
12889     __ andl($dst$$Register, $src$$Register);
12890   %}
12891   ins_pipe(ialu_reg_reg);
12892 %}
12893 
12894 // And Register with Register using New Data Destination (NDD)
12895 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12896 %{
12897   predicate(UseAPX);
12898   match(Set dst (AndI src1 src2));
12899   effect(KILL cr);
12900   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
12901 
12902   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12903   ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
12907   ins_pipe(ialu_reg_reg);
12908 %}
12909 
12910 // And Register with Immediate 255
12911 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12912 %{
12913   match(Set dst (AndI src mask));
12914 
12915   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12916   ins_encode %{
12917     __ movzbl($dst$$Register, $src$$Register);
12918   %}
12919   ins_pipe(ialu_reg);
12920 %}
12921 
12922 // And Register with Immediate 255 and promote to long
12923 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12924 %{
12925   match(Set dst (ConvI2L (AndI src mask)));
12926 
12927   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
12928   ins_encode %{
12929     __ movzbl($dst$$Register, $src$$Register);
12930   %}
12931   ins_pipe(ialu_reg);
12932 %}
12933 
12934 // And Register with Immediate 65535
12935 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12936 %{
12937   match(Set dst (AndI src mask));
12938 
12939   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
12940   ins_encode %{
12941     __ movzwl($dst$$Register, $src$$Register);
12942   %}
12943   ins_pipe(ialu_reg);
12944 %}
12945 
12946 // And Register with Immediate 65535 and promote to long
12947 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12948 %{
12949   match(Set dst (ConvI2L (AndI src mask)));
12950 
12951   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
12952   ins_encode %{
12953     __ movzwl($dst$$Register, $src$$Register);
12954   %}
12955   ins_pipe(ialu_reg);
12956 %}
12957 
12958 // Can skip int2long conversions after AND with small bitmask
12959 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12960 %{
12961   predicate(VM_Version::supports_bmi2());
12962   ins_cost(125);
12963   effect(TEMP tmp, KILL cr);
12964   match(Set dst (ConvI2L (AndI src mask)));
12965   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
12966   ins_encode %{
12967     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
12968     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12969   %}
12970   ins_pipe(ialu_reg_reg);
12971 %}
12972 
12973 // And Register with Immediate
12974 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12975 %{
12976   predicate(!UseAPX);
12977   match(Set dst (AndI dst src));
12978   effect(KILL cr);
12979   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12980 
12981   format %{ "andl    $dst, $src\t# int" %}
12982   ins_encode %{
12983     __ andl($dst$$Register, $src$$constant);
12984   %}
12985   ins_pipe(ialu_reg);
12986 %}
12987 
12988 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12989 %{
12990   predicate(UseAPX);
12991   match(Set dst (AndI src1 src2));
12992   effect(KILL cr);
12993   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
12994 
12995   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12996   ins_encode %{
12997     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12998   %}
12999   ins_pipe(ialu_reg);
13000 %}
13001 
13002 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13003 %{
13004   predicate(UseAPX);
13005   match(Set dst (AndI (LoadI src1) src2));
13006   effect(KILL cr);
13007   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13008 
13009   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13010   ins_encode %{
13011     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13012   %}
13013   ins_pipe(ialu_reg);
13014 %}
13015 
13016 // And Register with Memory
13017 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13018 %{
13019   predicate(!UseAPX);
13020   match(Set dst (AndI dst (LoadI src)));
13021   effect(KILL cr);
13022   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13023 
13024   ins_cost(150);
13025   format %{ "andl    $dst, $src\t# int" %}
13026   ins_encode %{
13027     __ andl($dst$$Register, $src$$Address);
13028   %}
13029   ins_pipe(ialu_reg_mem);
13030 %}
13031 
13032 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13033 %{
13034   predicate(UseAPX);
13035   match(Set dst (AndI src1 (LoadI src2)));
13036   effect(KILL cr);
13037   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13038 
13039   ins_cost(150);
13040   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13041   ins_encode %{
13042     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13043   %}
13044   ins_pipe(ialu_reg_mem);
13045 %}
13046 
13047 // And Memory with Register
13048 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13049 %{
13050   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13051   effect(KILL cr);
13052   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13053 
13054   ins_cost(150);
13055   format %{ "andb    $dst, $src\t# byte" %}
13056   ins_encode %{
13057     __ andb($dst$$Address, $src$$Register);
13058   %}
13059   ins_pipe(ialu_mem_reg);
13060 %}
13061 
13062 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13063 %{
13064   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13065   effect(KILL cr);
13066   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13067 
13068   ins_cost(150);
13069   format %{ "andl    $dst, $src\t# int" %}
13070   ins_encode %{
13071     __ andl($dst$$Address, $src$$Register);
13072   %}
13073   ins_pipe(ialu_mem_reg);
13074 %}
13075 
13076 // And Memory with Immediate
13077 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13078 %{
13079   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13080   effect(KILL cr);
13081   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13082 
13083   ins_cost(125);
13084   format %{ "andl    $dst, $src\t# int" %}
13085   ins_encode %{
13086     __ andl($dst$$Address, $src$$constant);
13087   %}
13088   ins_pipe(ialu_mem_imm);
13089 %}
13090 
13091 // BMI1 instructions
13092 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13093   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13094   predicate(UseBMI1Instructions);
13095   effect(KILL cr);
13096   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13097 
13098   ins_cost(125);
13099   format %{ "andnl  $dst, $src1, $src2" %}
13100 
13101   ins_encode %{
13102     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13103   %}
13104   ins_pipe(ialu_reg_mem);
13105 %}
13106 
13107 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13108   match(Set dst (AndI (XorI src1 minus_1) src2));
13109   predicate(UseBMI1Instructions);
13110   effect(KILL cr);
13111   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13112 
13113   format %{ "andnl  $dst, $src1, $src2" %}
13114 
13115   ins_encode %{
13116     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13117   %}
13118   ins_pipe(ialu_reg);
13119 %}
13120 
13121 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13122   match(Set dst (AndI (SubI imm_zero src) src));
13123   predicate(UseBMI1Instructions);
13124   effect(KILL cr);
13125   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13126 
13127   format %{ "blsil  $dst, $src" %}
13128 
13129   ins_encode %{
13130     __ blsil($dst$$Register, $src$$Register);
13131   %}
13132   ins_pipe(ialu_reg);
13133 %}
13134 
13135 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13136   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13137   predicate(UseBMI1Instructions);
13138   effect(KILL cr);
13139   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13140 
13141   ins_cost(125);
13142   format %{ "blsil  $dst, $src" %}
13143 
13144   ins_encode %{
13145     __ blsil($dst$$Register, $src$$Address);
13146   %}
13147   ins_pipe(ialu_reg_mem);
13148 %}
13149 
13150 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13151 %{
13152   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13153   predicate(UseBMI1Instructions);
13154   effect(KILL cr);
13155   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13156 
13157   ins_cost(125);
13158   format %{ "blsmskl $dst, $src" %}
13159 
13160   ins_encode %{
13161     __ blsmskl($dst$$Register, $src$$Address);
13162   %}
13163   ins_pipe(ialu_reg_mem);
13164 %}
13165 
13166 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13167 %{
13168   match(Set dst (XorI (AddI src minus_1) src));
13169   predicate(UseBMI1Instructions);
13170   effect(KILL cr);
13171   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13172 
13173   format %{ "blsmskl $dst, $src" %}
13174 
13175   ins_encode %{
13176     __ blsmskl($dst$$Register, $src$$Register);
13177   %}
13178 
13179   ins_pipe(ialu_reg);
13180 %}
13181 
13182 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13183 %{
13184   match(Set dst (AndI (AddI src minus_1) src) );
13185   predicate(UseBMI1Instructions);
13186   effect(KILL cr);
13187   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13188 
13189   format %{ "blsrl  $dst, $src" %}
13190 
13191   ins_encode %{
13192     __ blsrl($dst$$Register, $src$$Register);
13193   %}
13194 
  ins_pipe(ialu_reg);
13196 %}
13197 
13198 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13199 %{
13200   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13201   predicate(UseBMI1Instructions);
13202   effect(KILL cr);
13203   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13204 
13205   ins_cost(125);
13206   format %{ "blsrl  $dst, $src" %}
13207 
13208   ins_encode %{
13209     __ blsrl($dst$$Register, $src$$Address);
13210   %}
13211 
  ins_pipe(ialu_reg_mem);
13213 %}
13214 
13215 // Or Instructions
13216 // Or Register with Register
13217 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13218 %{
13219   predicate(!UseAPX);
13220   match(Set dst (OrI dst src));
13221   effect(KILL cr);
13222   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13223 
13224   format %{ "orl     $dst, $src\t# int" %}
13225   ins_encode %{
13226     __ orl($dst$$Register, $src$$Register);
13227   %}
13228   ins_pipe(ialu_reg_reg);
13229 %}
13230 
13231 // Or Register with Register using New Data Destination (NDD)
13232 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13233 %{
13234   predicate(UseAPX);
13235   match(Set dst (OrI src1 src2));
13236   effect(KILL cr);
13237   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13238 
13239   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13240   ins_encode %{
13241     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13242   %}
13243   ins_pipe(ialu_reg_reg);
13244 %}
13245 
13246 // Or Register with Immediate
13247 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13248 %{
13249   predicate(!UseAPX);
13250   match(Set dst (OrI dst src));
13251   effect(KILL cr);
13252   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13253 
13254   format %{ "orl     $dst, $src\t# int" %}
13255   ins_encode %{
13256     __ orl($dst$$Register, $src$$constant);
13257   %}
13258   ins_pipe(ialu_reg);
13259 %}
13260 
13261 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13262 %{
13263   predicate(UseAPX);
13264   match(Set dst (OrI src1 src2));
13265   effect(KILL cr);
13266   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13267 
13268   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13269   ins_encode %{
13270     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13271   %}
13272   ins_pipe(ialu_reg);
13273 %}
13274 
13275 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13276 %{
13277   predicate(UseAPX);
13278   match(Set dst (OrI src1 src2));
13279   effect(KILL cr);
13280   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13281 
13282   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13283   ins_encode %{
13284     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13285   %}
13286   ins_pipe(ialu_reg);
13287 %}
13288 
13289 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13290 %{
13291   predicate(UseAPX);
13292   match(Set dst (OrI (LoadI src1) src2));
13293   effect(KILL cr);
13294   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13295 
13296   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13297   ins_encode %{
13298     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13299   %}
13300   ins_pipe(ialu_reg);
13301 %}
13302 
13303 // Or Register with Memory
13304 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13305 %{
13306   predicate(!UseAPX);
13307   match(Set dst (OrI dst (LoadI src)));
13308   effect(KILL cr);
13309   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13310 
13311   ins_cost(150);
13312   format %{ "orl     $dst, $src\t# int" %}
13313   ins_encode %{
13314     __ orl($dst$$Register, $src$$Address);
13315   %}
13316   ins_pipe(ialu_reg_mem);
13317 %}
13318 
13319 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13320 %{
13321   predicate(UseAPX);
13322   match(Set dst (OrI src1 (LoadI src2)));
13323   effect(KILL cr);
13324   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13325 
13326   ins_cost(150);
13327   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13328   ins_encode %{
13329     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13330   %}
13331   ins_pipe(ialu_reg_mem);
13332 %}
13333 
13334 // Or Memory with Register
13335 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13336 %{
13337   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13338   effect(KILL cr);
13339   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13340 
13341   ins_cost(150);
13342   format %{ "orb    $dst, $src\t# byte" %}
13343   ins_encode %{
13344     __ orb($dst$$Address, $src$$Register);
13345   %}
13346   ins_pipe(ialu_mem_reg);
13347 %}
13348 
13349 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13350 %{
13351   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13352   effect(KILL cr);
13353   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13354 
13355   ins_cost(150);
13356   format %{ "orl     $dst, $src\t# int" %}
13357   ins_encode %{
13358     __ orl($dst$$Address, $src$$Register);
13359   %}
13360   ins_pipe(ialu_mem_reg);
13361 %}
13362 
13363 // Or Memory with Immediate
13364 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13365 %{
13366   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13367   effect(KILL cr);
13368   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13369 
13370   ins_cost(125);
13371   format %{ "orl     $dst, $src\t# int" %}
13372   ins_encode %{
13373     __ orl($dst$$Address, $src$$constant);
13374   %}
13375   ins_pipe(ialu_mem_imm);
13376 %}
13377 
13378 // Xor Instructions
13379 // Xor Register with Register
13380 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13381 %{
13382   predicate(!UseAPX);
13383   match(Set dst (XorI dst src));
13384   effect(KILL cr);
13385   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13386 
13387   format %{ "xorl    $dst, $src\t# int" %}
13388   ins_encode %{
13389     __ xorl($dst$$Register, $src$$Register);
13390   %}
13391   ins_pipe(ialu_reg_reg);
13392 %}
13393 
13394 // Xor Register with Register using New Data Destination (NDD)
13395 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13396 %{
13397   predicate(UseAPX);
13398   match(Set dst (XorI src1 src2));
13399   effect(KILL cr);
13400   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13401 
13402   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13403   ins_encode %{
13404     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13405   %}
13406   ins_pipe(ialu_reg_reg);
13407 %}
13408 
13409 // Xor Register with Immediate -1
13410 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13411 %{
13412   predicate(!UseAPX);
13413   match(Set dst (XorI dst imm));
13414 
13415   format %{ "notl    $dst" %}
13416   ins_encode %{
13417      __ notl($dst$$Register);
13418   %}
13419   ins_pipe(ialu_reg);
13420 %}
13421 
13422 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13423 %{
13424   match(Set dst (XorI src imm));
13425   predicate(UseAPX);
13426   flag(PD::Flag_ndd_demotable);
13427 
13428   format %{ "enotl    $dst, $src" %}
13429   ins_encode %{
13430      __ enotl($dst$$Register, $src$$Register);
13431   %}
13432   ins_pipe(ialu_reg);
13433 %}
13434 
13435 // Xor Register with Immediate
13436 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13437 %{
13438   // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
13439   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13440   match(Set dst (XorI dst src));
13441   effect(KILL cr);
13442   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13443 
13444   format %{ "xorl    $dst, $src\t# int" %}
13445   ins_encode %{
13446     __ xorl($dst$$Register, $src$$constant);
13447   %}
13448   ins_pipe(ialu_reg);
13449 %}
13450 
13451 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13452 %{
13453   // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
13454   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13455   match(Set dst (XorI src1 src2));
13456   effect(KILL cr);
13457   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13458 
13459   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13460   ins_encode %{
13461     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13462   %}
13463   ins_pipe(ialu_reg);
13464 %}
13465 
// Xor Memory with Immediate into Register
13467 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13468 %{
13469   predicate(UseAPX);
13470   match(Set dst (XorI (LoadI src1) src2));
13471   effect(KILL cr);
13472   ins_cost(150);
13473   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13474 
13475   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13476   ins_encode %{
13477     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13478   %}
13479   ins_pipe(ialu_reg);
13480 %}
13481 
13482 // Xor Register with Memory
13483 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13484 %{
13485   predicate(!UseAPX);
13486   match(Set dst (XorI dst (LoadI src)));
13487   effect(KILL cr);
13488   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13489 
13490   ins_cost(150);
13491   format %{ "xorl    $dst, $src\t# int" %}
13492   ins_encode %{
13493     __ xorl($dst$$Register, $src$$Address);
13494   %}
13495   ins_pipe(ialu_reg_mem);
13496 %}
13497 
13498 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13499 %{
13500   predicate(UseAPX);
13501   match(Set dst (XorI src1 (LoadI src2)));
13502   effect(KILL cr);
13503   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13504 
13505   ins_cost(150);
13506   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13507   ins_encode %{
13508     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13509   %}
13510   ins_pipe(ialu_reg_mem);
13511 %}
13512 
13513 // Xor Memory with Register
13514 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13515 %{
13516   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13517   effect(KILL cr);
13518   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13519 
13520   ins_cost(150);
13521   format %{ "xorb    $dst, $src\t# byte" %}
13522   ins_encode %{
13523     __ xorb($dst$$Address, $src$$Register);
13524   %}
13525   ins_pipe(ialu_mem_reg);
13526 %}
13527 
13528 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13529 %{
13530   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13531   effect(KILL cr);
13532   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13533 
13534   ins_cost(150);
13535   format %{ "xorl    $dst, $src\t# int" %}
13536   ins_encode %{
13537     __ xorl($dst$$Address, $src$$Register);
13538   %}
13539   ins_pipe(ialu_mem_reg);
13540 %}
13541 
13542 // Xor Memory with Immediate
13543 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13544 %{
13545   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13546   effect(KILL cr);
13547   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13548 
13549   ins_cost(125);
13550   format %{ "xorl    $dst, $src\t# int" %}
13551   ins_encode %{
13552     __ xorl($dst$$Address, $src$$constant);
13553   %}
13554   ins_pipe(ialu_mem_imm);
13555 %}
13556 
13557 
13558 // Long Logical Instructions
13559 
13560 // And Instructions
13561 // And Register with Register
13562 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13563 %{
13564   predicate(!UseAPX);
13565   match(Set dst (AndL dst src));
13566   effect(KILL cr);
13567   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13568 
13569   format %{ "andq    $dst, $src\t# long" %}
13570   ins_encode %{
13571     __ andq($dst$$Register, $src$$Register);
13572   %}
13573   ins_pipe(ialu_reg_reg);
13574 %}
13575 
13576 // And Register with Register using New Data Destination (NDD)
13577 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13578 %{
13579   predicate(UseAPX);
13580   match(Set dst (AndL src1 src2));
13581   effect(KILL cr);
13582   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13583 
13584   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13585   ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13589   ins_pipe(ialu_reg_reg);
13590 %}
13591 
13592 // And Register with Immediate 255
13593 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13594 %{
13595   match(Set dst (AndL src mask));
13596 
13597   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13598   ins_encode %{
13599     // movzbl zeroes out the upper 32-bit and does not need REX.W
13600     __ movzbl($dst$$Register, $src$$Register);
13601   %}
13602   ins_pipe(ialu_reg);
13603 %}
13604 
13605 // And Register with Immediate 65535
13606 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13607 %{
13608   match(Set dst (AndL src mask));
13609 
13610   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13611   ins_encode %{
13612     // movzwl zeroes out the upper 32-bit and does not need REX.W
13613     __ movzwl($dst$$Register, $src$$Register);
13614   %}
13615   ins_pipe(ialu_reg);
13616 %}
13617 
13618 // And Register with Immediate
13619 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13620 %{
13621   predicate(!UseAPX);
13622   match(Set dst (AndL dst src));
13623   effect(KILL cr);
13624   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13625 
13626   format %{ "andq    $dst, $src\t# long" %}
13627   ins_encode %{
13628     __ andq($dst$$Register, $src$$constant);
13629   %}
13630   ins_pipe(ialu_reg);
13631 %}
13632 
13633 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13634 %{
13635   predicate(UseAPX);
13636   match(Set dst (AndL src1 src2));
13637   effect(KILL cr);
13638   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13639 
13640   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13641   ins_encode %{
13642     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13643   %}
13644   ins_pipe(ialu_reg);
13645 %}
13646 
13647 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13648 %{
13649   predicate(UseAPX);
13650   match(Set dst (AndL (LoadL src1) src2));
13651   effect(KILL cr);
13652   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13653 
13654   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13655   ins_encode %{
13656     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13657   %}
13658   ins_pipe(ialu_reg);
13659 %}
13660 
13661 // And Register with Memory
13662 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13663 %{
13664   predicate(!UseAPX);
13665   match(Set dst (AndL dst (LoadL src)));
13666   effect(KILL cr);
13667   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13668 
13669   ins_cost(150);
13670   format %{ "andq    $dst, $src\t# long" %}
13671   ins_encode %{
13672     __ andq($dst$$Register, $src$$Address);
13673   %}
13674   ins_pipe(ialu_reg_mem);
13675 %}
13676 
13677 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13678 %{
13679   predicate(UseAPX);
13680   match(Set dst (AndL src1 (LoadL src2)));
13681   effect(KILL cr);
13682   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13683 
13684   ins_cost(150);
13685   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13686   ins_encode %{
13687     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13688   %}
13689   ins_pipe(ialu_reg_mem);
13690 %}
13691 
13692 // And Memory with Register
13693 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13694 %{
13695   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13696   effect(KILL cr);
13697   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13698 
13699   ins_cost(150);
13700   format %{ "andq    $dst, $src\t# long" %}
13701   ins_encode %{
13702     __ andq($dst$$Address, $src$$Register);
13703   %}
13704   ins_pipe(ialu_mem_reg);
13705 %}
13706 
13707 // And Memory with Immediate
13708 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13709 %{
13710   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13711   effect(KILL cr);
13712   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13713 
13714   ins_cost(125);
13715   format %{ "andq    $dst, $src\t# long" %}
13716   ins_encode %{
13717     __ andq($dst$$Address, $src$$constant);
13718   %}
13719   ins_pipe(ialu_mem_imm);
13720 %}
13721 
13722 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13723 %{
13724   // con should be a pure 64-bit immediate given that not(con) is a power of 2
13725   // because AND/OR works well enough for 8/32-bit values.
13726   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13727 
13728   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13729   effect(KILL cr);
13730 
13731   ins_cost(125);
13732   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13733   ins_encode %{
13734     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13735   %}
13736   ins_pipe(ialu_mem_imm);
13737 %}
13738 
13739 // BMI1 instructions
13740 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13741   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13742   predicate(UseBMI1Instructions);
13743   effect(KILL cr);
13744   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13745 
13746   ins_cost(125);
13747   format %{ "andnq  $dst, $src1, $src2" %}
13748 
13749   ins_encode %{
13750     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13751   %}
13752   ins_pipe(ialu_reg_mem);
13753 %}
13754 
13755 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13756   match(Set dst (AndL (XorL src1 minus_1) src2));
13757   predicate(UseBMI1Instructions);
13758   effect(KILL cr);
13759   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13760 
13761   format %{ "andnq  $dst, $src1, $src2" %}
13762 
13763   ins_encode %{
13764     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13765   %}
13766   ins_pipe(ialu_reg_mem);
13767 %}
13768 
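      // BLSI isolates the lowest set bit: dst = src & -src, e.g.
      //   long lowest = x & -x;   // (AndL (SubL 0 x) x) -> blsiq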
13769 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13770   match(Set dst (AndL (SubL imm_zero src) src));
13771   predicate(UseBMI1Instructions);
13772   effect(KILL cr);
13773   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13774 
13775   format %{ "blsiq  $dst, $src" %}
13776 
13777   ins_encode %{
13778     __ blsiq($dst$$Register, $src$$Register);
13779   %}
13780   ins_pipe(ialu_reg);
13781 %}
13782 
13783 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13784   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13785   predicate(UseBMI1Instructions);
13786   effect(KILL cr);
13787   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13788 
13789   ins_cost(125);
13790   format %{ "blsiq  $dst, $src" %}
13791 
13792   ins_encode %{
13793     __ blsiq($dst$$Register, $src$$Address);
13794   %}
13795   ins_pipe(ialu_reg_mem);
13796 %}
13797 
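      // BLSMSK builds a mask up to and including the lowest set bit:
      //   long mask = x ^ (x - 1);   // (XorL (AddL x -1) x) -> blsmskq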
13798 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13799 %{
13800   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13801   predicate(UseBMI1Instructions);
13802   effect(KILL cr);
13803   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13804 
13805   ins_cost(125);
13806   format %{ "blsmskq $dst, $src" %}
13807 
13808   ins_encode %{
13809     __ blsmskq($dst$$Register, $src$$Address);
13810   %}
13811   ins_pipe(ialu_reg_mem);
13812 %}
13813 
13814 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13815 %{
13816   match(Set dst (XorL (AddL src minus_1) src));
13817   predicate(UseBMI1Instructions);
13818   effect(KILL cr);
13819   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13820 
13821   format %{ "blsmskq $dst, $src" %}
13822 
13823   ins_encode %{
13824     __ blsmskq($dst$$Register, $src$$Register);
13825   %}
13826 
13827   ins_pipe(ialu_reg);
13828 %}
13829 
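      // BLSR clears the lowest set bit:
      //   long rest = x & (x - 1);   // (AndL (AddL x -1) x) -> blsrq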
13830 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13831 %{
13832   match(Set dst (AndL (AddL src minus_1) src) );
13833   predicate(UseBMI1Instructions);
13834   effect(KILL cr);
13835   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13836 
13837   format %{ "blsrq  $dst, $src" %}
13838 
13839   ins_encode %{
13840     __ blsrq($dst$$Register, $src$$Register);
13841   %}
13842 
13843   ins_pipe(ialu_reg);
13844 %}
13845 
13846 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13847 %{
13848   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13849   predicate(UseBMI1Instructions);
13850   effect(KILL cr);
13851   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13852 
13853   ins_cost(125);
13854   format %{ "blsrq  $dst, $src" %}
13855 
13856   ins_encode %{
13857     __ blsrq($dst$$Register, $src$$Address);
13858   %}
13859 
13860   ins_pipe(ialu_reg);
13861 %}
13862 
13863 // Or Instructions
13864 // Or Register with Register
13865 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13866 %{
13867   predicate(!UseAPX);
13868   match(Set dst (OrL dst src));
13869   effect(KILL cr);
13870   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13871 
13872   format %{ "orq     $dst, $src\t# long" %}
13873   ins_encode %{
13874     __ orq($dst$$Register, $src$$Register);
13875   %}
13876   ins_pipe(ialu_reg_reg);
13877 %}
13878 
13879 // Or Register with Register using New Data Destination (NDD)
13880 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13881 %{
13882   predicate(UseAPX);
13883   match(Set dst (OrL src1 src2));
13884   effect(KILL cr);
13885   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
13886 
13887   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13888   ins_encode %{
13889     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13891   %}
13892   ins_pipe(ialu_reg_reg);
13893 %}
13894 
13895 // Use any_RegP to match R15 (TLS register) without spilling.
13896 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13897   match(Set dst (OrL dst (CastP2X src)));
13898   effect(KILL cr);
13899   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13900 
13901   format %{ "orq     $dst, $src\t# long" %}
13902   ins_encode %{
13903     __ orq($dst$$Register, $src$$Register);
13904   %}
13905   ins_pipe(ialu_reg_reg);
13906 %}
13907 
13908 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
13909   match(Set dst (OrL src1 (CastP2X src2)));
13910   effect(KILL cr);
13911   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13912 
13913   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13914   ins_encode %{
13915     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13916   %}
13917   ins_pipe(ialu_reg_reg);
13918 %}
13919 
13920 // Or Register with Immediate
13921 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13922 %{
13923   predicate(!UseAPX);
13924   match(Set dst (OrL dst src));
13925   effect(KILL cr);
13926   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13927 
13928   format %{ "orq     $dst, $src\t# long" %}
13929   ins_encode %{
13930     __ orq($dst$$Register, $src$$constant);
13931   %}
13932   ins_pipe(ialu_reg);
13933 %}
13934 
13935 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13936 %{
13937   predicate(UseAPX);
13938   match(Set dst (OrL src1 src2));
13939   effect(KILL cr);
13940   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13941 
13942   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13943   ins_encode %{
13944     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13945   %}
13946   ins_pipe(ialu_reg);
13947 %}
13948 
13949 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13950 %{
13951   predicate(UseAPX);
13952   match(Set dst (OrL src1 src2));
13953   effect(KILL cr);
13954   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
13955 
13956   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
13957   ins_encode %{
13958     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13959   %}
13960   ins_pipe(ialu_reg);
13961 %}
13962 
13963 // Or Memory with Immediate
13964 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13965 %{
13966   predicate(UseAPX);
13967   match(Set dst (OrL (LoadL src1) src2));
13968   effect(KILL cr);
13969   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13970 
13971   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13972   ins_encode %{
13973     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13974   %}
13975   ins_pipe(ialu_reg);
13976 %}
13977 
13978 // Or Register with Memory
13979 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13980 %{
13981   predicate(!UseAPX);
13982   match(Set dst (OrL dst (LoadL src)));
13983   effect(KILL cr);
13984   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13985 
13986   ins_cost(150);
13987   format %{ "orq     $dst, $src\t# long" %}
13988   ins_encode %{
13989     __ orq($dst$$Register, $src$$Address);
13990   %}
13991   ins_pipe(ialu_reg_mem);
13992 %}
13993 
13994 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13995 %{
13996   predicate(UseAPX);
13997   match(Set dst (OrL src1 (LoadL src2)));
13998   effect(KILL cr);
13999   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14000 
14001   ins_cost(150);
14002   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14003   ins_encode %{
14004     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14005   %}
14006   ins_pipe(ialu_reg_mem);
14007 %}
14008 
14009 // Or Memory with Register
14010 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14011 %{
14012   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14013   effect(KILL cr);
14014   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14015 
14016   ins_cost(150);
14017   format %{ "orq     $dst, $src\t# long" %}
14018   ins_encode %{
14019     __ orq($dst$$Address, $src$$Register);
14020   %}
14021   ins_pipe(ialu_mem_reg);
14022 %}
14023 
14024 // Or Memory with Immediate
14025 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14026 %{
14027   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14028   effect(KILL cr);
14029   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14030 
14031   ins_cost(125);
14032   format %{ "orq     $dst, $src\t# long" %}
14033   ins_encode %{
14034     __ orq($dst$$Address, $src$$constant);
14035   %}
14036   ins_pipe(ialu_mem_imm);
14037 %}
14038 
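      // Setting a single high bit, e.g. x |= 1L << 40, would otherwise require
      // materializing the full 64-bit constant; BTS takes just the bit index.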
14039 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14040 %{
14041   // con should be a pure 64-bit power of 2 immediate
14042   // because AND/OR works well enough for 8/32-bit values.
14043   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14044 
14045   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14046   effect(KILL cr);
14047 
14048   ins_cost(125);
14049   format %{ "btsq    $dst, log2($con)\t# long" %}
14050   ins_encode %{
14051     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14052   %}
14053   ins_pipe(ialu_mem_imm);
14054 %}
14055 
14056 // Xor Instructions
14057 // Xor Register with Register
14058 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14059 %{
14060   predicate(!UseAPX);
14061   match(Set dst (XorL dst src));
14062   effect(KILL cr);
14063   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14064 
14065   format %{ "xorq    $dst, $src\t# long" %}
14066   ins_encode %{
14067     __ xorq($dst$$Register, $src$$Register);
14068   %}
14069   ins_pipe(ialu_reg_reg);
14070 %}
14071 
14072 // Xor Register with Register using New Data Destination (NDD)
14073 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14074 %{
14075   predicate(UseAPX);
14076   match(Set dst (XorL src1 src2));
14077   effect(KILL cr);
14078   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14079 
14080   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14081   ins_encode %{
14082     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14083   %}
14084   ins_pipe(ialu_reg_reg);
14085 %}
14086 
14087 // Xor Register with Immediate -1
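      // XOR with the all-ones constant is bitwise NOT. NOT neither reads nor
      // writes the condition codes, so no KILL cr is needed here.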
14088 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14089 %{
14090   predicate(!UseAPX);
14091   match(Set dst (XorL dst imm));
14092 
14093   format %{ "notq   $dst" %}
14094   ins_encode %{
14095      __ notq($dst$$Register);
14096   %}
14097   ins_pipe(ialu_reg);
14098 %}
14099 
14100 instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14101 %{
14102   predicate(UseAPX);
14103   match(Set dst (XorL src imm));
14104   flag(PD::Flag_ndd_demotable);
14105 
14106   format %{ "enotq   $dst, $src" %}
14107   ins_encode %{
14108     __ enotq($dst$$Register, $src$$Register);
14109   %}
14110   ins_pipe(ialu_reg);
14111 %}
14112 
14113 // Xor Register with Immediate
14114 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14115 %{
14116   // Strict predicate check so that selection of xorL_rReg_im1 is cost-agnostic when immL32 src is -1.
14117   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14118   match(Set dst (XorL dst src));
14119   effect(KILL cr);
14120   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14121 
14122   format %{ "xorq    $dst, $src\t# long" %}
14123   ins_encode %{
14124     __ xorq($dst$$Register, $src$$constant);
14125   %}
14126   ins_pipe(ialu_reg);
14127 %}
14128 
14129 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14130 %{
14131   // Strict predicate check so that selection of xorL_rReg_im1_ndd is cost-agnostic when immL32 src2 is -1.
14132   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14133   match(Set dst (XorL src1 src2));
14134   effect(KILL cr);
14135   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable);
14136 
14137   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14138   ins_encode %{
14139     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14140   %}
14141   ins_pipe(ialu_reg);
14142 %}
14143 
14144 // Xor Memory with Immediate
14145 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14146 %{
14147   predicate(UseAPX);
14148   match(Set dst (XorL (LoadL src1) src2));
14149   effect(KILL cr);
14150   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14151   ins_cost(150);
14152 
14153   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14154   ins_encode %{
14155     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14156   %}
14157   ins_pipe(ialu_reg);
14158 %}
14159 
14160 // Xor Register with Memory
14161 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14162 %{
14163   predicate(!UseAPX);
14164   match(Set dst (XorL dst (LoadL src)));
14165   effect(KILL cr);
14166   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14167 
14168   ins_cost(150);
14169   format %{ "xorq    $dst, $src\t# long" %}
14170   ins_encode %{
14171     __ xorq($dst$$Register, $src$$Address);
14172   %}
14173   ins_pipe(ialu_reg_mem);
14174 %}
14175 
14176 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14177 %{
14178   predicate(UseAPX);
14179   match(Set dst (XorL src1 (LoadL src2)));
14180   effect(KILL cr);
14181   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_commutative);
14182 
14183   ins_cost(150);
14184   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14185   ins_encode %{
14186     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14187   %}
14188   ins_pipe(ialu_reg_mem);
14189 %}
14190 
14191 // Xor Memory with Register
14192 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14193 %{
14194   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14195   effect(KILL cr);
14196   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14197 
14198   ins_cost(150);
14199   format %{ "xorq    $dst, $src\t# long" %}
14200   ins_encode %{
14201     __ xorq($dst$$Address, $src$$Register);
14202   %}
14203   ins_pipe(ialu_mem_reg);
14204 %}
14205 
14206 // Xor Memory with Immediate
14207 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14208 %{
14209   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14210   effect(KILL cr);
14211   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14212 
14213   ins_cost(125);
14214   format %{ "xorq    $dst, $src\t# long" %}
14215   ins_encode %{
14216     __ xorq($dst$$Address, $src$$constant);
14217   %}
14218   ins_pipe(ialu_mem_imm);
14219 %}
14220 
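      // cmpLTMask computes a branch-free mask: dst = (p < q) ? -1 : 0.
      // setcc leaves 0 or 1 in dst, and the negate turns 1 into all ones.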
14221 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14222 %{
14223   match(Set dst (CmpLTMask p q));
14224   effect(KILL cr);
14225 
14226   ins_cost(400);
14227   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14228             "setcc   $dst\t# emits setlt + movzbl or setzul for APX\n\t"
14229             "negl    $dst" %}
14230   ins_encode %{
14231     __ cmpl($p$$Register, $q$$Register);
14232     __ setcc(Assembler::less, $dst$$Register);
14233     __ negl($dst$$Register);
14234   %}
14235   ins_pipe(pipe_slow);
14236 %}
14237 
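      // Comparing against zero reduces to smearing the sign bit:
      // dst >> 31 (arithmetic) is -1 for negative inputs and 0 otherwise.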
14238 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14239 %{
14240   match(Set dst (CmpLTMask dst zero));
14241   effect(KILL cr);
14242 
14243   ins_cost(100);
14244   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14245   ins_encode %{
14246     __ sarl($dst$$Register, 31);
14247   %}
14248   ins_pipe(ialu_reg);
14249 %}
14250 
14251 /* Better to save a register than avoid a branch */
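      // Matches p = (p - q) + ((p < q) ? y : 0); a short branch over the add
      // avoids materializing the -1/0 mask in a scratch register.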
14252 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14253 %{
14254   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14255   effect(KILL cr);
14256   ins_cost(300);
14257   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14258             "jge     done\n\t"
14259             "addl    $p,$y\n"
14260             "done:   " %}
14261   ins_encode %{
14262     Register Rp = $p$$Register;
14263     Register Rq = $q$$Register;
14264     Register Ry = $y$$Register;
14265     Label done;
14266     __ subl(Rp, Rq);
14267     __ jccb(Assembler::greaterEqual, done);
14268     __ addl(Rp, Ry);
14269     __ bind(done);
14270   %}
14271   ins_pipe(pipe_cmplt);
14272 %}
14273 
14274 /* Better to save a register than avoid a branch */
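      // Matches y = (p < q) ? y : 0; y is zeroed unless the mask would be all ones.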
14275 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14276 %{
14277   match(Set y (AndI (CmpLTMask p q) y));
14278   effect(KILL cr);
14279 
14280   ins_cost(300);
14281 
14282   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14283             "jl      done\n\t"
14284             "xorl    $y, $y\n"
14285             "done:   " %}
14286   ins_encode %{
14287     Register Rp = $p$$Register;
14288     Register Rq = $q$$Register;
14289     Register Ry = $y$$Register;
14290     Label done;
14291     __ cmpl(Rp, Rq);
14292     __ jccb(Assembler::less, done);
14293     __ xorl(Ry, Ry);
14294     __ bind(done);
14295   %}
14296   ins_pipe(pipe_cmplt);
14297 %}
14298 
14299 
14300 //---------- FP Instructions------------------------------------------------
14301 
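      // ucomiss/ucomisd set ZF, PF and CF for an unordered result (NaN operand).
      // emit_cmpfp_fixup rewrites the saved flags (clearing ZF and PF, keeping
      // CF) so that an unordered compare reads as "less than".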
14302 // Really expensive, avoid
14303 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14304 %{
14305   match(Set cr (CmpF src1 src2));
14306 
14307   ins_cost(500);
14308   format %{ "ucomiss $src1, $src2\n\t"
14309             "jnp,s   exit\n\t"
14310             "pushfq\t# saw NaN, set CF\n\t"
14311             "andq    [rsp], #0xffffff2b\n\t"
14312             "popfq\n"
14313     "exit:" %}
14314   ins_encode %{
14315     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14316     emit_cmpfp_fixup(masm);
14317   %}
14318   ins_pipe(pipe_slow);
14319 %}
14320 
14321 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14322   match(Set cr (CmpF src1 src2));
14323 
14324   ins_cost(100);
14325   format %{ "ucomiss $src1, $src2" %}
14326   ins_encode %{
14327     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14328   %}
14329   ins_pipe(pipe_slow);
14330 %}
14331 
14332 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14333   match(Set cr (CmpF src1 (LoadF src2)));
14334 
14335   ins_cost(100);
14336   format %{ "ucomiss $src1, $src2" %}
14337   ins_encode %{
14338     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14339   %}
14340   ins_pipe(pipe_slow);
14341 %}
14342 
14343 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14344   match(Set cr (CmpF src con));
14345   ins_cost(100);
14346   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14347   ins_encode %{
14348     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14349   %}
14350   ins_pipe(pipe_slow);
14351 %}
14352 
14353 // Really expensive, avoid
14354 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14355 %{
14356   match(Set cr (CmpD src1 src2));
14357 
14358   ins_cost(500);
14359   format %{ "ucomisd $src1, $src2\n\t"
14360             "jnp,s   exit\n\t"
14361             "pushfq\t# saw NaN, set CF\n\t"
14362             "andq    [rsp], #0xffffff2b\n\t"
14363             "popfq\n"
14364     "exit:" %}
14365   ins_encode %{
14366     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14367     emit_cmpfp_fixup(masm);
14368   %}
14369   ins_pipe(pipe_slow);
14370 %}
14371 
14372 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14373   match(Set cr (CmpD src1 src2));
14374 
14375   ins_cost(100);
14376   format %{ "ucomisd $src1, $src2" %}
14377   ins_encode %{
14378     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14379   %}
14380   ins_pipe(pipe_slow);
14381 %}
14382 
14383 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14384   match(Set cr (CmpD src1 (LoadD src2)));
14385 
14386   ins_cost(100);
14387   format %{ "ucomisd $src1, $src2" %}
14388   ins_encode %{
14389     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14390   %}
14391   ins_pipe(pipe_slow);
14392 %}
14393 
14394 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14395   match(Set cr (CmpD src con));
14396   ins_cost(100);
14397   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14398   ins_encode %{
14399     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14400   %}
14401   ins_pipe(pipe_slow);
14402 %}
14403 
14404 // Compare into -1,0,1
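      // emit_cmpfp3 materializes the three-way result: -1 if less or unordered,
      // 0 if equal, 1 if greater, as the format below spells out.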
14405 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14406 %{
14407   match(Set dst (CmpF3 src1 src2));
14408   effect(KILL cr);
14409 
14410   ins_cost(275);
14411   format %{ "ucomiss $src1, $src2\n\t"
14412             "movl    $dst, #-1\n\t"
14413             "jp,s    done\n\t"
14414             "jb,s    done\n\t"
14415             "setne   $dst\n\t"
14416             "movzbl  $dst, $dst\n"
14417     "done:" %}
14418   ins_encode %{
14419     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14420     emit_cmpfp3(masm, $dst$$Register);
14421   %}
14422   ins_pipe(pipe_slow);
14423 %}
14424 
14425 // Compare into -1,0,1
14426 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14427 %{
14428   match(Set dst (CmpF3 src1 (LoadF src2)));
14429   effect(KILL cr);
14430 
14431   ins_cost(275);
14432   format %{ "ucomiss $src1, $src2\n\t"
14433             "movl    $dst, #-1\n\t"
14434             "jp,s    done\n\t"
14435             "jb,s    done\n\t"
14436             "setne   $dst\n\t"
14437             "movzbl  $dst, $dst\n"
14438     "done:" %}
14439   ins_encode %{
14440     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14441     emit_cmpfp3(masm, $dst$$Register);
14442   %}
14443   ins_pipe(pipe_slow);
14444 %}
14445 
14446 // Compare into -1,0,1
14447 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14448   match(Set dst (CmpF3 src con));
14449   effect(KILL cr);
14450 
14451   ins_cost(275);
14452   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14453             "movl    $dst, #-1\n\t"
14454             "jp,s    done\n\t"
14455             "jb,s    done\n\t"
14456             "setne   $dst\n\t"
14457             "movzbl  $dst, $dst\n"
14458     "done:" %}
14459   ins_encode %{
14460     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14461     emit_cmpfp3(masm, $dst$$Register);
14462   %}
14463   ins_pipe(pipe_slow);
14464 %}
14465 
14466 // Compare into -1,0,1
14467 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14468 %{
14469   match(Set dst (CmpD3 src1 src2));
14470   effect(KILL cr);
14471 
14472   ins_cost(275);
14473   format %{ "ucomisd $src1, $src2\n\t"
14474             "movl    $dst, #-1\n\t"
14475             "jp,s    done\n\t"
14476             "jb,s    done\n\t"
14477             "setne   $dst\n\t"
14478             "movzbl  $dst, $dst\n"
14479     "done:" %}
14480   ins_encode %{
14481     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14482     emit_cmpfp3(masm, $dst$$Register);
14483   %}
14484   ins_pipe(pipe_slow);
14485 %}
14486 
14487 // Compare into -1,0,1
14488 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14489 %{
14490   match(Set dst (CmpD3 src1 (LoadD src2)));
14491   effect(KILL cr);
14492 
14493   ins_cost(275);
14494   format %{ "ucomisd $src1, $src2\n\t"
14495             "movl    $dst, #-1\n\t"
14496             "jp,s    done\n\t"
14497             "jb,s    done\n\t"
14498             "setne   $dst\n\t"
14499             "movzbl  $dst, $dst\n"
14500     "done:" %}
14501   ins_encode %{
14502     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14503     emit_cmpfp3(masm, $dst$$Register);
14504   %}
14505   ins_pipe(pipe_slow);
14506 %}
14507 
14508 // Compare into -1,0,1
14509 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14510   match(Set dst (CmpD3 src con));
14511   effect(KILL cr);
14512 
14513   ins_cost(275);
14514   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14515             "movl    $dst, #-1\n\t"
14516             "jp,s    done\n\t"
14517             "jb,s    done\n\t"
14518             "setne   $dst\n\t"
14519             "movzbl  $dst, $dst\n"
14520     "done:" %}
14521   ins_encode %{
14522     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14523     emit_cmpfp3(masm, $dst$$Register);
14524   %}
14525   ins_pipe(pipe_slow);
14526 %}
14527 
14528 //----------Arithmetic Conversion Instructions---------------------------------
14529 
14530 instruct convF2D_reg_reg(regD dst, regF src)
14531 %{
14532   match(Set dst (ConvF2D src));
14533 
14534   format %{ "cvtss2sd $dst, $src" %}
14535   ins_encode %{
14536     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14537   %}
14538   ins_pipe(pipe_slow); // XXX
14539 %}
14540 
14541 instruct convF2D_reg_mem(regD dst, memory src)
14542 %{
14543   predicate(UseAVX == 0);
14544   match(Set dst (ConvF2D (LoadF src)));
14545 
14546   format %{ "cvtss2sd $dst, $src" %}
14547   ins_encode %{
14548     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14549   %}
14550   ins_pipe(pipe_slow); // XXX
14551 %}
14552 
14553 instruct convD2F_reg_reg(regF dst, regD src)
14554 %{
14555   match(Set dst (ConvD2F src));
14556 
14557   format %{ "cvtsd2ss $dst, $src" %}
14558   ins_encode %{
14559     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14560   %}
14561   ins_pipe(pipe_slow); // XXX
14562 %}
14563 
14564 instruct convD2F_reg_mem(regF dst, memory src)
14565 %{
14566   predicate(UseAVX == 0);
14567   match(Set dst (ConvD2F (LoadD src)));
14568 
14569   format %{ "cvtsd2ss $dst, $src" %}
14570   ins_encode %{
14571     __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14572   %}
14573   ins_pipe(pipe_slow); // XXX
14574 %}
14575 
14576 // XXX do mem variants
14577 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14578 %{
14579   predicate(!VM_Version::supports_avx10_2());
14580   match(Set dst (ConvF2I src));
14581   effect(KILL cr);
14582   format %{ "convert_f2i $dst, $src" %}
14583   ins_encode %{
14584     __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14585   %}
14586   ins_pipe(pipe_slow);
14587 %}
14588 
14589 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14590 %{
14591   predicate(VM_Version::supports_avx10_2());
14592   match(Set dst (ConvF2I src));
14593   format %{ "evcvttss2sisl $dst, $src" %}
14594   ins_encode %{
14595     __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14596   %}
14597   ins_pipe(pipe_slow);
14598 %}
14599 
14600 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14601 %{
14602   predicate(VM_Version::supports_avx10_2());
14603   match(Set dst (ConvF2I (LoadF src)));
14604   format %{ "evcvttss2sisl $dst, $src" %}
14605   ins_encode %{
14606     __ evcvttss2sisl($dst$$Register, $src$$Address);
14607   %}
14608   ins_pipe(pipe_slow);
14609 %}
14610 
14611 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14612 %{
14613   predicate(!VM_Version::supports_avx10_2());
14614   match(Set dst (ConvF2L src));
14615   effect(KILL cr);
14616   format %{ "convert_f2l $dst, $src" %}
14617   ins_encode %{
14618     __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14619   %}
14620   ins_pipe(pipe_slow);
14621 %}
14622 
14623 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14624 %{
14625   predicate(VM_Version::supports_avx10_2());
14626   match(Set dst (ConvF2L src));
14627   format %{ "evcvttss2sisq $dst, $src" %}
14628   ins_encode %{
14629     __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14630   %}
14631   ins_pipe(pipe_slow);
14632 %}
14633 
14634 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14635 %{
14636   predicate(VM_Version::supports_avx10_2());
14637   match(Set dst (ConvF2L (LoadF src)));
14638   format %{ "evcvttss2sisq $dst, $src" %}
14639   ins_encode %{
14640     __ evcvttss2sisq($dst$$Register, $src$$Address);
14641   %}
14642   ins_pipe(pipe_slow);
14643 %}
14644 
14645 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14646 %{
14647   predicate(!VM_Version::supports_avx10_2());
14648   match(Set dst (ConvD2I src));
14649   effect(KILL cr);
14650   format %{ "convert_d2i $dst, $src" %}
14651   ins_encode %{
14652     __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14653   %}
14654   ins_pipe(pipe_slow);
14655 %}
14656 
14657 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14658 %{
14659   predicate(VM_Version::supports_avx10_2());
14660   match(Set dst (ConvD2I src));
14661   format %{ "evcvttsd2sisl $dst, $src" %}
14662   ins_encode %{
14663     __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14664   %}
14665   ins_pipe(pipe_slow);
14666 %}
14667 
14668 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14669 %{
14670   predicate(VM_Version::supports_avx10_2());
14671   match(Set dst (ConvD2I (LoadD src)));
14672   format %{ "evcvttsd2sisl $dst, $src" %}
14673   ins_encode %{
14674     __ evcvttsd2sisl($dst$$Register, $src$$Address);
14675   %}
14676   ins_pipe(pipe_slow);
14677 %}
14678 
14679 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14680 %{
14681   predicate(!VM_Version::supports_avx10_2());
14682   match(Set dst (ConvD2L src));
14683   effect(KILL cr);
14684   format %{ "convert_d2l $dst, $src" %}
14685   ins_encode %{
14686     __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14687   %}
14688   ins_pipe(pipe_slow);
14689 %}
14690 
14691 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14692 %{
14693   predicate(VM_Version::supports_avx10_2());
14694   match(Set dst (ConvD2L src));
14695   format %{ "evcvttsd2sisq $dst, $src" %}
14696   ins_encode %{
14697     __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14698   %}
14699   ins_pipe(pipe_slow);
14700 %}
14701 
14702 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14703 %{
14704   predicate(VM_Version::supports_avx10_2());
14705   match(Set dst (ConvD2L (LoadD src)));
14706   format %{ "evcvttsd2sisq $dst, $src" %}
14707   ins_encode %{
14708     __ evcvttsd2sisq($dst$$Register, $src$$Address);
14709   %}
14710   ins_pipe(pipe_slow);
14711 %}
14712 
14713 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14714 %{
14715   match(Set dst (RoundD src));
14716   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14717   format %{ "round_double $dst,$src\t! using $rtmp and $rcx as TEMP" %}
14718   ins_encode %{
14719     __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14720   %}
14721   ins_pipe(pipe_slow);
14722 %}
14723 
14724 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14725 %{
14726   match(Set dst (RoundF src));
14727   effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14728   format %{ "round_float $dst,$src" %}
14729   ins_encode %{
14730     __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14731   %}
14732   ins_pipe(pipe_slow);
14733 %}
14734 
14735 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14736 %{
14737   predicate(!UseXmmI2F);
14738   match(Set dst (ConvI2F src));
14739 
14740   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14741   ins_encode %{
14742     if (UseAVX > 0) {
14743       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14744     }
14745     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14746   %}
14747   ins_pipe(pipe_slow); // XXX
14748 %}
14749 
14750 instruct convI2F_reg_mem(regF dst, memory src)
14751 %{
14752   predicate(UseAVX == 0);
14753   match(Set dst (ConvI2F (LoadI src)));
14754 
14755   format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14756   ins_encode %{
14757     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14758   %}
14759   ins_pipe(pipe_slow); // XXX
14760 %}
14761 
14762 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14763 %{
14764   predicate(!UseXmmI2D);
14765   match(Set dst (ConvI2D src));
14766 
14767   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14768   ins_encode %{
14769     if (UseAVX > 0) {
14770       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14771     }
14772     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14773   %}
14774   ins_pipe(pipe_slow); // XXX
14775 %}
14776 
14777 instruct convI2D_reg_mem(regD dst, memory src)
14778 %{
14779   predicate(UseAVX == 0);
14780   match(Set dst (ConvI2D (LoadI src)));
14781 
14782   format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14783   ins_encode %{
14784     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14785   %}
14786   ins_pipe(pipe_slow); // XXX
14787 %}
14788 
14789 instruct convXI2F_reg(regF dst, rRegI src)
14790 %{
14791   predicate(UseXmmI2F);
14792   match(Set dst (ConvI2F src));
14793 
14794   format %{ "movdl $dst, $src\n\t"
14795             "cvtdq2psl $dst, $dst\t# i2f" %}
14796   ins_encode %{
14797     __ movdl($dst$$XMMRegister, $src$$Register);
14798     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14799   %}
14800   ins_pipe(pipe_slow); // XXX
14801 %}
14802 
14803 instruct convXI2D_reg(regD dst, rRegI src)
14804 %{
14805   predicate(UseXmmI2D);
14806   match(Set dst (ConvI2D src));
14807 
14808   format %{ "movdl $dst, $src\n\t"
14809             "cvtdq2pdl $dst, $dst\t# i2d" %}
14810   ins_encode %{
14811     __ movdl($dst$$XMMRegister, $src$$Register);
14812     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14813   %}
14814   ins_pipe(pipe_slow); // XXX
14815 %}
14816 
14817 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14818 %{
14819   match(Set dst (ConvL2F src));
14820 
14821   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14822   ins_encode %{
14823     if (UseAVX > 0) {
14824       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14825     }
14826     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14827   %}
14828   ins_pipe(pipe_slow); // XXX
14829 %}
14830 
14831 instruct convL2F_reg_mem(regF dst, memory src)
14832 %{
14833   predicate(UseAVX == 0);
14834   match(Set dst (ConvL2F (LoadL src)));
14835 
14836   format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14837   ins_encode %{
14838     __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14839   %}
14840   ins_pipe(pipe_slow); // XXX
14841 %}
14842 
14843 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14844 %{
14845   match(Set dst (ConvL2D src));
14846 
14847   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14848   ins_encode %{
14849     if (UseAVX > 0) {
14850       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14851     }
14852     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14853   %}
14854   ins_pipe(pipe_slow); // XXX
14855 %}
14856 
14857 instruct convL2D_reg_mem(regD dst, memory src)
14858 %{
14859   predicate(UseAVX == 0);
14860   match(Set dst (ConvL2D (LoadL src)));
14861 
14862   format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14863   ins_encode %{
14864     __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14865   %}
14866   ins_pipe(pipe_slow); // XXX
14867 %}
14868 
14869 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14870 %{
14871   match(Set dst (ConvI2L src));
14872 
14873   ins_cost(125);
14874   format %{ "movslq  $dst, $src\t# i2l" %}
14875   ins_encode %{
14876     __ movslq($dst$$Register, $src$$Register);
14877   %}
14878   ins_pipe(ialu_reg_reg);
14879 %}
14880 
14881 // Zero-extend convert int to long
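      // On x86-64 a 32-bit move implicitly zeroes bits 63:32 of the destination,
      // so the AndL with the 0xFFFFFFFF mask folds into a plain movl, elided
      // entirely when dst and src already share a register.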
14882 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14883 %{
14884   match(Set dst (AndL (ConvI2L src) mask));
14885 
14886   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
14887   ins_encode %{
14888     if ($dst$$reg != $src$$reg) {
14889       __ movl($dst$$Register, $src$$Register);
14890     }
14891   %}
14892   ins_pipe(ialu_reg_reg);
14893 %}
14894 
14895 // Zero-extend convert int to long
14896 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14897 %{
14898   match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14899 
14900   format %{ "movl    $dst, $src\t# i2l zero-extend" %}
14901   ins_encode %{
14902     __ movl($dst$$Register, $src$$Address);
14903   %}
14904   ins_pipe(ialu_reg_mem);
14905 %}
14906 
14907 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14908 %{
14909   match(Set dst (AndL src mask));
14910 
14911   format %{ "movl    $dst, $src\t# zero-extend long" %}
14912   ins_encode %{
14913     __ movl($dst$$Register, $src$$Register);
14914   %}
14915   ins_pipe(ialu_reg_reg);
14916 %}
14917 
14918 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14919 %{
14920   match(Set dst (ConvL2I src));
14921 
14922   format %{ "movl    $dst, $src\t# l2i" %}
14923   ins_encode %{
14924     __ movl($dst$$Register, $src$$Register);
14925   %}
14926   ins_pipe(ialu_reg_reg);
14927 %}
14928 
14929 
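      // The MoveF2I/MoveI2F/MoveD2L/MoveL2D nodes reinterpret bits without
      // conversion (Float.floatToRawIntBits and friends). The stack variants
      // handle operands the register allocator has spilled; the reg-reg variants
      // further below use movd/movdq to copy directly between GPRs and XMMs.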
14930 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14931   match(Set dst (MoveF2I src));
14932   effect(DEF dst, USE src);
14933 
14934   ins_cost(125);
14935   format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
14936   ins_encode %{
14937     __ movl($dst$$Register, Address(rsp, $src$$disp));
14938   %}
14939   ins_pipe(ialu_reg_mem);
14940 %}
14941 
14942 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14943   match(Set dst (MoveI2F src));
14944   effect(DEF dst, USE src);
14945 
14946   ins_cost(125);
14947   format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
14948   ins_encode %{
14949     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14950   %}
14951   ins_pipe(pipe_slow);
14952 %}
14953 
14954 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14955   match(Set dst (MoveD2L src));
14956   effect(DEF dst, USE src);
14957 
14958   ins_cost(125);
14959   format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
14960   ins_encode %{
14961     __ movq($dst$$Register, Address(rsp, $src$$disp));
14962   %}
14963   ins_pipe(ialu_reg_mem);
14964 %}
14965 
14966 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14967   predicate(!UseXmmLoadAndClearUpper);
14968   match(Set dst (MoveL2D src));
14969   effect(DEF dst, USE src);
14970 
14971   ins_cost(125);
14972   format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
14973   ins_encode %{
14974     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14975   %}
14976   ins_pipe(pipe_slow);
14977 %}
14978 
14979 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14980   predicate(UseXmmLoadAndClearUpper);
14981   match(Set dst (MoveL2D src));
14982   effect(DEF dst, USE src);
14983 
14984   ins_cost(125);
14985   format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
14986   ins_encode %{
14987     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14988   %}
14989   ins_pipe(pipe_slow);
14990 %}
14991 
14992 
14993 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14994   match(Set dst (MoveF2I src));
14995   effect(DEF dst, USE src);
14996 
14997   ins_cost(95); // XXX
14998   format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
14999   ins_encode %{
15000     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15001   %}
15002   ins_pipe(pipe_slow);
15003 %}
15004 
15005 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15006   match(Set dst (MoveI2F src));
15007   effect(DEF dst, USE src);
15008 
15009   ins_cost(100);
15010   format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
15011   ins_encode %{
15012     __ movl(Address(rsp, $dst$$disp), $src$$Register);
15013   %}
15014   ins_pipe( ialu_mem_reg );
15015 %}
15016 
15017 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15018   match(Set dst (MoveD2L src));
15019   effect(DEF dst, USE src);
15020 
15021   ins_cost(95); // XXX
15022   format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
15023   ins_encode %{
15024     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15025   %}
15026   ins_pipe(pipe_slow);
15027 %}
15028 
15029 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15030   match(Set dst (MoveL2D src));
15031   effect(DEF dst, USE src);
15032 
15033   ins_cost(100);
15034   format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
15035   ins_encode %{
15036     __ movq(Address(rsp, $dst$$disp), $src$$Register);
15037   %}
15038   ins_pipe(ialu_mem_reg);
15039 %}
15040 
15041 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15042   match(Set dst (MoveF2I src));
15043   effect(DEF dst, USE src);
15044   ins_cost(85);
15045   format %{ "movd    $dst,$src\t# MoveF2I" %}
15046   ins_encode %{
15047     __ movdl($dst$$Register, $src$$XMMRegister);
15048   %}
15049   ins_pipe( pipe_slow );
15050 %}
15051 
15052 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15053   match(Set dst (MoveD2L src));
15054   effect(DEF dst, USE src);
15055   ins_cost(85);
15056   format %{ "movd    $dst,$src\t# MoveD2L" %}
15057   ins_encode %{
15058     __ movdq($dst$$Register, $src$$XMMRegister);
15059   %}
15060   ins_pipe( pipe_slow );
15061 %}
15062 
15063 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15064   match(Set dst (MoveI2F src));
15065   effect(DEF dst, USE src);
15066   ins_cost(100);
15067   format %{ "movd    $dst,$src\t# MoveI2F" %}
15068   ins_encode %{
15069     __ movdl($dst$$XMMRegister, $src$$Register);
15070   %}
15071   ins_pipe( pipe_slow );
15072 %}
15073 
15074 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15075   match(Set dst (MoveL2D src));
15076   effect(DEF dst, USE src);
15077   ins_cost(100);
15078   format %{ "movd    $dst,$src\t# MoveL2D" %}
15079   ins_encode %{
15080      __ movdq($dst$$XMMRegister, $src$$Register);
15081   %}
15082   ins_pipe( pipe_slow );
15083 %}
15084 
15085 // Fast clearing of an array
15086 // Small non-constant length ClearArray for non-AVX512 targets.
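      // cnt is the count of 8-byte words to clear. base and cnt are pinned to
      // rdi and rcx so the rep stos forms need no extra register shuffling;
      // rax supplies the zero value and is killed.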
15087 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15088                   Universe dummy, rFlagsReg cr)
15089 %{
15090   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15091   match(Set dummy (ClearArray cnt base));
15092   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15093 
15094   format %{ $$template
15095     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15096     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15097     $$emit$$"jg      LARGE\n\t"
15098     $$emit$$"dec     rcx\n\t"
15099     $$emit$$"js      DONE\t# Zero length\n\t"
15100     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15101     $$emit$$"dec     rcx\n\t"
15102     $$emit$$"jge     LOOP\n\t"
15103     $$emit$$"jmp     DONE\n\t"
15104     $$emit$$"# LARGE:\n\t"
15105     if (UseFastStosb) {
15106        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15107        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15108     } else if (UseXMMForObjInit) {
15109        $$emit$$"mov     rdi,rax\n\t"
15110        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15111        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15112        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15113        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15114        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15115        $$emit$$"add     0x40,rax\n\t"
15116        $$emit$$"# L_zero_64_bytes:\n\t"
15117        $$emit$$"sub     0x8,rcx\n\t"
15118        $$emit$$"jge     L_loop\n\t"
15119        $$emit$$"add     0x4,rcx\n\t"
15120        $$emit$$"jl      L_tail\n\t"
15121        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15122        $$emit$$"add     0x20,rax\n\t"
15123        $$emit$$"sub     0x4,rcx\n\t"
15124        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15125        $$emit$$"add     0x4,rcx\n\t"
15126        $$emit$$"jle     L_end\n\t"
15127        $$emit$$"dec     rcx\n\t"
15128        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15129        $$emit$$"vmovq   xmm0,(rax)\n\t"
15130        $$emit$$"add     0x8,rax\n\t"
15131        $$emit$$"dec     rcx\n\t"
15132        $$emit$$"jge     L_sloop\n\t"
15133        $$emit$$"# L_end:\n\t"
15134     } else {
15135        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15136     }
15137     $$emit$$"# DONE"
15138   %}
15139   ins_encode %{
15140     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15141                  $tmp$$XMMRegister, false, knoreg);
15142   %}
15143   ins_pipe(pipe_slow);
15144 %}
15145 
15146 // Small non-constant length ClearArray for AVX512 targets.
15147 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15148                        Universe dummy, rFlagsReg cr)
15149 %{
15150   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15151   match(Set dummy (ClearArray cnt base));
15152   ins_cost(125);
15153   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15154 
15155   format %{ $$template
15156     $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15157     $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
15158     $$emit$$"jg      LARGE\n\t"
15159     $$emit$$"dec     rcx\n\t"
15160     $$emit$$"js      DONE\t# Zero length\n\t"
15161     $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
15162     $$emit$$"dec     rcx\n\t"
15163     $$emit$$"jge     LOOP\n\t"
15164     $$emit$$"jmp     DONE\n\t"
15165     $$emit$$"# LARGE:\n\t"
15166     if (UseFastStosb) {
15167        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15168        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
15169     } else if (UseXMMForObjInit) {
15170        $$emit$$"mov     rdi,rax\n\t"
15171        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15172        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15173        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15174        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15175        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15176        $$emit$$"add     0x40,rax\n\t"
15177        $$emit$$"# L_zero_64_bytes:\n\t"
15178        $$emit$$"sub     0x8,rcx\n\t"
15179        $$emit$$"jge     L_loop\n\t"
15180        $$emit$$"add     0x4,rcx\n\t"
15181        $$emit$$"jl      L_tail\n\t"
15182        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15183        $$emit$$"add     0x20,rax\n\t"
15184        $$emit$$"sub     0x4,rcx\n\t"
15185        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15186        $$emit$$"add     0x4,rcx\n\t"
15187        $$emit$$"jle     L_end\n\t"
15188        $$emit$$"dec     rcx\n\t"
15189        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15190        $$emit$$"vmovq   xmm0,(rax)\n\t"
15191        $$emit$$"add     0x8,rax\n\t"
15192        $$emit$$"dec     rcx\n\t"
15193        $$emit$$"jge     L_sloop\n\t"
15194        $$emit$$"# L_end:\n\t"
15195     } else {
15196        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
15197     }
15198     $$emit$$"# DONE"
15199   %}
15200   ins_encode %{
15201     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15202                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
15203   %}
15204   ins_pipe(pipe_slow);
15205 %}
15206 
15207 // Large non-constant length ClearArray for non-AVX512 targets.
15208 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15209                         Universe dummy, rFlagsReg cr)
15210 %{
15211   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15212   match(Set dummy (ClearArray cnt base));
15213   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15214 
15215   format %{ $$template
15216     if (UseFastStosb) {
15217        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15218        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15219        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15220     } else if (UseXMMForObjInit) {
15221        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15222        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15223        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15224        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15225        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15226        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15227        $$emit$$"add     0x40,rax\n\t"
15228        $$emit$$"# L_zero_64_bytes:\n\t"
15229        $$emit$$"sub     0x8,rcx\n\t"
15230        $$emit$$"jge     L_loop\n\t"
15231        $$emit$$"add     0x4,rcx\n\t"
15232        $$emit$$"jl      L_tail\n\t"
15233        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15234        $$emit$$"add     0x20,rax\n\t"
15235        $$emit$$"sub     0x4,rcx\n\t"
15236        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15237        $$emit$$"add     0x4,rcx\n\t"
15238        $$emit$$"jle     L_end\n\t"
15239        $$emit$$"dec     rcx\n\t"
15240        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15241        $$emit$$"vmovq   xmm0,(rax)\n\t"
15242        $$emit$$"add     0x8,rax\n\t"
15243        $$emit$$"dec     rcx\n\t"
15244        $$emit$$"jge     L_sloop\n\t"
15245        $$emit$$"# L_end:\n\t"
15246     } else {
15247        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15248        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15249     }
15250   %}
15251   ins_encode %{
15252     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15253                  $tmp$$XMMRegister, true, knoreg);
15254   %}
15255   ins_pipe(pipe_slow);
15256 %}
15257 
15258 // Large non-constant length ClearArray for AVX512 targets.
15259 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15260                              Universe dummy, rFlagsReg cr)
15261 %{
15262   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15263   match(Set dummy (ClearArray cnt base));
15264   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15265 
15266   format %{ $$template
15267     if (UseFastStosb) {
15268        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15269        $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
15270        $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
15271     } else if (UseXMMForObjInit) {
15272        $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
15273        $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
15274        $$emit$$"jmpq    L_zero_64_bytes\n\t"
15275        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15276        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15277        $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15278        $$emit$$"add     0x40,rax\n\t"
15279        $$emit$$"# L_zero_64_bytes:\n\t"
15280        $$emit$$"sub     0x8,rcx\n\t"
15281        $$emit$$"jge     L_loop\n\t"
15282        $$emit$$"add     0x4,rcx\n\t"
15283        $$emit$$"jl      L_tail\n\t"
15284        $$emit$$"vmovdqu ymm0,(rax)\n\t"
15285        $$emit$$"add     0x20,rax\n\t"
15286        $$emit$$"sub     0x4,rcx\n\t"
15287        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15288        $$emit$$"add     0x4,rcx\n\t"
15289        $$emit$$"jle     L_end\n\t"
15290        $$emit$$"dec     rcx\n\t"
15291        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15292        $$emit$$"vmovq   xmm0,(rax)\n\t"
15293        $$emit$$"add     0x8,rax\n\t"
15294        $$emit$$"dec     rcx\n\t"
15295        $$emit$$"jge     L_sloop\n\t"
15296        $$emit$$"# L_end:\n\t"
15297     } else {
15298        $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
15299        $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
15300     }
15301   %}
15302   ins_encode %{
15303     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15304                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
15305   %}
15306   ins_pipe(pipe_slow);
15307 %}
15308 
15309 // Small constant length ClearArray for AVX512 targets.
15310 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15311 %{
15312   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15313   match(Set dummy (ClearArray cnt base));
15314   ins_cost(100);
15315   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15316   format %{ "clear_mem_imm $base, $cnt\n\t" %}
15317   ins_encode %{
15318    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15319   %}
15320   ins_pipe(pipe_slow);
15321 %}
15322 
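      // String compare intrinsics. The LL/UU/LU/UL suffix gives the encoding of
      // each input (Latin-1 or UTF-16); the UL variant passes its operands to
      // the helper swapped. The _evex variants also use an AVX-512 mask register.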
15323 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15324                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15325 %{
15326   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15327   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15328   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15329 
15330   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15331   ins_encode %{
15332     __ string_compare($str1$$Register, $str2$$Register,
15333                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15334                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15335   %}
15336   ins_pipe( pipe_slow );
15337 %}
15338 
15339 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15340                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15341 %{
15342   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15343   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15344   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15345 
15346   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15347   ins_encode %{
15348     __ string_compare($str1$$Register, $str2$$Register,
15349                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15350                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15351   %}
15352   ins_pipe( pipe_slow );
15353 %}
15354 
15355 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15356                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
15357 %{
15358   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15359   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15360   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15361 
15362   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15363   ins_encode %{
15364     __ string_compare($str1$$Register, $str2$$Register,
15365                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15366                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15367   %}
15368   ins_pipe( pipe_slow );
15369 %}
15370 
15371 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15372                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15373 %{
15374   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15375   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15376   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15377 
15378   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15379   ins_encode %{
15380     __ string_compare($str1$$Register, $str2$$Register,
15381                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15382                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15383   %}
15384   ins_pipe( pipe_slow );
15385 %}
15386 
15387 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15388                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15389 %{
15390   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15391   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15392   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15393 
15394   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15395   ins_encode %{
15396     __ string_compare($str1$$Register, $str2$$Register,
15397                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15398                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15399   %}
15400   ins_pipe( pipe_slow );
15401 %}
15402 
15403 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15404                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15405 %{
15406   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15407   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15408   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15409 
15410   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15411   ins_encode %{
15412     __ string_compare($str1$$Register, $str2$$Register,
15413                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
15414                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15415   %}
15416   ins_pipe( pipe_slow );
15417 %}
15418 
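// Note: the UL variants below pass their operands to string_compare in
// swapped order (str2/cnt2 first); the StrIntrinsicNode::UL argument tells
// the shared compare code how the swapped operands are encoded.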
15419 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15420                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15421 %{
15422   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15423   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15424   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15425 
15426   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15427   ins_encode %{
15428     __ string_compare($str2$$Register, $str1$$Register,
15429                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15430                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15431   %}
15432   ins_pipe( pipe_slow );
15433 %}
15434 
15435 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15436                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15437 %{
15438   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15439   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15440   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15441 
15442   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15443   ins_encode %{
15444     __ string_compare($str2$$Register, $str1$$Register,
15445                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15446                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15447   %}
15448   ins_pipe( pipe_slow );
15449 %}
15450 
15451 // fast search of substring with known size.
15452 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15453                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15454 %{
15455   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15456   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15457   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15458 
15459   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15460   ins_encode %{
15461     int icnt2 = (int)$int_cnt2$$constant;
15462     if (icnt2 >= 16) {
15463       // IndexOf for constant substrings with size >= 16 elements
15464       // which don't need to be loaded through stack.
15465       __ string_indexofC8($str1$$Register, $str2$$Register,
15466                           $cnt1$$Register, $cnt2$$Register,
15467                           icnt2, $result$$Register,
15468                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15469     } else {
15470       // Small strings are loaded through stack if they cross page boundary.
15471       __ string_indexof($str1$$Register, $str2$$Register,
15472                         $cnt1$$Register, $cnt2$$Register,
15473                         icnt2, $result$$Register,
15474                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15475     }
15476   %}
15477   ins_pipe( pipe_slow );
15478 %}
15479 
15480 // fast search of substring with known size.
15481 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15482                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15483 %{
15484   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15485   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15486   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15487 
15488   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15489   ins_encode %{
15490     int icnt2 = (int)$int_cnt2$$constant;
15491     if (icnt2 >= 8) {
15492       // IndexOf for constant substrings with size >= 8 elements
15493       // which don't need to be loaded through stack.
15494       __ string_indexofC8($str1$$Register, $str2$$Register,
15495                           $cnt1$$Register, $cnt2$$Register,
15496                           icnt2, $result$$Register,
15497                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15498     } else {
15499       // Small strings are loaded through stack if they cross page boundary.
15500       __ string_indexof($str1$$Register, $str2$$Register,
15501                         $cnt1$$Register, $cnt2$$Register,
15502                         icnt2, $result$$Register,
15503                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15504     }
15505   %}
15506   ins_pipe( pipe_slow );
15507 %}
15508 
15509 // fast search of substring with known size.
15510 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15511                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15512 %{
15513   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15514   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15515   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15516 
15517   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15518   ins_encode %{
15519     int icnt2 = (int)$int_cnt2$$constant;
15520     if (icnt2 >= 8) {
15521       // IndexOf for constant substrings with size >= 8 elements
15522       // which don't need to be loaded through stack.
15523       __ string_indexofC8($str1$$Register, $str2$$Register,
15524                           $cnt1$$Register, $cnt2$$Register,
15525                           icnt2, $result$$Register,
15526                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15527     } else {
15528       // Small strings are loaded through stack if they cross page boundary.
15529       __ string_indexof($str1$$Register, $str2$$Register,
15530                         $cnt1$$Register, $cnt2$$Register,
15531                         icnt2, $result$$Register,
15532                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15533     }
15534   %}
15535   ins_pipe( pipe_slow );
15536 %}
15537 
15538 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15539                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15540 %{
15541   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15542   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15543   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15544 
15545   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15546   ins_encode %{
15547     __ string_indexof($str1$$Register, $str2$$Register,
15548                       $cnt1$$Register, $cnt2$$Register,
15549                       (-1), $result$$Register,
15550                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15551   %}
15552   ins_pipe( pipe_slow );
15553 %}
15554 
15555 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15556                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15557 %{
15558   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15559   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15560   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15561 
15562   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15563   ins_encode %{
15564     __ string_indexof($str1$$Register, $str2$$Register,
15565                       $cnt1$$Register, $cnt2$$Register,
15566                       (-1), $result$$Register,
15567                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15568   %}
15569   ins_pipe( pipe_slow );
15570 %}
15571 
15572 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15573                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15574 %{
15575   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15576   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15577   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15578 
15579   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15580   ins_encode %{
15581     __ string_indexof($str1$$Register, $str2$$Register,
15582                       $cnt1$$Register, $cnt2$$Register,
15583                       (-1), $result$$Register,
15584                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15585   %}
15586   ins_pipe( pipe_slow );
15587 %}
15588 
15589 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15590                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15591 %{
15592   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15593   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15594   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15595   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15596   ins_encode %{
15597     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15598                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15599   %}
15600   ins_pipe( pipe_slow );
15601 %}
15602 
15603 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15604                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15605 %{
15606   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15607   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15608   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15609   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15610   ins_encode %{
15611     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15612                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15613   %}
15614   ins_pipe( pipe_slow );
15615 %}
15616 
15617 // fast string equals
15618 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15619                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15620 %{
15621   predicate(!VM_Version::supports_avx512vlbw());
15622   match(Set result (StrEquals (Binary str1 str2) cnt));
15623   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15624 
15625   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15626   ins_encode %{
15627     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15628                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15629                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15630   %}
15631   ins_pipe( pipe_slow );
15632 %}
15633 
15634 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15635                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15636 %{
15637   predicate(VM_Version::supports_avx512vlbw());
15638   match(Set result (StrEquals (Binary str1 str2) cnt));
15639   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15640 
15641   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15642   ins_encode %{
15643     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15644                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15645                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15646   %}
15647   ins_pipe( pipe_slow );
15648 %}
15649 
15650 // fast array equals
15651 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15652                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15653 %{
15654   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15655   match(Set result (AryEq ary1 ary2));
15656   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15657 
15658   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15659   ins_encode %{
15660     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15661                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15662                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15663   %}
15664   ins_pipe( pipe_slow );
15665 %}
15666 
15667 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15668                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15669 %{
15670   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15671   match(Set result (AryEq ary1 ary2));
15672   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15673 
15674   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15675   ins_encode %{
15676     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15677                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15678                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15679   %}
15680   ins_pipe( pipe_slow );
15681 %}
15682 
15683 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15684                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15685 %{
15686   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15687   match(Set result (AryEq ary1 ary2));
15688   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15689 
15690   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15691   ins_encode %{
15692     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15693                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15694                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15695   %}
15696   ins_pipe( pipe_slow );
15697 %}
15698 
15699 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15700                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15701 %{
15702   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15703   match(Set result (AryEq ary1 ary2));
15704   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15705 
15706   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15707   ins_encode %{
15708     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15709                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15710                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15711   %}
15712   ins_pipe( pipe_slow );
15713 %}
15714 
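// Vectorized Arrays.hashCode.  Conceptually this computes
//   result = result * 31^n + sum(a[i] * 31^(n-1-i))
// accumulating several 31^k-weighted partial sums in parallel, which is why
// so many vector temporaries are needed (a sketch of the math, not a
// description of the exact emitted schedule).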
15715 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15716                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15717                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15718                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15719                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15720 %{
15721   predicate(UseAVX >= 2);
15722   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15723   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15724          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15725          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15726          USE basic_type, KILL cr);
15727 
15728   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15729   ins_encode %{
15730     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15731                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15732                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15733                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15734                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15735                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15736                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15737   %}
15738   ins_pipe( pipe_slow );
15739 %}
15740 
15741 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15743 %{
15744   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15745   match(Set result (CountPositives ary1 len));
15746   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15747 
15748   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15749   ins_encode %{
15750     __ count_positives($ary1$$Register, $len$$Register,
15751                        $result$$Register, $tmp3$$Register,
15752                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15753   %}
15754   ins_pipe( pipe_slow );
15755 %}
15756 
15757 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15759 %{
15760   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15761   match(Set result (CountPositives ary1 len));
15762   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15763 
15764   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15765   ins_encode %{
15766     __ count_positives($ary1$$Register, $len$$Register,
15767                        $result$$Register, $tmp3$$Register,
15768                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15769   %}
15770   ins_pipe( pipe_slow );
15771 %}
15772 
15773 // fast char[] to byte[] compression
15774 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15775                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15776   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15777   match(Set result (StrCompressedCopy src (Binary dst len)));
15778   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15779          USE_KILL len, KILL tmp5, KILL cr);
15780 
15781   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15782   ins_encode %{
15783     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15784                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15785                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15786                            knoreg, knoreg);
15787   %}
15788   ins_pipe( pipe_slow );
15789 %}
15790 
15791 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15792                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15793   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15794   match(Set result (StrCompressedCopy src (Binary dst len)));
15795   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15796          USE_KILL len, KILL tmp5, KILL cr);
15797 
15798   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15799   ins_encode %{
15800     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15801                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15802                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15803                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15804   %}
15805   ins_pipe( pipe_slow );
15806 %}
15807 // fast byte[] to char[] inflation
15808 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15809                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15810   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15811   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15812   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15813 
15814   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15815   ins_encode %{
15816     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15817                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15818   %}
15819   ins_pipe( pipe_slow );
15820 %}
15821 
15822 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15823                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15824   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15825   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15826   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15827 
15828   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15829   ins_encode %{
15830     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15831                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15832   %}
15833   ins_pipe( pipe_slow );
15834 %}
15835 
15836 // encode char[] to byte[] in ISO_8859_1
15837 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15838                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15839                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15840   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15841   match(Set result (EncodeISOArray src (Binary dst len)));
15842   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15843 
15844   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15845   ins_encode %{
15846     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15847                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15848                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15849   %}
15850   ins_pipe( pipe_slow );
15851 %}
15852 
15853 // encode char[] to byte[] in ASCII
15854 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15855                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15856                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15857   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15858   match(Set result (EncodeISOArray src (Binary dst len)));
15859   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15860 
15861   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15862   ins_encode %{
15863     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15864                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15865                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15866   %}
15867   ins_pipe( pipe_slow );
15868 %}
15869 
15870 //----------Overflow Math Instructions-----------------------------------------
15871 
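// These match the Overflow{Add,Sub,Mul}{I,L} ideal nodes generated for the
// Math.*Exact intrinsics.  The arithmetic (or compare) instruction sets the
// overflow flag, and the matched If node branches on it, roughly (a sketch,
// not the exact emitted code):
//
//   addl  op1, op2        // sets OF on signed overflow
//   jo    overflow_path   // taken -> deoptimize / throw ArithmeticException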
15872 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15873 %{
15874   match(Set cr (OverflowAddI op1 op2));
15875   effect(DEF cr, USE_KILL op1, USE op2);
15876 
15877   format %{ "addl    $op1, $op2\t# overflow check int" %}
15878 
15879   ins_encode %{
15880     __ addl($op1$$Register, $op2$$Register);
15881   %}
15882   ins_pipe(ialu_reg_reg);
15883 %}
15884 
15885 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15886 %{
15887   match(Set cr (OverflowAddI op1 op2));
15888   effect(DEF cr, USE_KILL op1, USE op2);
15889 
15890   format %{ "addl    $op1, $op2\t# overflow check int" %}
15891 
15892   ins_encode %{
15893     __ addl($op1$$Register, $op2$$constant);
15894   %}
15895   ins_pipe(ialu_reg_reg);
15896 %}
15897 
15898 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15899 %{
15900   match(Set cr (OverflowAddL op1 op2));
15901   effect(DEF cr, USE_KILL op1, USE op2);
15902 
15903   format %{ "addq    $op1, $op2\t# overflow check long" %}
15904   ins_encode %{
15905     __ addq($op1$$Register, $op2$$Register);
15906   %}
15907   ins_pipe(ialu_reg_reg);
15908 %}
15909 
15910 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15911 %{
15912   match(Set cr (OverflowAddL op1 op2));
15913   effect(DEF cr, USE_KILL op1, USE op2);
15914 
15915   format %{ "addq    $op1, $op2\t# overflow check long" %}
15916   ins_encode %{
15917     __ addq($op1$$Register, $op2$$constant);
15918   %}
15919   ins_pipe(ialu_reg_reg);
15920 %}
15921 
15922 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15923 %{
15924   match(Set cr (OverflowSubI op1 op2));
15925 
15926   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15927   ins_encode %{
15928     __ cmpl($op1$$Register, $op2$$Register);
15929   %}
15930   ins_pipe(ialu_reg_reg);
15931 %}
15932 
15933 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15934 %{
15935   match(Set cr (OverflowSubI op1 op2));
15936 
15937   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15938   ins_encode %{
15939     __ cmpl($op1$$Register, $op2$$constant);
15940   %}
15941   ins_pipe(ialu_reg_reg);
15942 %}
15943 
15944 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15945 %{
15946   match(Set cr (OverflowSubL op1 op2));
15947 
15948   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15949   ins_encode %{
15950     __ cmpq($op1$$Register, $op2$$Register);
15951   %}
15952   ins_pipe(ialu_reg_reg);
15953 %}
15954 
15955 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15956 %{
15957   match(Set cr (OverflowSubL op1 op2));
15958 
15959   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15960   ins_encode %{
15961     __ cmpq($op1$$Register, $op2$$constant);
15962   %}
15963   ins_pipe(ialu_reg_reg);
15964 %}
15965 
15966 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15967 %{
15968   match(Set cr (OverflowSubI zero op2));
15969   effect(DEF cr, USE_KILL op2);
15970 
15971   format %{ "negl    $op2\t# overflow check int" %}
15972   ins_encode %{
15973     __ negl($op2$$Register);
15974   %}
15975   ins_pipe(ialu_reg_reg);
15976 %}
15977 
15978 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15979 %{
15980   match(Set cr (OverflowSubL zero op2));
15981   effect(DEF cr, USE_KILL op2);
15982 
15983   format %{ "negq    $op2\t# overflow check long" %}
15984   ins_encode %{
15985     __ negq($op2$$Register);
15986   %}
15987   ins_pipe(ialu_reg_reg);
15988 %}
15989 
15990 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15991 %{
15992   match(Set cr (OverflowMulI op1 op2));
15993   effect(DEF cr, USE_KILL op1, USE op2);
15994 
15995   format %{ "imull    $op1, $op2\t# overflow check int" %}
15996   ins_encode %{
15997     __ imull($op1$$Register, $op2$$Register);
15998   %}
15999   ins_pipe(ialu_reg_reg_alu0);
16000 %}
16001 
16002 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16003 %{
16004   match(Set cr (OverflowMulI op1 op2));
16005   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16006 
16007   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16008   ins_encode %{
16009     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16010   %}
16011   ins_pipe(ialu_reg_reg_alu0);
16012 %}
16013 
16014 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16015 %{
16016   match(Set cr (OverflowMulL op1 op2));
16017   effect(DEF cr, USE_KILL op1, USE op2);
16018 
16019   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16020   ins_encode %{
16021     __ imulq($op1$$Register, $op2$$Register);
16022   %}
16023   ins_pipe(ialu_reg_reg_alu0);
16024 %}
16025 
16026 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16027 %{
16028   match(Set cr (OverflowMulL op1 op2));
16029   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16030 
16031   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16032   ins_encode %{
16033     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16034   %}
16035   ins_pipe(ialu_reg_reg_alu0);
16036 %}
16037 
16038 
16039 //----------Control Flow Instructions------------------------------------------
16040 // Signed compare Instructions
16041 
16042 // XXX more variants!!
16043 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16044 %{
16045   match(Set cr (CmpI op1 op2));
16046   effect(DEF cr, USE op1, USE op2);
16047 
16048   format %{ "cmpl    $op1, $op2" %}
16049   ins_encode %{
16050     __ cmpl($op1$$Register, $op2$$Register);
16051   %}
16052   ins_pipe(ialu_cr_reg_reg);
16053 %}
16054 
16055 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16056 %{
16057   match(Set cr (CmpI op1 op2));
16058 
16059   format %{ "cmpl    $op1, $op2" %}
16060   ins_encode %{
16061     __ cmpl($op1$$Register, $op2$$constant);
16062   %}
16063   ins_pipe(ialu_cr_reg_imm);
16064 %}
16065 
16066 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16067 %{
16068   match(Set cr (CmpI op1 (LoadI op2)));
16069 
16070   ins_cost(500); // XXX
16071   format %{ "cmpl    $op1, $op2" %}
16072   ins_encode %{
16073     __ cmpl($op1$$Register, $op2$$Address);
16074   %}
16075   ins_pipe(ialu_cr_reg_mem);
16076 %}
16077 
16078 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16079 %{
16080   match(Set cr (CmpI src zero));
16081 
16082   format %{ "testl   $src, $src" %}
16083   ins_encode %{
16084     __ testl($src$$Register, $src$$Register);
16085   %}
16086   ins_pipe(ialu_cr_reg_imm);
16087 %}
16088 
16089 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16090 %{
16091   match(Set cr (CmpI (AndI src con) zero));
16092 
16093   format %{ "testl   $src, $con" %}
16094   ins_encode %{
16095     __ testl($src$$Register, $con$$constant);
16096   %}
16097   ins_pipe(ialu_cr_reg_imm);
16098 %}
16099 
16100 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16101 %{
16102   match(Set cr (CmpI (AndI src1 src2) zero));
16103 
16104   format %{ "testl   $src1, $src2" %}
16105   ins_encode %{
16106     __ testl($src1$$Register, $src2$$Register);
16107   %}
16108   ins_pipe(ialu_cr_reg_imm);
16109 %}
16110 
16111 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16112 %{
16113   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16114 
16115   format %{ "testl   $src, $mem" %}
16116   ins_encode %{
16117     __ testl($src$$Register, $mem$$Address);
16118   %}
16119   ins_pipe(ialu_cr_reg_mem);
16120 %}
16121 
16122 // Unsigned compare Instructions; really, same as signed except they
16123 // produce an rFlagsRegU instead of rFlagsReg.
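// A branch consuming rFlagsRegU then selects the unsigned conditions
// (jb/ja/jbe/jae) instead of the signed ones (jl/jg/jle/jge).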
16124 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16125 %{
16126   match(Set cr (CmpU op1 op2));
16127 
16128   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16129   ins_encode %{
16130     __ cmpl($op1$$Register, $op2$$Register);
16131   %}
16132   ins_pipe(ialu_cr_reg_reg);
16133 %}
16134 
16135 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16136 %{
16137   match(Set cr (CmpU op1 op2));
16138 
16139   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16140   ins_encode %{
16141     __ cmpl($op1$$Register, $op2$$constant);
16142   %}
16143   ins_pipe(ialu_cr_reg_imm);
16144 %}
16145 
16146 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16147 %{
16148   match(Set cr (CmpU op1 (LoadI op2)));
16149 
16150   ins_cost(500); // XXX
16151   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16152   ins_encode %{
16153     __ cmpl($op1$$Register, $op2$$Address);
16154   %}
16155   ins_pipe(ialu_cr_reg_mem);
16156 %}
16157 
16158 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16159 %{
16160   match(Set cr (CmpU src zero));
16161 
16162   format %{ "testl   $src, $src\t# unsigned" %}
16163   ins_encode %{
16164     __ testl($src$$Register, $src$$Register);
16165   %}
16166   ins_pipe(ialu_cr_reg_imm);
16167 %}
16168 
16169 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16170 %{
16171   match(Set cr (CmpP op1 op2));
16172 
16173   format %{ "cmpq    $op1, $op2\t# ptr" %}
16174   ins_encode %{
16175     __ cmpq($op1$$Register, $op2$$Register);
16176   %}
16177   ins_pipe(ialu_cr_reg_reg);
16178 %}
16179 
16180 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16181 %{
16182   match(Set cr (CmpP op1 (LoadP op2)));
16183   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16184 
16185   ins_cost(500); // XXX
16186   format %{ "cmpq    $op1, $op2\t# ptr" %}
16187   ins_encode %{
16188     __ cmpq($op1$$Register, $op2$$Address);
16189   %}
16190   ins_pipe(ialu_cr_reg_mem);
16191 %}
16192 
16193 // XXX this is generalized by compP_rReg_mem???
16194 // Compare raw pointer (used in out-of-heap check).
16195 // Only works because non-oop pointers must be raw pointers
16196 // and raw pointers have no anti-dependencies.
16197 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16198 %{
16199   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16200             n->in(2)->as_Load()->barrier_data() == 0);
16201   match(Set cr (CmpP op1 (LoadP op2)));
16202 
16203   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16204   ins_encode %{
16205     __ cmpq($op1$$Register, $op2$$Address);
16206   %}
16207   ins_pipe(ialu_cr_reg_mem);
16208 %}
16209 
16210 // This will generate a signed flags result. This should be OK since
16211 // any compare to a zero should be eq/neq.
16212 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16213 %{
16214   match(Set cr (CmpP src zero));
16215 
16216   format %{ "testq   $src, $src\t# ptr" %}
16217   ins_encode %{
16218     __ testq($src$$Register, $src$$Register);
16219   %}
16220   ins_pipe(ialu_cr_reg_imm);
16221 %}
16222 
16223 // This will generate a signed flags result. This should be OK since
16224 // any compare to a zero should be eq/neq.
16225 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16226 %{
16227   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16228             n->in(1)->as_Load()->barrier_data() == 0);
16229   match(Set cr (CmpP (LoadP op) zero));
16230 
16231   ins_cost(500); // XXX
16232   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16233   ins_encode %{
16234     __ testq($op$$Address, 0xFFFFFFFF);
16235   %}
16236   ins_pipe(ialu_cr_reg_imm);
16237 %}
16238 
16239 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16240 %{
16241   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16242             n->in(1)->as_Load()->barrier_data() == 0);
16243   match(Set cr (CmpP (LoadP mem) zero));
16244 
16245   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16246   ins_encode %{
16247     __ cmpq(r12, $mem$$Address);
16248   %}
16249   ins_pipe(ialu_cr_reg_mem);
16250 %}
16251 
16252 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16253 %{
16254   match(Set cr (CmpN op1 op2));
16255 
16256   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16257   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16258   ins_pipe(ialu_cr_reg_reg);
16259 %}
16260 
16261 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16262 %{
16263   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16264   match(Set cr (CmpN src (LoadN mem)));
16265 
16266   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16267   ins_encode %{
16268     __ cmpl($src$$Register, $mem$$Address);
16269   %}
16270   ins_pipe(ialu_cr_reg_mem);
16271 %}
16272 
16273 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16274   match(Set cr (CmpN op1 op2));
16275 
16276   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16277   ins_encode %{
16278     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16279   %}
16280   ins_pipe(ialu_cr_reg_imm);
16281 %}
16282 
16283 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16284 %{
16285   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16286   match(Set cr (CmpN src (LoadN mem)));
16287 
16288   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16289   ins_encode %{
16290     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16291   %}
16292   ins_pipe(ialu_cr_reg_mem);
16293 %}
16294 
16295 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16296   match(Set cr (CmpN op1 op2));
16297 
16298   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16299   ins_encode %{
16300     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16301   %}
16302   ins_pipe(ialu_cr_reg_imm);
16303 %}
16304 
16305 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16306 %{
16307   predicate(!UseCompactObjectHeaders);
16308   match(Set cr (CmpN src (LoadNKlass mem)));
16309 
16310   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16311   ins_encode %{
16312     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16313   %}
16314   ins_pipe(ialu_cr_reg_mem);
16315 %}
16316 
16317 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16318   match(Set cr (CmpN src zero));
16319 
16320   format %{ "testl   $src, $src\t# compressed ptr" %}
16321   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16322   ins_pipe(ialu_cr_reg_imm);
16323 %}
16324 
16325 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16326 %{
16327   predicate(CompressedOops::base() != nullptr &&
16328             n->in(1)->as_Load()->barrier_data() == 0);
16329   match(Set cr (CmpN (LoadN mem) zero));
16330 
16331   ins_cost(500); // XXX
16332   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16333   ins_encode %{
16334     __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16335   %}
16336   ins_pipe(ialu_cr_reg_mem);
16337 %}
16338 
16339 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16340 %{
16341   predicate(CompressedOops::base() == nullptr &&
16342             n->in(1)->as_Load()->barrier_data() == 0);
16343   match(Set cr (CmpN (LoadN mem) zero));
16344 
16345   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16346   ins_encode %{
16347     __ cmpl(r12, $mem$$Address);
16348   %}
16349   ins_pipe(ialu_cr_reg_mem);
16350 %}
16351 
16352 // Yanked all unsigned pointer compare operations.
16353 // Pointer compares are done with CmpP which is already unsigned.
16354 
16355 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16356 %{
16357   match(Set cr (CmpL op1 op2));
16358 
16359   format %{ "cmpq    $op1, $op2" %}
16360   ins_encode %{
16361     __ cmpq($op1$$Register, $op2$$Register);
16362   %}
16363   ins_pipe(ialu_cr_reg_reg);
16364 %}
16365 
16366 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16367 %{
16368   match(Set cr (CmpL op1 op2));
16369 
16370   format %{ "cmpq    $op1, $op2" %}
16371   ins_encode %{
16372     __ cmpq($op1$$Register, $op2$$constant);
16373   %}
16374   ins_pipe(ialu_cr_reg_imm);
16375 %}
16376 
16377 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16378 %{
16379   match(Set cr (CmpL op1 (LoadL op2)));
16380 
16381   format %{ "cmpq    $op1, $op2" %}
16382   ins_encode %{
16383     __ cmpq($op1$$Register, $op2$$Address);
16384   %}
16385   ins_pipe(ialu_cr_reg_mem);
16386 %}
16387 
16388 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16389 %{
16390   match(Set cr (CmpL src zero));
16391 
16392   format %{ "testq   $src, $src" %}
16393   ins_encode %{
16394     __ testq($src$$Register, $src$$Register);
16395   %}
16396   ins_pipe(ialu_cr_reg_imm);
16397 %}
16398 
16399 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16400 %{
16401   match(Set cr (CmpL (AndL src con) zero));
16402 
16403   format %{ "testq   $src, $con\t# long" %}
16404   ins_encode %{
16405     __ testq($src$$Register, $con$$constant);
16406   %}
16407   ins_pipe(ialu_cr_reg_imm);
16408 %}
16409 
16410 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16411 %{
16412   match(Set cr (CmpL (AndL src1 src2) zero));
16413 
16414   format %{ "testq   $src1, $src2\t# long" %}
16415   ins_encode %{
16416     __ testq($src1$$Register, $src2$$Register);
16417   %}
16418   ins_pipe(ialu_cr_reg_imm);
16419 %}
16420 
16421 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16422 %{
16423   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16424 
16425   format %{ "testq   $src, $mem" %}
16426   ins_encode %{
16427     __ testq($src$$Register, $mem$$Address);
16428   %}
16429   ins_pipe(ialu_cr_reg_mem);
16430 %}
16431 
16432 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16433 %{
16434   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16435 
16436   format %{ "testq   $src, $mem" %}
16437   ins_encode %{
16438     __ testq($src$$Register, $mem$$Address);
16439   %}
16440   ins_pipe(ialu_cr_reg_mem);
16441 %}
16442 
16443 // Manifest a CmpU result in an integer register.  Very painful.
16444 // This is the test to avoid.
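// Semantically (a sketch in C, not the emitted code):
//
//   int cmpU3(uint32_t a, uint32_t b) {
//     if (a < b) return -1;  // "jb done" keeps the preloaded -1
//     return a != b;         // setcc(notZero): 0 if equal, 1 if above
//   }
//
// The signed (CmpL3) and unsigned long (CmpUL3) variants below differ only
// in the compare width and the first branch condition (jl vs jb).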
16445 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16446 %{
16447   match(Set dst (CmpU3 src1 src2));
16448   effect(KILL flags);
16449 
16450   ins_cost(275); // XXX
16451   format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
16452             "movl    $dst, -1\n\t"
16453             "jb,u    done\n\t"
16454             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16455     "done:" %}
16456   ins_encode %{
16457     Label done;
16458     __ cmpl($src1$$Register, $src2$$Register);
16459     __ movl($dst$$Register, -1);
16460     __ jccb(Assembler::below, done);
16461     __ setcc(Assembler::notZero, $dst$$Register);
16462     __ bind(done);
16463   %}
16464   ins_pipe(pipe_slow);
16465 %}
16466 
16467 // Manifest a CmpL result in an integer register.  Very painful.
16468 // This is the test to avoid.
16469 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16470 %{
16471   match(Set dst (CmpL3 src1 src2));
16472   effect(KILL flags);
16473 
16474   ins_cost(275); // XXX
16475   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16476             "movl    $dst, -1\n\t"
16477             "jl,s    done\n\t"
16478             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16479     "done:" %}
16480   ins_encode %{
16481     Label done;
16482     __ cmpq($src1$$Register, $src2$$Register);
16483     __ movl($dst$$Register, -1);
16484     __ jccb(Assembler::less, done);
16485     __ setcc(Assembler::notZero, $dst$$Register);
16486     __ bind(done);
16487   %}
16488   ins_pipe(pipe_slow);
16489 %}
16490 
16491 // Manifest a CmpUL result in an integer register.  Very painful.
16492 // This is the test to avoid.
16493 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16494 %{
16495   match(Set dst (CmpUL3 src1 src2));
16496   effect(KILL flags);
16497 
16498   ins_cost(275); // XXX
16499   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16500             "movl    $dst, -1\n\t"
16501             "jb,u    done\n\t"
16502             "setcc   $dst \t# emits setne + movzbl or setzune for APX"
16503     "done:" %}
16504   ins_encode %{
16505     Label done;
16506     __ cmpq($src1$$Register, $src2$$Register);
16507     __ movl($dst$$Register, -1);
16508     __ jccb(Assembler::below, done);
16509     __ setcc(Assembler::notZero, $dst$$Register);
16510     __ bind(done);
16511   %}
16512   ins_pipe(pipe_slow);
16513 %}
16514 
16515 // Unsigned long compare Instructions; really, same as signed long except they
16516 // produce an rFlagsRegU instead of rFlagsReg.
16517 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16518 %{
16519   match(Set cr (CmpUL op1 op2));
16520 
16521   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16522   ins_encode %{
16523     __ cmpq($op1$$Register, $op2$$Register);
16524   %}
16525   ins_pipe(ialu_cr_reg_reg);
16526 %}
16527 
16528 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16529 %{
16530   match(Set cr (CmpUL op1 op2));
16531 
16532   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16533   ins_encode %{
16534     __ cmpq($op1$$Register, $op2$$constant);
16535   %}
16536   ins_pipe(ialu_cr_reg_imm);
16537 %}
16538 
16539 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16540 %{
16541   match(Set cr (CmpUL op1 (LoadL op2)));
16542 
16543   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16544   ins_encode %{
16545     __ cmpq($op1$$Register, $op2$$Address);
16546   %}
16547   ins_pipe(ialu_cr_reg_mem);
16548 %}
16549 
16550 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16551 %{
16552   match(Set cr (CmpUL src zero));
16553 
16554   format %{ "testq   $src, $src\t# unsigned" %}
16555   ins_encode %{
16556     __ testq($src$$Register, $src$$Register);
16557   %}
16558   ins_pipe(ialu_cr_reg_imm);
16559 %}
16560 
16561 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16562 %{
16563   match(Set cr (CmpI (LoadB mem) imm));
16564 
16565   ins_cost(125);
16566   format %{ "cmpb    $mem, $imm" %}
16567   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16568   ins_pipe(ialu_cr_reg_mem);
16569 %}
16570 
16571 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16572 %{
16573   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16574 
16575   ins_cost(125);
16576   format %{ "testb   $mem, $imm\t# ubyte" %}
16577   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16578   ins_pipe(ialu_cr_reg_mem);
16579 %}
16580 
16581 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16582 %{
16583   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16584 
16585   ins_cost(125);
16586   format %{ "testb   $mem, $imm\t# byte" %}
16587   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16588   ins_pipe(ialu_cr_reg_mem);
16589 %}
16590 
16591 //----------Max and Min--------------------------------------------------------
16592 // Min Instructions
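//
// There is no scalar min/max instruction for general-purpose registers on
// x86, so MinI/MaxI expand to a compare followed by a conditional move,
// e.g. for MinI (a sketch):
//
//   cmpl    dst, src
//   cmovlgt dst, src   // if dst > src, take src: dst = min(dst, src)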
16593 
16594 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16595 %{
16596   predicate(!UseAPX);
16597   effect(USE_DEF dst, USE src, USE cr);
16598 
16599   format %{ "cmovlgt $dst, $src\t# min" %}
16600   ins_encode %{
16601     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16602   %}
16603   ins_pipe(pipe_cmov_reg);
16604 %}
16605 
16606 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16607 %{
16608   predicate(UseAPX);
16609   effect(DEF dst, USE src1, USE src2, USE cr);
16610 
16611   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16612   ins_encode %{
16613     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16614   %}
16615   ins_pipe(pipe_cmov_reg);
16616 %}
16617 
16618 instruct minI_rReg(rRegI dst, rRegI src)
16619 %{
16620   predicate(!UseAPX);
16621   match(Set dst (MinI dst src));
16622 
16623   ins_cost(200);
16624   expand %{
16625     rFlagsReg cr;
16626     compI_rReg(cr, dst, src);
16627     cmovI_reg_g(dst, src, cr);
16628   %}
16629 %}
16630 
16631 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16632 %{
16633   predicate(UseAPX);
16634   match(Set dst (MinI src1 src2));
16635   effect(DEF dst, USE src1, USE src2);
16636   flag(PD::Flag_ndd_demotable);
16637 
16638   ins_cost(200);
16639   expand %{
16640     rFlagsReg cr;
16641     compI_rReg(cr, src1, src2);
16642     cmovI_reg_g_ndd(dst, src1, src2, cr);
16643   %}
16644 %}
16645 
16646 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16647 %{
16648   predicate(!UseAPX);
16649   effect(USE_DEF dst, USE src, USE cr);
16650 
16651   format %{ "cmovllt $dst, $src\t# max" %}
16652   ins_encode %{
16653     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16654   %}
16655   ins_pipe(pipe_cmov_reg);
16656 %}
16657 
16658 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16659 %{
16660   predicate(UseAPX);
16661   effect(DEF dst, USE src1, USE src2, USE cr);
16662 
16663   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16664   ins_encode %{
16665     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16666   %}
16667   ins_pipe(pipe_cmov_reg);
16668 %}
16669 
16670 instruct maxI_rReg(rRegI dst, rRegI src)
16671 %{
16672   predicate(!UseAPX);
16673   match(Set dst (MaxI dst src));
16674 
16675   ins_cost(200);
16676   expand %{
16677     rFlagsReg cr;
16678     compI_rReg(cr, dst, src);
16679     cmovI_reg_l(dst, src, cr);
16680   %}
16681 %}
16682 
16683 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16684 %{
16685   predicate(UseAPX);
16686   match(Set dst (MaxI src1 src2));
16687   effect(DEF dst, USE src1, USE src2);
16688   flag(PD::Flag_ndd_demotable);
16689 
16690   ins_cost(200);
16691   expand %{
16692     rFlagsReg cr;
16693     compI_rReg(cr, src1, src2);
16694     cmovI_reg_l_ndd(dst, src1, src2, cr);
16695   %}
16696 %}
16697 
16698 // ============================================================================
16699 // Branch Instructions
16700 
16701 // Jump Direct - Label defines a relative address from JMP+1
16702 instruct jmpDir(label labl)
16703 %{
16704   match(Goto);
16705   effect(USE labl);
16706 
16707   ins_cost(300);
16708   format %{ "jmp     $labl" %}
16709   size(5);
16710   ins_encode %{
16711     Label* L = $labl$$label;
16712     __ jmp(*L, false); // Always long jump
16713   %}
16714   ins_pipe(pipe_jmp);
16715 %}
16716 
16717 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16718 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16719 %{
16720   match(If cop cr);
16721   effect(USE labl);
16722 
16723   ins_cost(300);
16724   format %{ "j$cop     $labl" %}
16725   size(6);
16726   ins_encode %{
16727     Label* L = $labl$$label;
16728     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16729   %}
16730   ins_pipe(pipe_jcc);
16731 %}
16732 
16733 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16734 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16735 %{
16736   match(CountedLoopEnd cop cr);
16737   effect(USE labl);
16738 
16739   ins_cost(300);
16740   format %{ "j$cop     $labl\t# loop end" %}
16741   size(6);
16742   ins_encode %{
16743     Label* L = $labl$$label;
16744     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16745   %}
16746   ins_pipe(pipe_jcc);
16747 %}
16748 
16749 // Jump Direct Conditional - using unsigned comparison
16750 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16751   match(If cop cmp);
16752   effect(USE labl);
16753 
16754   ins_cost(300);
16755   format %{ "j$cop,u   $labl" %}
16756   size(6);
16757   ins_encode %{
16758     Label* L = $labl$$label;
16759     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16760   %}
16761   ins_pipe(pipe_jcc);
16762 %}
16763 
16764 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16765   match(If cop cmp);
16766   effect(USE labl);
16767 
16768   ins_cost(200);
16769   format %{ "j$cop,u   $labl" %}
16770   size(6);
16771   ins_encode %{
16772     Label* L = $labl$$label;
16773     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16774   %}
16775   ins_pipe(pipe_jcc);
16776 %}
16777 
16778 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16779   match(If cop cmp);
16780   effect(USE labl);
16781 
16782   ins_cost(200);
16783   format %{ $$template
16784     if ($cop$$cmpcode == Assembler::notEqual) {
16785       $$emit$$"jp,u    $labl\n\t"
16786       $$emit$$"j$cop,u   $labl"
16787     } else {
16788       $$emit$$"jp,u    done\n\t"
16789       $$emit$$"j$cop,u   $labl\n\t"
16790       $$emit$$"done:"
16791     }
16792   %}
16793   ins_encode %{
16794     Label* l = $labl$$label;
16795     if ($cop$$cmpcode == Assembler::notEqual) {
16796       __ jcc(Assembler::parity, *l, false);
16797       __ jcc(Assembler::notEqual, *l, false);
16798     } else if ($cop$$cmpcode == Assembler::equal) {
16799       Label done;
16800       __ jccb(Assembler::parity, done);
16801       __ jcc(Assembler::equal, *l, false);
16802       __ bind(done);
16803     } else {
16804        ShouldNotReachHere();
16805     }
16806   %}
16807   ins_pipe(pipe_jcc);
16808 %}
16809 
16810 // ============================================================================
// The second slow-half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
16816 
16817 instruct partialSubtypeCheck(rdi_RegP result,
16818                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16819                              rFlagsReg cr)
16820 %{
16821   match(Set result (PartialSubtypeCheck sub super));
16822   predicate(!UseSecondarySupersTable);
16823   effect(KILL rcx, KILL cr);
16824 
16825   ins_cost(1100);  // slightly larger than the next version
16826   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16827             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16828             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16829             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16830             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16831             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16832             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
16833     "miss:\t" %}
16834 
16835   ins_encode %{
16836     Label miss;
16837     // NB: Callers may assume that, when $result is a valid register,
16838     // check_klass_subtype_slow_path_linear sets it to a nonzero
16839     // value.
16840     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16841                                             $rcx$$Register, $result$$Register,
16842                                             nullptr, &miss,
16843                                             /*set_cond_codes:*/ true);
16844     __ xorptr($result$$Register, $result$$Register);
16845     __ bind(miss);
16846   %}
16847 
16848   ins_pipe(pipe_slow);
16849 %}
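
// In pseudocode, the linear scan above implements roughly (a sketch of the
// semantics, not the emitted code; accessor names approximate):
//
//   Klass** s   = sub->secondary_supers()->data();
//   int     len = sub->secondary_supers()->length();
//   for (int i = 0; i < len; i++) {
//     if (s[i] == super) { sub->set_secondary_super_cache(super); return 0; }  // hit
//   }
//   return 1;  // non-zero => miss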
16850 
16851 // ============================================================================
16852 // Two versions of hashtable-based partialSubtypeCheck, both used when
16853 // we need to search for a super class in the secondary supers array.
16854 // The first is used when we don't know _a priori_ the class being
16855 // searched for. The second, far more common, is used when we do know:
16856 // this is used for instanceof, checkcast, and any case where C2 can
16857 // determine it by constant propagation.
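//
// For example, a type test whose target is a compile-time constant, such as
//
//   if (x instanceof MyInterface) { ... }
//
// matches the Const form below through the (Binary super_reg super_con)
// pattern, while a test against a non-constant class (e.g. a reflective
// Class.isInstance() call) falls back to the Var form.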
16858 
16859 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16860                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16861                                        rFlagsReg cr)
16862 %{
16863   match(Set result (PartialSubtypeCheck sub super));
16864   predicate(UseSecondarySupersTable);
16865   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16866 
16867   ins_cost(1000);
16868   format %{ "partialSubtypeCheck $result, $sub, $super" %}
16869 
16870   ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
16873   %}
16874 
16875   ins_pipe(pipe_slow);
16876 %}
16877 
16878 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16879                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16880                                        rFlagsReg cr)
16881 %{
16882   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16883   predicate(UseSecondarySupersTable);
16884   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16885 
16886   ins_cost(700);  // smaller than the next version
16887   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16888 
16889   ins_encode %{
16890     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16891     if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
16895     } else {
16896       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16897     }
16898   %}
16899 
16900   ins_pipe(pipe_slow);
16901 %}
16902 
16903 // ============================================================================
16904 // Branch Instructions -- short offset versions
16905 //
// These instructions replace jumps with a long offset (the default match)
// with jumps of a shorter offset.  They are all tagged with the
// ins_short_branch attribute, which causes the ADLC to suppress their
// match rules during general matching.  Instead, the ADLC generates a
// conversion method in the MachNode which can be used to do in-place
// replacement of the long variant with the shorter variant.  The compiler
// decides whether a branch is close enough for the short form using the
// is_short_branch_offset() predicate in the machine-specific code section
// of the file.
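//
// For reference, a sketch of the standard x86 branch encodings behind the
// size() attributes of the long and short variants:
//
//   jmp rel32    E9 cd      5 bytes   (jmpDir,       size(5))
//   jcc rel32    0F 8x cd   6 bytes   (jmpCon,       size(6))
//   jmp rel8     EB cb      2 bytes   (jmpDir_short, size(2))
//   jcc rel8     7x cb      2 bytes   (jmpCon_short, size(2))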
16914 
16915 // Jump Direct - Label defines a relative address from JMP+1
16916 instruct jmpDir_short(label labl) %{
16917   match(Goto);
16918   effect(USE labl);
16919 
16920   ins_cost(300);
16921   format %{ "jmp,s   $labl" %}
16922   size(2);
16923   ins_encode %{
16924     Label* L = $labl$$label;
16925     __ jmpb(*L);
16926   %}
16927   ins_pipe(pipe_jmp);
16928   ins_short_branch(1);
16929 %}
16930 
16931 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16932 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16933   match(If cop cr);
16934   effect(USE labl);
16935 
16936   ins_cost(300);
16937   format %{ "j$cop,s   $labl" %}
16938   size(2);
16939   ins_encode %{
16940     Label* L = $labl$$label;
16941     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16942   %}
16943   ins_pipe(pipe_jcc);
16944   ins_short_branch(1);
16945 %}
16946 
16947 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16948 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16949   match(CountedLoopEnd cop cr);
16950   effect(USE labl);
16951 
16952   ins_cost(300);
16953   format %{ "j$cop,s   $labl\t# loop end" %}
16954   size(2);
16955   ins_encode %{
16956     Label* L = $labl$$label;
16957     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16958   %}
16959   ins_pipe(pipe_jcc);
16960   ins_short_branch(1);
16961 %}
16962 
16963 // Jump Direct Conditional - using unsigned comparison
16964 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16965   match(If cop cmp);
16966   effect(USE labl);
16967 
16968   ins_cost(300);
16969   format %{ "j$cop,us  $labl" %}
16970   size(2);
16971   ins_encode %{
16972     Label* L = $labl$$label;
16973     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16974   %}
16975   ins_pipe(pipe_jcc);
16976   ins_short_branch(1);
16977 %}
16978 
16979 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16980   match(If cop cmp);
16981   effect(USE labl);
16982 
16983   ins_cost(300);
16984   format %{ "j$cop,us  $labl" %}
16985   size(2);
16986   ins_encode %{
16987     Label* L = $labl$$label;
16988     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16989   %}
16990   ins_pipe(pipe_jcc);
16991   ins_short_branch(1);
16992 %}
16993 
16994 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16995   match(If cop cmp);
16996   effect(USE labl);
16997 
16998   ins_cost(300);
16999   format %{ $$template
17000     if ($cop$$cmpcode == Assembler::notEqual) {
17001       $$emit$$"jp,u,s  $labl\n\t"
17002       $$emit$$"j$cop,u,s  $labl"
17003     } else {
17004       $$emit$$"jp,u,s  done\n\t"
17005       $$emit$$"j$cop,u,s  $labl\n\t"
17006       $$emit$$"done:"
17007     }
17008   %}
17009   size(4);
17010   ins_encode %{
17011     Label* l = $labl$$label;
17012     if ($cop$$cmpcode == Assembler::notEqual) {
17013       __ jccb(Assembler::parity, *l);
17014       __ jccb(Assembler::notEqual, *l);
17015     } else if ($cop$$cmpcode == Assembler::equal) {
17016       Label done;
17017       __ jccb(Assembler::parity, done);
17018       __ jccb(Assembler::equal, *l);
17019       __ bind(done);
17020     } else {
17021        ShouldNotReachHere();
17022     }
17023   %}
17024   ins_pipe(pipe_jcc);
17025   ins_short_branch(1);
17026 %}
17027 
17028 // ============================================================================
17029 // inlined locking and unlocking
17030 
17031 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17032   match(Set cr (FastLock object box));
17033   effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17034   ins_cost(300);
17035   format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17036   ins_encode %{
17037     __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17038   %}
17039   ins_pipe(pipe_slow);
17040 %}
17041 
17042 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17043   match(Set cr (FastUnlock object rax_reg));
17044   effect(TEMP tmp, USE_KILL rax_reg);
17045   ins_cost(300);
17046   format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17047   ins_encode %{
17048     __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17049   %}
17050   ins_pipe(pipe_slow);
17051 %}
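
// Roughly, the contract assumed here (see fast_lock()/fast_unlock() in the
// macro assembler): the flags produced above feed a conditional branch
// generated by C2 -- equal (ZF set) means the inline fast path succeeded,
// while not-equal falls through to the runtime monitorenter/monitorexit
// slow path.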
17052 
17054 // ============================================================================
17055 // Safepoint Instructions
17056 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17057 %{
17058   match(SafePoint poll);
17059   effect(KILL cr, USE poll);
17060 
17061   format %{ "testl   rax, [$poll]\t"
17062             "# Safepoint: poll for GC" %}
17063   ins_cost(125);
17064   ins_encode %{
17065     __ relocate(relocInfo::poll_type);
17066     address pre_pc = __ pc();
17067     __ testl(rax, Address($poll$$Register, 0));
17068     assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17069   %}
17070   ins_pipe(ialu_reg_mem);
17071 %}
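
// A sketch of the polling protocol relied on above: $poll holds the
// thread-local polling-page address.  While no safepoint is pending the page
// is readable and the testl is harmless; to stop the thread the VM protects
// the page, the load faults, and the signal handler parks the thread at a
// safepoint.  The poll_type relocation lets the VM recognize the faulting PC
// as a safepoint poll.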
17072 
17073 instruct mask_all_evexL(kReg dst, rRegL src) %{
17074   match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src\t! mask all operation" %}
17076   ins_encode %{
17077     int mask_len = Matcher::vector_length(this);
17078     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17079   %}
17080   ins_pipe( pipe_slow );
17081 %}
17082 
17083 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17084   predicate(Matcher::vector_length(n) > 32);
17085   match(Set dst (MaskAll src));
17086   effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src\t! using $tmp as TEMP" %}
17088   ins_encode %{
17089     int mask_len = Matcher::vector_length(this);
17090     __ movslq($tmp$$Register, $src$$Register);
17091     __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17092   %}
17093   ins_pipe( pipe_slow );
17094 %}
17095 
17096 // ============================================================================
17097 // Procedure Call/Return Instructions
17098 // Call Java Static Instruction
17099 // Note: If this code changes, the corresponding ret_addr_offset() and
17100 //       compute_padding() functions will have to be adjusted.
17101 instruct CallStaticJavaDirect(method meth) %{
17102   match(CallStaticJava);
17103   effect(USE meth);
17104 
17105   ins_cost(300);
17106   format %{ "call,static " %}
17107   opcode(0xE8); /* E8 cd */
17108   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17109   ins_pipe(pipe_slow);
17110   ins_alignment(4);
17111 %}
17112 
17113 // Call Java Dynamic Instruction
17114 // Note: If this code changes, the corresponding ret_addr_offset() and
17115 //       compute_padding() functions will have to be adjusted.
17116 instruct CallDynamicJavaDirect(method meth)
17117 %{
17118   match(CallDynamicJava);
17119   effect(USE meth);
17120 
17121   ins_cost(300);
17122   format %{ "movq    rax, #Universe::non_oop_word()\n\t"
17123             "call,dynamic " %}
17124   ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17125   ins_pipe(pipe_slow);
17126   ins_alignment(4);
17127 %}
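
// A note on the rax pre-load above (a sketch of the inline-cache convention
// assumed on x86_64): rax carries the inline cache's cached value, and
// Universe::non_oop_word() is a sentinel that can never be a real oop or
// Klass, so the first call through an unresolved IC always misses and is
// routed to the resolution stub.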
17128 
17129 // Call Runtime Instruction
17130 instruct CallRuntimeDirect(method meth)
17131 %{
17132   match(CallRuntime);
17133   effect(USE meth);
17134 
17135   ins_cost(300);
17136   format %{ "call,runtime " %}
17137   ins_encode(clear_avx, Java_To_Runtime(meth));
17138   ins_pipe(pipe_slow);
17139 %}
17140 
17141 // Call runtime without safepoint
17142 instruct CallLeafDirect(method meth)
17143 %{
17144   match(CallLeaf);
17145   effect(USE meth);
17146 
17147   ins_cost(300);
17148   format %{ "call_leaf,runtime " %}
17149   ins_encode(clear_avx, Java_To_Runtime(meth));
17150   ins_pipe(pipe_slow);
17151 %}
17152 
17153 // Call runtime without safepoint and with vector arguments
17154 instruct CallLeafDirectVector(method meth)
17155 %{
17156   match(CallLeafVector);
17157   effect(USE meth);
17158 
17159   ins_cost(300);
17160   format %{ "call_leaf,vector " %}
17161   ins_encode(Java_To_Runtime(meth));
17162   ins_pipe(pipe_slow);
17163 %}
17164 
17165 // Call runtime without safepoint
17166 instruct CallLeafNoFPDirect(method meth)
17167 %{
17168   match(CallLeafNoFP);
17169   effect(USE meth);
17170 
17171   ins_cost(300);
17172   format %{ "call_leaf_nofp,runtime " %}
17173   ins_encode(clear_avx, Java_To_Runtime(meth));
17174   ins_pipe(pipe_slow);
17175 %}
17176 
17177 // Return Instruction
17178 // Remove the return address & jump to it.
// Note: we always emit a nop after a ret to make sure there is room
// for safepoint patching.
17181 instruct Ret()
17182 %{
17183   match(Return);
17184 
17185   format %{ "ret" %}
17186   ins_encode %{
17187     __ ret(0);
17188   %}
17189   ins_pipe(pipe_jmp);
17190 %}
17191 
17192 // Tail Call; Jump from runtime stub to Java code.
17193 // Also known as an 'interprocedural jump'.
17194 // Target of jump will eventually return to caller.
17195 // TailJump below removes the return address.
17196 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17197 // emitted just above the TailCall which has reset rbp to the caller state.
17198 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17199 %{
17200   match(TailCall jump_target method_ptr);
17201 
17202   ins_cost(300);
17203   format %{ "jmp     $jump_target\t# rbx holds method" %}
17204   ins_encode %{
17205     __ jmp($jump_target$$Register);
17206   %}
17207   ins_pipe(pipe_jmp);
17208 %}
17209 
17210 // Tail Jump; remove the return address; jump to target.
17211 // TailCall above leaves the return address around.
17212 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17213 %{
17214   match(TailJump jump_target ex_oop);
17215 
17216   ins_cost(300);
17217   format %{ "popq    rdx\t# pop return address\n\t"
17218             "jmp     $jump_target" %}
17219   ins_encode %{
17220     __ popq(as_Register(RDX_enc));
17221     __ jmp($jump_target$$Register);
17222   %}
17223   ins_pipe(pipe_jmp);
17224 %}
17225 
17226 // Forward exception.
17227 instruct ForwardExceptionjmp()
17228 %{
17229   match(ForwardException);
17230 
17231   format %{ "jmp     forward_exception_stub" %}
17232   ins_encode %{
17233     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17234   %}
17235   ins_pipe(pipe_jmp);
17236 %}
17237 
17238 // Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler, and is set up
// just prior to jumping to this handler.  No code is emitted.
17241 instruct CreateException(rax_RegP ex_oop)
17242 %{
17243   match(Set ex_oop (CreateEx));
17244 
17245   size(0);
17246   // use the following format syntax
17247   format %{ "# exception oop is in rax; no code emitted" %}
17248   ins_encode();
17249   ins_pipe(empty);
17250 %}
17251 
17252 // Rethrow exception:
17253 // The exception oop will come in the first argument position.
17254 // Then JUMP (not call) to the rethrow stub code.
17255 instruct RethrowException()
17256 %{
17257   match(Rethrow);
17258 
17259   // use the following format syntax
17260   format %{ "jmp     rethrow_stub" %}
17261   ins_encode %{
17262     __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17263   %}
17264   ins_pipe(pipe_jmp);
17265 %}
17266 
17267 // ============================================================================
17268 // This name is KNOWN by the ADLC and cannot be changed.
17269 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
17271 instruct tlsLoadP(r15_RegP dst) %{
17272   match(Set dst (ThreadLocal));
17273   effect(DEF dst);
17274 
17275   size(0);
17276   format %{ "# TLS is in R15" %}
17277   ins_encode( /*empty encoding*/ );
17278   ins_pipe(ialu_reg_reg);
17279 %}
17280 
17281 instruct addF_reg(regF dst, regF src) %{
17282   predicate(UseAVX == 0);
17283   match(Set dst (AddF dst src));
17284 
17285   format %{ "addss   $dst, $src" %}
17286   ins_cost(150);
17287   ins_encode %{
17288     __ addss($dst$$XMMRegister, $src$$XMMRegister);
17289   %}
17290   ins_pipe(pipe_slow);
17291 %}
17292 
17293 instruct addF_mem(regF dst, memory src) %{
17294   predicate(UseAVX == 0);
17295   match(Set dst (AddF dst (LoadF src)));
17296 
17297   format %{ "addss   $dst, $src" %}
17298   ins_cost(150);
17299   ins_encode %{
17300     __ addss($dst$$XMMRegister, $src$$Address);
17301   %}
17302   ins_pipe(pipe_slow);
17303 %}
17304 
17305 instruct addF_imm(regF dst, immF con) %{
17306   predicate(UseAVX == 0);
17307   match(Set dst (AddF dst con));
17308   format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17309   ins_cost(150);
17310   ins_encode %{
17311     __ addss($dst$$XMMRegister, $constantaddress($con));
17312   %}
17313   ins_pipe(pipe_slow);
17314 %}
17315 
17316 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17317   predicate(UseAVX > 0);
17318   match(Set dst (AddF src1 src2));
17319 
17320   format %{ "vaddss  $dst, $src1, $src2" %}
17321   ins_cost(150);
17322   ins_encode %{
17323     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17324   %}
17325   ins_pipe(pipe_slow);
17326 %}
17327 
17328 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17329   predicate(UseAVX > 0);
17330   match(Set dst (AddF src1 (LoadF src2)));
17331 
17332   format %{ "vaddss  $dst, $src1, $src2" %}
17333   ins_cost(150);
17334   ins_encode %{
17335     __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17336   %}
17337   ins_pipe(pipe_slow);
17338 %}
17339 
17340 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17341   predicate(UseAVX > 0);
17342   match(Set dst (AddF src con));
17343 
17344   format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17345   ins_cost(150);
17346   ins_encode %{
17347     __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17348   %}
17349   ins_pipe(pipe_slow);
17350 %}
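
// Note the pattern split used throughout these scalar FP rules: the
// UseAVX == 0 forms match (Set dst (AddF dst src)) because SSE addss is
// destructive (dst = dst + src), while the AVX forms use the non-destructive
// three-operand vaddss (dst = src1 + src2), which frees the register
// allocator from having to place the result in the first input.  The same
// split applies to the sub/mul/div rules below.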
17351 
17352 instruct addD_reg(regD dst, regD src) %{
17353   predicate(UseAVX == 0);
17354   match(Set dst (AddD dst src));
17355 
17356   format %{ "addsd   $dst, $src" %}
17357   ins_cost(150);
17358   ins_encode %{
17359     __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17360   %}
17361   ins_pipe(pipe_slow);
17362 %}
17363 
17364 instruct addD_mem(regD dst, memory src) %{
17365   predicate(UseAVX == 0);
17366   match(Set dst (AddD dst (LoadD src)));
17367 
17368   format %{ "addsd   $dst, $src" %}
17369   ins_cost(150);
17370   ins_encode %{
17371     __ addsd($dst$$XMMRegister, $src$$Address);
17372   %}
17373   ins_pipe(pipe_slow);
17374 %}
17375 
17376 instruct addD_imm(regD dst, immD con) %{
17377   predicate(UseAVX == 0);
17378   match(Set dst (AddD dst con));
17379   format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17380   ins_cost(150);
17381   ins_encode %{
17382     __ addsd($dst$$XMMRegister, $constantaddress($con));
17383   %}
17384   ins_pipe(pipe_slow);
17385 %}
17386 
17387 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17388   predicate(UseAVX > 0);
17389   match(Set dst (AddD src1 src2));
17390 
17391   format %{ "vaddsd  $dst, $src1, $src2" %}
17392   ins_cost(150);
17393   ins_encode %{
17394     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17395   %}
17396   ins_pipe(pipe_slow);
17397 %}
17398 
17399 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17400   predicate(UseAVX > 0);
17401   match(Set dst (AddD src1 (LoadD src2)));
17402 
17403   format %{ "vaddsd  $dst, $src1, $src2" %}
17404   ins_cost(150);
17405   ins_encode %{
17406     __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17407   %}
17408   ins_pipe(pipe_slow);
17409 %}
17410 
17411 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17412   predicate(UseAVX > 0);
17413   match(Set dst (AddD src con));
17414 
17415   format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17416   ins_cost(150);
17417   ins_encode %{
17418     __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17419   %}
17420   ins_pipe(pipe_slow);
17421 %}
17422 
17423 instruct subF_reg(regF dst, regF src) %{
17424   predicate(UseAVX == 0);
17425   match(Set dst (SubF dst src));
17426 
17427   format %{ "subss   $dst, $src" %}
17428   ins_cost(150);
17429   ins_encode %{
17430     __ subss($dst$$XMMRegister, $src$$XMMRegister);
17431   %}
17432   ins_pipe(pipe_slow);
17433 %}
17434 
17435 instruct subF_mem(regF dst, memory src) %{
17436   predicate(UseAVX == 0);
17437   match(Set dst (SubF dst (LoadF src)));
17438 
17439   format %{ "subss   $dst, $src" %}
17440   ins_cost(150);
17441   ins_encode %{
17442     __ subss($dst$$XMMRegister, $src$$Address);
17443   %}
17444   ins_pipe(pipe_slow);
17445 %}
17446 
17447 instruct subF_imm(regF dst, immF con) %{
17448   predicate(UseAVX == 0);
17449   match(Set dst (SubF dst con));
17450   format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17451   ins_cost(150);
17452   ins_encode %{
17453     __ subss($dst$$XMMRegister, $constantaddress($con));
17454   %}
17455   ins_pipe(pipe_slow);
17456 %}
17457 
17458 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17459   predicate(UseAVX > 0);
17460   match(Set dst (SubF src1 src2));
17461 
17462   format %{ "vsubss  $dst, $src1, $src2" %}
17463   ins_cost(150);
17464   ins_encode %{
17465     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17466   %}
17467   ins_pipe(pipe_slow);
17468 %}
17469 
17470 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17471   predicate(UseAVX > 0);
17472   match(Set dst (SubF src1 (LoadF src2)));
17473 
17474   format %{ "vsubss  $dst, $src1, $src2" %}
17475   ins_cost(150);
17476   ins_encode %{
17477     __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17478   %}
17479   ins_pipe(pipe_slow);
17480 %}
17481 
17482 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17483   predicate(UseAVX > 0);
17484   match(Set dst (SubF src con));
17485 
17486   format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17487   ins_cost(150);
17488   ins_encode %{
17489     __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17490   %}
17491   ins_pipe(pipe_slow);
17492 %}
17493 
17494 instruct subD_reg(regD dst, regD src) %{
17495   predicate(UseAVX == 0);
17496   match(Set dst (SubD dst src));
17497 
17498   format %{ "subsd   $dst, $src" %}
17499   ins_cost(150);
17500   ins_encode %{
17501     __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17502   %}
17503   ins_pipe(pipe_slow);
17504 %}
17505 
17506 instruct subD_mem(regD dst, memory src) %{
17507   predicate(UseAVX == 0);
17508   match(Set dst (SubD dst (LoadD src)));
17509 
17510   format %{ "subsd   $dst, $src" %}
17511   ins_cost(150);
17512   ins_encode %{
17513     __ subsd($dst$$XMMRegister, $src$$Address);
17514   %}
17515   ins_pipe(pipe_slow);
17516 %}
17517 
17518 instruct subD_imm(regD dst, immD con) %{
17519   predicate(UseAVX == 0);
17520   match(Set dst (SubD dst con));
17521   format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17522   ins_cost(150);
17523   ins_encode %{
17524     __ subsd($dst$$XMMRegister, $constantaddress($con));
17525   %}
17526   ins_pipe(pipe_slow);
17527 %}
17528 
17529 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17530   predicate(UseAVX > 0);
17531   match(Set dst (SubD src1 src2));
17532 
17533   format %{ "vsubsd  $dst, $src1, $src2" %}
17534   ins_cost(150);
17535   ins_encode %{
17536     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17537   %}
17538   ins_pipe(pipe_slow);
17539 %}
17540 
17541 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17542   predicate(UseAVX > 0);
17543   match(Set dst (SubD src1 (LoadD src2)));
17544 
17545   format %{ "vsubsd  $dst, $src1, $src2" %}
17546   ins_cost(150);
17547   ins_encode %{
17548     __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17549   %}
17550   ins_pipe(pipe_slow);
17551 %}
17552 
17553 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17554   predicate(UseAVX > 0);
17555   match(Set dst (SubD src con));
17556 
17557   format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17558   ins_cost(150);
17559   ins_encode %{
17560     __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17561   %}
17562   ins_pipe(pipe_slow);
17563 %}
17564 
17565 instruct mulF_reg(regF dst, regF src) %{
17566   predicate(UseAVX == 0);
17567   match(Set dst (MulF dst src));
17568 
17569   format %{ "mulss   $dst, $src" %}
17570   ins_cost(150);
17571   ins_encode %{
17572     __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17573   %}
17574   ins_pipe(pipe_slow);
17575 %}
17576 
17577 instruct mulF_mem(regF dst, memory src) %{
17578   predicate(UseAVX == 0);
17579   match(Set dst (MulF dst (LoadF src)));
17580 
17581   format %{ "mulss   $dst, $src" %}
17582   ins_cost(150);
17583   ins_encode %{
17584     __ mulss($dst$$XMMRegister, $src$$Address);
17585   %}
17586   ins_pipe(pipe_slow);
17587 %}
17588 
17589 instruct mulF_imm(regF dst, immF con) %{
17590   predicate(UseAVX == 0);
17591   match(Set dst (MulF dst con));
17592   format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17593   ins_cost(150);
17594   ins_encode %{
17595     __ mulss($dst$$XMMRegister, $constantaddress($con));
17596   %}
17597   ins_pipe(pipe_slow);
17598 %}
17599 
17600 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17601   predicate(UseAVX > 0);
17602   match(Set dst (MulF src1 src2));
17603 
17604   format %{ "vmulss  $dst, $src1, $src2" %}
17605   ins_cost(150);
17606   ins_encode %{
17607     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17608   %}
17609   ins_pipe(pipe_slow);
17610 %}
17611 
17612 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17613   predicate(UseAVX > 0);
17614   match(Set dst (MulF src1 (LoadF src2)));
17615 
17616   format %{ "vmulss  $dst, $src1, $src2" %}
17617   ins_cost(150);
17618   ins_encode %{
17619     __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17620   %}
17621   ins_pipe(pipe_slow);
17622 %}
17623 
17624 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17625   predicate(UseAVX > 0);
17626   match(Set dst (MulF src con));
17627 
17628   format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17629   ins_cost(150);
17630   ins_encode %{
17631     __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17632   %}
17633   ins_pipe(pipe_slow);
17634 %}
17635 
17636 instruct mulD_reg(regD dst, regD src) %{
17637   predicate(UseAVX == 0);
17638   match(Set dst (MulD dst src));
17639 
17640   format %{ "mulsd   $dst, $src" %}
17641   ins_cost(150);
17642   ins_encode %{
17643     __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17644   %}
17645   ins_pipe(pipe_slow);
17646 %}
17647 
17648 instruct mulD_mem(regD dst, memory src) %{
17649   predicate(UseAVX == 0);
17650   match(Set dst (MulD dst (LoadD src)));
17651 
17652   format %{ "mulsd   $dst, $src" %}
17653   ins_cost(150);
17654   ins_encode %{
17655     __ mulsd($dst$$XMMRegister, $src$$Address);
17656   %}
17657   ins_pipe(pipe_slow);
17658 %}
17659 
17660 instruct mulD_imm(regD dst, immD con) %{
17661   predicate(UseAVX == 0);
17662   match(Set dst (MulD dst con));
17663   format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17664   ins_cost(150);
17665   ins_encode %{
17666     __ mulsd($dst$$XMMRegister, $constantaddress($con));
17667   %}
17668   ins_pipe(pipe_slow);
17669 %}
17670 
17671 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17672   predicate(UseAVX > 0);
17673   match(Set dst (MulD src1 src2));
17674 
17675   format %{ "vmulsd  $dst, $src1, $src2" %}
17676   ins_cost(150);
17677   ins_encode %{
17678     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17679   %}
17680   ins_pipe(pipe_slow);
17681 %}
17682 
17683 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17684   predicate(UseAVX > 0);
17685   match(Set dst (MulD src1 (LoadD src2)));
17686 
17687   format %{ "vmulsd  $dst, $src1, $src2" %}
17688   ins_cost(150);
17689   ins_encode %{
17690     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17691   %}
17692   ins_pipe(pipe_slow);
17693 %}
17694 
17695 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17696   predicate(UseAVX > 0);
17697   match(Set dst (MulD src con));
17698 
17699   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17700   ins_cost(150);
17701   ins_encode %{
17702     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17703   %}
17704   ins_pipe(pipe_slow);
17705 %}
17706 
17707 instruct divF_reg(regF dst, regF src) %{
17708   predicate(UseAVX == 0);
17709   match(Set dst (DivF dst src));
17710 
17711   format %{ "divss   $dst, $src" %}
17712   ins_cost(150);
17713   ins_encode %{
17714     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17715   %}
17716   ins_pipe(pipe_slow);
17717 %}
17718 
17719 instruct divF_mem(regF dst, memory src) %{
17720   predicate(UseAVX == 0);
17721   match(Set dst (DivF dst (LoadF src)));
17722 
17723   format %{ "divss   $dst, $src" %}
17724   ins_cost(150);
17725   ins_encode %{
17726     __ divss($dst$$XMMRegister, $src$$Address);
17727   %}
17728   ins_pipe(pipe_slow);
17729 %}
17730 
17731 instruct divF_imm(regF dst, immF con) %{
17732   predicate(UseAVX == 0);
17733   match(Set dst (DivF dst con));
17734   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17735   ins_cost(150);
17736   ins_encode %{
17737     __ divss($dst$$XMMRegister, $constantaddress($con));
17738   %}
17739   ins_pipe(pipe_slow);
17740 %}
17741 
17742 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17743   predicate(UseAVX > 0);
17744   match(Set dst (DivF src1 src2));
17745 
17746   format %{ "vdivss  $dst, $src1, $src2" %}
17747   ins_cost(150);
17748   ins_encode %{
17749     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17750   %}
17751   ins_pipe(pipe_slow);
17752 %}
17753 
17754 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17755   predicate(UseAVX > 0);
17756   match(Set dst (DivF src1 (LoadF src2)));
17757 
17758   format %{ "vdivss  $dst, $src1, $src2" %}
17759   ins_cost(150);
17760   ins_encode %{
17761     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17762   %}
17763   ins_pipe(pipe_slow);
17764 %}
17765 
17766 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17767   predicate(UseAVX > 0);
17768   match(Set dst (DivF src con));
17769 
17770   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17771   ins_cost(150);
17772   ins_encode %{
17773     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17774   %}
17775   ins_pipe(pipe_slow);
17776 %}
17777 
17778 instruct divD_reg(regD dst, regD src) %{
17779   predicate(UseAVX == 0);
17780   match(Set dst (DivD dst src));
17781 
17782   format %{ "divsd   $dst, $src" %}
17783   ins_cost(150);
17784   ins_encode %{
17785     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17786   %}
17787   ins_pipe(pipe_slow);
17788 %}
17789 
17790 instruct divD_mem(regD dst, memory src) %{
17791   predicate(UseAVX == 0);
17792   match(Set dst (DivD dst (LoadD src)));
17793 
17794   format %{ "divsd   $dst, $src" %}
17795   ins_cost(150);
17796   ins_encode %{
17797     __ divsd($dst$$XMMRegister, $src$$Address);
17798   %}
17799   ins_pipe(pipe_slow);
17800 %}
17801 
17802 instruct divD_imm(regD dst, immD con) %{
17803   predicate(UseAVX == 0);
17804   match(Set dst (DivD dst con));
17805   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17806   ins_cost(150);
17807   ins_encode %{
17808     __ divsd($dst$$XMMRegister, $constantaddress($con));
17809   %}
17810   ins_pipe(pipe_slow);
17811 %}
17812 
17813 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17814   predicate(UseAVX > 0);
17815   match(Set dst (DivD src1 src2));
17816 
17817   format %{ "vdivsd  $dst, $src1, $src2" %}
17818   ins_cost(150);
17819   ins_encode %{
17820     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17821   %}
17822   ins_pipe(pipe_slow);
17823 %}
17824 
17825 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17826   predicate(UseAVX > 0);
17827   match(Set dst (DivD src1 (LoadD src2)));
17828 
17829   format %{ "vdivsd  $dst, $src1, $src2" %}
17830   ins_cost(150);
17831   ins_encode %{
17832     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17833   %}
17834   ins_pipe(pipe_slow);
17835 %}
17836 
17837 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17838   predicate(UseAVX > 0);
17839   match(Set dst (DivD src con));
17840 
17841   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17842   ins_cost(150);
17843   ins_encode %{
17844     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17845   %}
17846   ins_pipe(pipe_slow);
17847 %}
17848 
17849 instruct absF_reg(regF dst) %{
17850   predicate(UseAVX == 0);
17851   match(Set dst (AbsF dst));
17852   ins_cost(150);
17853   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
17854   ins_encode %{
17855     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17856   %}
17857   ins_pipe(pipe_slow);
17858 %}
17859 
17860 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17861   predicate(UseAVX > 0);
17862   match(Set dst (AbsF src));
17863   ins_cost(150);
17864   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17865   ins_encode %{
17866     int vlen_enc = Assembler::AVX_128bit;
17867     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17868               ExternalAddress(float_signmask()), vlen_enc);
17869   %}
17870   ins_pipe(pipe_slow);
17871 %}
17872 
17873 instruct absD_reg(regD dst) %{
17874   predicate(UseAVX == 0);
17875   match(Set dst (AbsD dst));
17876   ins_cost(150);
17877   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
17878             "# abs double by sign masking" %}
17879   ins_encode %{
17880     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17881   %}
17882   ins_pipe(pipe_slow);
17883 %}
17884 
17885 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17886   predicate(UseAVX > 0);
17887   match(Set dst (AbsD src));
17888   ins_cost(150);
17889   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
17890             "# abs double by sign masking" %}
17891   ins_encode %{
17892     int vlen_enc = Assembler::AVX_128bit;
17893     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17894               ExternalAddress(double_signmask()), vlen_enc);
17895   %}
17896   ins_pipe(pipe_slow);
17897 %}
17898 
17899 instruct negF_reg(regF dst) %{
17900   predicate(UseAVX == 0);
17901   match(Set dst (NegF dst));
17902   ins_cost(150);
17903   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
17904   ins_encode %{
17905     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17906   %}
17907   ins_pipe(pipe_slow);
17908 %}
17909 
17910 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17911   predicate(UseAVX > 0);
17912   match(Set dst (NegF src));
17913   ins_cost(150);
17914   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17915   ins_encode %{
17916     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17917                  ExternalAddress(float_signflip()));
17918   %}
17919   ins_pipe(pipe_slow);
17920 %}
17921 
17922 instruct negD_reg(regD dst) %{
17923   predicate(UseAVX == 0);
17924   match(Set dst (NegD dst));
17925   ins_cost(150);
17926   format %{ "xorpd   $dst, [0x8000000000000000]\t"
17927             "# neg double by sign flipping" %}
17928   ins_encode %{
17929     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17930   %}
17931   ins_pipe(pipe_slow);
17932 %}
17933 
17934 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17935   predicate(UseAVX > 0);
17936   match(Set dst (NegD src));
17937   ins_cost(150);
17938   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
17939             "# neg double by sign flipping" %}
17940   ins_encode %{
17941     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17942                  ExternalAddress(double_signflip()));
17943   %}
17944   ins_pipe(pipe_slow);
17945 %}
17946 
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance: it writes only the low 32 bits of dst, so stale upper
// bits would otherwise create a false dependency on dst's previous contents.
// Therefore only the rule where the input is pre-loaded into the dst register
// is defined below.
17949 instruct sqrtF_reg(regF dst) %{
17950   match(Set dst (SqrtF dst));
17951   format %{ "sqrtss  $dst, $dst" %}
17952   ins_encode %{
17953     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17954   %}
17955   ins_pipe(pipe_slow);
17956 %}
17957 
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance: it writes only the low 64 bits of dst, leaving a false
// dependency on dst's previous contents.  Therefore only the rule where the
// input is pre-loaded into the dst register is defined below.
17960 instruct sqrtD_reg(regD dst) %{
17961   match(Set dst (SqrtD dst));
17962   format %{ "sqrtsd  $dst, $dst" %}
17963   ins_encode %{
17964     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17965   %}
17966   ins_pipe(pipe_slow);
17967 %}
17968 
17969 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17970   effect(TEMP tmp);
17971   match(Set dst (ConvF2HF src));
17972   ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
17974   ins_encode %{
17975     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17976   %}
17977   ins_pipe( pipe_slow );
17978 %}
17979 
17980 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
17981   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17982   effect(TEMP ktmp, TEMP rtmp);
17983   match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
17985   ins_encode %{
17986     __ movl($rtmp$$Register, 0x1);
17987     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
17988     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17989   %}
17990   ins_pipe( pipe_slow );
17991 %}
17992 
17993 instruct vconvF2HF(vec dst, vec src) %{
17994   match(Set dst (VectorCastF2HF src));
17995   format %{ "vector_conv_F2HF $dst $src" %}
17996   ins_encode %{
17997     int vlen_enc = vector_length_encoding(this, $src);
17998     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
17999   %}
18000   ins_pipe( pipe_slow );
18001 %}
18002 
18003 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18004   predicate(n->as_StoreVector()->memory_size() >= 16);
18005   match(Set mem (StoreVector mem (VectorCastF2HF src)));
18006   format %{ "vcvtps2ph $mem,$src" %}
18007   ins_encode %{
18008     int vlen_enc = vector_length_encoding(this, $src);
18009     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18010   %}
18011   ins_pipe( pipe_slow );
18012 %}
18013 
18014 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18015   match(Set dst (ConvHF2F src));
18016   format %{ "vcvtph2ps $dst,$src" %}
18017   ins_encode %{
18018     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18019   %}
18020   ins_pipe( pipe_slow );
18021 %}
18022 
18023 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18024   match(Set dst (VectorCastHF2F (LoadVector mem)));
18025   format %{ "vcvtph2ps $dst,$mem" %}
18026   ins_encode %{
18027     int vlen_enc = vector_length_encoding(this);
18028     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18029   %}
18030   ins_pipe( pipe_slow );
18031 %}
18032 
18033 instruct vconvHF2F(vec dst, vec src) %{
18034   match(Set dst (VectorCastHF2F src));
18035   ins_cost(125);
18036   format %{ "vector_conv_HF2F $dst,$src" %}
18037   ins_encode %{
18038     int vlen_enc = vector_length_encoding(this);
18039     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18040   %}
18041   ins_pipe( pipe_slow );
18042 %}
18043 
18044 // ---------------------------------------- VectorReinterpret ------------------------------------
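//
// VectorReinterpret is a bitwise cast.  When source and destination sizes
// match it is a no-op (the reinterpret rules below emit nothing); when
// expanding, the extra high-order bytes must read as zero, hence the masking
// in the *_expand rules; when shrinking, a plain narrower move suffices.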
18045 instruct reinterpret_mask(kReg dst) %{
18046   predicate(n->bottom_type()->isa_vectmask() &&
18047             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18048   match(Set dst (VectorReinterpret dst));
18049   ins_cost(125);
18050   format %{ "vector_reinterpret $dst\t!" %}
18051   ins_encode %{
18052     // empty
18053   %}
18054   ins_pipe( pipe_slow );
18055 %}
18056 
18057 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18058   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18059             n->bottom_type()->isa_vectmask() &&
18060             n->in(1)->bottom_type()->isa_vectmask() &&
18061             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18062             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18063   match(Set dst (VectorReinterpret src));
18064   effect(TEMP xtmp);
18065   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18066   ins_encode %{
18067      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18068      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18073   %}
18074   ins_pipe( pipe_slow );
18075 %}
18076 
18077 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18078   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18079             n->bottom_type()->isa_vectmask() &&
18080             n->in(1)->bottom_type()->isa_vectmask() &&
18081             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18082              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18083             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18084   match(Set dst (VectorReinterpret src));
18085   effect(TEMP xtmp);
18086   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18087   ins_encode %{
18088      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18089      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18094   %}
18095   ins_pipe( pipe_slow );
18096 %}
18097 
18098 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18099   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18100             n->bottom_type()->isa_vectmask() &&
18101             n->in(1)->bottom_type()->isa_vectmask() &&
18102             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18103              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18104             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18105   match(Set dst (VectorReinterpret src));
18106   effect(TEMP xtmp);
18107   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18108   ins_encode %{
18109      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18110      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
     assert(src_sz == dst_sz, "src and dst size mismatch");
     int vlen_enc = vector_length_encoding(src_sz);
     __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
     __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18115   %}
18116   ins_pipe( pipe_slow );
18117 %}
18118 
18119 instruct reinterpret(vec dst) %{
18120   predicate(!n->bottom_type()->isa_vectmask() &&
18121             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18122   match(Set dst (VectorReinterpret dst));
18123   ins_cost(125);
18124   format %{ "vector_reinterpret $dst\t!" %}
18125   ins_encode %{
18126     // empty
18127   %}
18128   ins_pipe( pipe_slow );
18129 %}
18130 
18131 instruct reinterpret_expand(vec dst, vec src) %{
18132   predicate(UseAVX == 0 &&
18133             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18134   match(Set dst (VectorReinterpret src));
18135   ins_cost(125);
18136   effect(TEMP dst);
18137   format %{ "vector_reinterpret_expand $dst,$src" %}
18138   ins_encode %{
18139     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
18140     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
18141 
18142     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18143     if (src_vlen_in_bytes == 4) {
18144       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18145     } else {
18146       assert(src_vlen_in_bytes == 8, "");
18147       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18148     }
18149     __ pand($dst$$XMMRegister, $src$$XMMRegister);
18150   %}
18151   ins_pipe( pipe_slow );
18152 %}
18153 
18154 instruct vreinterpret_expand4(legVec dst, vec src) %{
18155   predicate(UseAVX > 0 &&
18156             !n->bottom_type()->isa_vectmask() &&
18157             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18158             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18159   match(Set dst (VectorReinterpret src));
18160   ins_cost(125);
18161   format %{ "vector_reinterpret_expand $dst,$src" %}
18162   ins_encode %{
18163     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18164   %}
18165   ins_pipe( pipe_slow );
18166 %}
18167 
18169 instruct vreinterpret_expand(legVec dst, vec src) %{
18170   predicate(UseAVX > 0 &&
18171             !n->bottom_type()->isa_vectmask() &&
18172             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18173             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18174   match(Set dst (VectorReinterpret src));
18175   ins_cost(125);
18176   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18177   ins_encode %{
18178     switch (Matcher::vector_length_in_bytes(this, $src)) {
18179       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18180       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18181       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18182       default: ShouldNotReachHere();
18183     }
18184   %}
18185   ins_pipe( pipe_slow );
18186 %}
18187 
18188 instruct reinterpret_shrink(vec dst, legVec src) %{
18189   predicate(!n->bottom_type()->isa_vectmask() &&
18190             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18191   match(Set dst (VectorReinterpret src));
18192   ins_cost(125);
18193   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18194   ins_encode %{
18195     switch (Matcher::vector_length_in_bytes(this)) {
18196       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18197       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
18198       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18199       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18200       default: ShouldNotReachHere();
18201     }
18202   %}
18203   ins_pipe( pipe_slow );
18204 %}
18205 
18206 // ----------------------------------------------------------------------------------------------------
18207 
18208 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18209   match(Set dst (RoundDoubleMode src rmode));
18210   format %{ "roundsd $dst,$src" %}
18211   ins_cost(150);
18212   ins_encode %{
18213     assert(UseSSE >= 4, "required");
18214     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18215       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18216     }
18217     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18218   %}
18219   ins_pipe(pipe_slow);
18220 %}
18221 
18222 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18223   match(Set dst (RoundDoubleMode con rmode));
18224   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18225   ins_cost(150);
18226   ins_encode %{
18227     assert(UseSSE >= 4, "required");
18228     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18229   %}
18230   ins_pipe(pipe_slow);
18231 %}
18232 
18233 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18234   predicate(Matcher::vector_length(n) < 8);
18235   match(Set dst (RoundDoubleModeV src rmode));
18236   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18237   ins_encode %{
18238     assert(UseAVX > 0, "required");
18239     int vlen_enc = vector_length_encoding(this);
18240     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18241   %}
18242   ins_pipe( pipe_slow );
18243 %}
18244 
18245 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18246   predicate(Matcher::vector_length(n) == 8);
18247   match(Set dst (RoundDoubleModeV src rmode));
18248   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18249   ins_encode %{
18250     assert(UseAVX > 2, "required");
18251     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18252   %}
18253   ins_pipe( pipe_slow );
18254 %}
18255 
18256 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18257   predicate(Matcher::vector_length(n) < 8);
18258   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18259   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18260   ins_encode %{
18261     assert(UseAVX > 0, "required");
18262     int vlen_enc = vector_length_encoding(this);
18263     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18264   %}
18265   ins_pipe( pipe_slow );
18266 %}
18267 
18268 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18269   predicate(Matcher::vector_length(n) == 8);
18270   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18271   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18272   ins_encode %{
18273     assert(UseAVX > 2, "required");
18274     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18275   %}
18276   ins_pipe( pipe_slow );
18277 %}
18278 
18279 instruct onspinwait() %{
18280   match(OnSpinWait);
18281   ins_cost(200);
18282 
18283   format %{
18284     $$template
18285     $$emit$$"pause\t! membar_onspinwait"
18286   %}
18287   ins_encode %{
18288     __ pause();
18289   %}
18290   ins_pipe(pipe_slow);
18291 %}
18292 
18293 // a * b + c
18294 instruct fmaD_reg(regD a, regD b, regD c) %{
18295   match(Set c (FmaD  c (Binary a b)));
18296   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18297   ins_cost(150);
18298   ins_encode %{
18299     assert(UseFMA, "Needs FMA instructions support.");
18300     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18301   %}
18302   ins_pipe( pipe_slow );
18303 %}
18304 
18305 // a * b + c
18306 instruct fmaF_reg(regF a, regF b, regF c) %{
18307   match(Set c (FmaF  c (Binary a b)));
18308   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18309   ins_cost(150);
18310   ins_encode %{
18311     assert(UseFMA, "Needs FMA instructions support.");
18312     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18313   %}
18314   ins_pipe( pipe_slow );
18315 %}
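
// Both rules above emit a fused multiply-add: $c = $a * $b + $c is computed
// with a single rounding, which matches the semantics of Math.fma(); C2 only
// creates FmaD/FmaF nodes when UseFMA is enabled, hence the asserts.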
18316 
18317 // ====================VECTOR INSTRUCTIONS=====================================
18318 
18319 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18320 instruct MoveVec2Leg(legVec dst, vec src) %{
18321   match(Set dst src);
18322   format %{ "" %}
18323   ins_encode %{
18324     ShouldNotReachHere();
18325   %}
18326   ins_pipe( fpu_reg_reg );
18327 %}
18328 
18329 instruct MoveLeg2Vec(vec dst, legVec src) %{
18330   match(Set dst src);
18331   format %{ "" %}
18332   ins_encode %{
18333     ShouldNotReachHere();
18334   %}
18335   ins_pipe( fpu_reg_reg );
18336 %}
18337 
18338 // ============================================================================
18339 
18340 // Load vectors generic operand pattern
18341 instruct loadV(vec dst, memory mem) %{
18342   match(Set dst (LoadVector mem));
18343   ins_cost(125);
18344   format %{ "load_vector $dst,$mem" %}
18345   ins_encode %{
18346     BasicType bt = Matcher::vector_element_basic_type(this);
18347     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18348   %}
18349   ins_pipe( pipe_slow );
18350 %}
18351 
18352 // Store vectors generic operand pattern.
18353 instruct storeV(memory mem, vec src) %{
18354   match(Set mem (StoreVector mem src));
18355   ins_cost(145);
18356   format %{ "store_vector $mem,$src\n\t" %}
18357   ins_encode %{
18358     switch (Matcher::vector_length_in_bytes(this, $src)) {
18359       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
18360       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
18361       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
18362       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
18363       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18364       default: ShouldNotReachHere();
18365     }
18366   %}
18367   ins_pipe( pipe_slow );
18368 %}
18369 
18370 // ---------------------------------------- Gather ------------------------------------
18371 
18372 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18373 
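// AVX2 gathers take their mask in a vector register: only lanes whose mask
// element has its most significant bit set are loaded, and the instruction
// clears the mask as it retires lanes. The all-ones mask set up below with
// vpcmpeqd therefore enables every lane, and must be regenerated per gather.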
18374 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18375   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18376             Matcher::vector_length_in_bytes(n) <= 32);
18377   match(Set dst (LoadVectorGather mem idx));
18378   effect(TEMP dst, TEMP tmp, TEMP mask);
18379   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18380   ins_encode %{
18381     int vlen_enc = vector_length_encoding(this);
18382     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18383     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18384     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18385     __ lea($tmp$$Register, $mem$$Address);
18386     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18387   %}
18388   ins_pipe( pipe_slow );
18389 %}
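// kxnorwl of a register with itself sets every opmask bit, enabling all lanes
// without loading a constant mask from memory.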
18392 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18393   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18394             !is_subword_type(Matcher::vector_element_basic_type(n)));
18395   match(Set dst (LoadVectorGather mem idx));
18396   effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18398   ins_encode %{
18399     int vlen_enc = vector_length_encoding(this);
18400     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18401     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18402     __ lea($tmp$$Register, $mem$$Address);
18403     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18404   %}
18405   ins_pipe( pipe_slow );
18406 %}
18407 
18408 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18409   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18410             !is_subword_type(Matcher::vector_element_basic_type(n)));
18411   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18412   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18414   ins_encode %{
18415     assert(UseAVX > 2, "sanity");
18416     int vlen_enc = vector_length_encoding(this);
18417     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18418     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the gather instruction partially updates the opmask register used
    // for predication, so the mask operand is first copied to a temporary.
18421     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18422     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18423     __ lea($tmp$$Register, $mem$$Address);
18424     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18425   %}
18426   ins_pipe( pipe_slow );
18427 %}
18428 
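// x86 has no gather instructions for byte/short elements, so subword gathers
// are emulated: indices are walked in a loop and elements are loaded and
// packed by the vgather8b/vgather_subword macro-assembler helpers.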
18429 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18430   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18431   match(Set dst (LoadVectorGather mem idx_base));
18432   effect(TEMP tmp, TEMP rtmp);
18433   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18434   ins_encode %{
18435     int vlen_enc = vector_length_encoding(this);
18436     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18437     __ lea($tmp$$Register, $mem$$Address);
18438     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18439   %}
18440   ins_pipe( pipe_slow );
18441 %}
18442 
18443 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18444                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18445   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18446   match(Set dst (LoadVectorGather mem idx_base));
18447   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18448   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18449   ins_encode %{
18450     int vlen_enc = vector_length_encoding(this);
18451     int vector_len = Matcher::vector_length(this);
18452     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18453     __ lea($tmp$$Register, $mem$$Address);
18454     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18455     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18456                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18457   %}
18458   ins_pipe( pipe_slow );
18459 %}
18460 
18461 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18462   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18463   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18464   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18465   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18466   ins_encode %{
18467     int vlen_enc = vector_length_encoding(this);
18468     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18469     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18470     __ lea($tmp$$Register, $mem$$Address);
18471     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18472     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18473   %}
18474   ins_pipe( pipe_slow );
18475 %}
18476 
18477 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18478                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18479   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18480   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18481   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18482   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18483   ins_encode %{
18484     int vlen_enc = vector_length_encoding(this);
18485     int vector_len = Matcher::vector_length(this);
18486     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18487     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18488     __ lea($tmp$$Register, $mem$$Address);
18489     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18490     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18491     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18492                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18493   %}
18494   ins_pipe( pipe_slow );
18495 %}
18496 
18497 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18498   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18499   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18500   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18501   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18502   ins_encode %{
18503     int vlen_enc = vector_length_encoding(this);
18504     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18505     __ lea($tmp$$Register, $mem$$Address);
18506     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18507     if (elem_bt == T_SHORT) {
18508       __ movl($mask_idx$$Register, 0x55555555);
18509       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18510     }
18511     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18512     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18513   %}
18514   ins_pipe( pipe_slow );
18515 %}
18516 
18517 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18518                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18519   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18520   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18521   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18522   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18523   ins_encode %{
18524     int vlen_enc = vector_length_encoding(this);
18525     int vector_len = Matcher::vector_length(this);
18526     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18527     __ lea($tmp$$Register, $mem$$Address);
18528     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18529     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18530     if (elem_bt == T_SHORT) {
18531       __ movl($mask_idx$$Register, 0x55555555);
18532       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18533     }
18534     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18535     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18536                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18537   %}
18538   ins_pipe( pipe_slow );
18539 %}
18540 
18541 // ====================Scatter=======================================
18542 
18543 // Scatter INT, LONG, FLOAT, DOUBLE
18544 
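// Scatter stores only exist in AVX-512 (hence the UseAVX > 2 predicate);
// the all-bits-set opmask enables every lane in the unmasked variant.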
18545 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18546   predicate(UseAVX > 2);
18547   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18548   effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18550   ins_encode %{
18551     int vlen_enc = vector_length_encoding(this, $src);
18552     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18553 
18554     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18555     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18556 
18557     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18558     __ lea($tmp$$Register, $mem$$Address);
18559     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18560   %}
18561   ins_pipe( pipe_slow );
18562 %}
18563 
18564 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18565   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18566   effect(TEMP tmp, TEMP ktmp);
18567   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18568   ins_encode %{
18569     int vlen_enc = vector_length_encoding(this, $src);
18570     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18571     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18572     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: the scatter instruction partially updates the opmask register used
    // for predication, so the mask operand is first copied to a temporary.
18575     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18576     __ lea($tmp$$Register, $mem$$Address);
18577     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18578   %}
18579   ins_pipe( pipe_slow );
18580 %}
18581 
18582 // ====================REPLICATE=======================================
18583 
18584 // Replicate byte scalar to be vector
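// Fastest form first: AVX512BW broadcasts directly from a GPR
// (evpbroadcastb), AVX2 broadcasts from an XMM register (vpbroadcastb), and
// pre-AVX2 falls back to an unpack/shuffle sequence.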
18585 instruct vReplB_reg(vec dst, rRegI src) %{
18586   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18587   match(Set dst (Replicate src));
18588   format %{ "replicateB $dst,$src" %}
18589   ins_encode %{
18590     uint vlen = Matcher::vector_length(this);
18591     if (UseAVX >= 2) {
18592       int vlen_enc = vector_length_encoding(this);
18593       if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18594         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18595         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18596       } else {
18597         __ movdl($dst$$XMMRegister, $src$$Register);
18598         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18599       }
18600     } else {
      assert(UseAVX < 2, "");
18602       __ movdl($dst$$XMMRegister, $src$$Register);
18603       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18604       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18605       if (vlen >= 16) {
18606         assert(vlen == 16, "");
18607         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18608       }
18609     }
18610   %}
18611   ins_pipe( pipe_slow );
18612 %}
18613 
18614 instruct ReplB_mem(vec dst, memory mem) %{
18615   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18616   match(Set dst (Replicate (LoadB mem)));
18617   format %{ "replicateB $dst,$mem" %}
18618   ins_encode %{
18619     int vlen_enc = vector_length_encoding(this);
18620     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18621   %}
18622   ins_pipe( pipe_slow );
18623 %}
18624 
18625 // ====================ReplicateS=======================================
18626 
18627 instruct vReplS_reg(vec dst, rRegI src) %{
18628   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18629   match(Set dst (Replicate src));
18630   format %{ "replicateS $dst,$src" %}
18631   ins_encode %{
18632     uint vlen = Matcher::vector_length(this);
18633     int vlen_enc = vector_length_encoding(this);
18634     if (UseAVX >= 2) {
18635       if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18636         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18637         __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18638       } else {
18639         __ movdl($dst$$XMMRegister, $src$$Register);
18640         __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18641       }
18642     } else {
18643       assert(UseAVX < 2, "");
18644       __ movdl($dst$$XMMRegister, $src$$Register);
18645       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18646       if (vlen >= 8) {
18647         assert(vlen == 8, "");
18648         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18649       }
18650     }
18651   %}
18652   ins_pipe( pipe_slow );
18653 %}
18654 
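// Half-float replication requires AVX512_FP16: the 16-bit payload travels
// through a GPR and is broadcast with evpbroadcastw.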
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate con));
18657   effect(TEMP rtmp);
18658   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18659   ins_encode %{
18660     int vlen_enc = vector_length_encoding(this);
18661     BasicType bt = Matcher::vector_element_basic_type(this);
18662     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18663     __ movl($rtmp$$Register, $con$$constant);
18664     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18665   %}
18666   ins_pipe( pipe_slow );
18667 %}
18668 
18669 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18670   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18671   match(Set dst (Replicate src));
18672   effect(TEMP rtmp);
18673   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18674   ins_encode %{
18675     int vlen_enc = vector_length_encoding(this);
18676     __ vmovw($rtmp$$Register, $src$$XMMRegister);
18677     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18678   %}
18679   ins_pipe( pipe_slow );
18680 %}
18681 
18682 instruct ReplS_mem(vec dst, memory mem) %{
18683   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18684   match(Set dst (Replicate (LoadS mem)));
18685   format %{ "replicateS $dst,$mem" %}
18686   ins_encode %{
18687     int vlen_enc = vector_length_encoding(this);
18688     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18689   %}
18690   ins_pipe( pipe_slow );
18691 %}
18692 
18693 // ====================ReplicateI=======================================
18694 
18695 instruct ReplI_reg(vec dst, rRegI src) %{
18696   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18697   match(Set dst (Replicate src));
18698   format %{ "replicateI $dst,$src" %}
18699   ins_encode %{
18700     uint vlen = Matcher::vector_length(this);
18701     int vlen_enc = vector_length_encoding(this);
18702     if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18703       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18704     } else if (VM_Version::supports_avx2()) {
18705       __ movdl($dst$$XMMRegister, $src$$Register);
18706       __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18707     } else {
18708       __ movdl($dst$$XMMRegister, $src$$Register);
18709       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18710     }
18711   %}
18712   ins_pipe( pipe_slow );
18713 %}
18714 
18715 instruct ReplI_mem(vec dst, memory mem) %{
18716   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18717   match(Set dst (Replicate (LoadI mem)));
18718   format %{ "replicateI $dst,$mem" %}
18719   ins_encode %{
18720     int vlen_enc = vector_length_encoding(this);
18721     if (VM_Version::supports_avx2()) {
18722       __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18723     } else if (VM_Version::supports_avx()) {
18724       __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18725     } else {
18726       __ movdl($dst$$XMMRegister, $mem$$Address);
18727       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18728     }
18729   %}
18730   ins_pipe( pipe_slow );
18731 %}
18732 
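// Immediate replication is done through the constant table: vreplicate_imm
// materializes the replicated constant there and load_constant_vector
// broadcasts or loads it into the destination register.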
18733 instruct ReplI_imm(vec dst, immI con) %{
18734   predicate(Matcher::is_non_long_integral_vector(n));
18735   match(Set dst (Replicate con));
18736   format %{ "replicateI $dst,$con" %}
18737   ins_encode %{
18738     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18739                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18740                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18741     BasicType bt = Matcher::vector_element_basic_type(this);
18742     int vlen = Matcher::vector_length_in_bytes(this);
18743     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18744   %}
18745   ins_pipe( pipe_slow );
18746 %}
18747 
18748 // Replicate scalar zero to be vector
18749 instruct ReplI_zero(vec dst, immI_0 zero) %{
18750   predicate(Matcher::is_non_long_integral_vector(n));
18751   match(Set dst (Replicate zero));
18752   format %{ "replicateI $dst,$zero" %}
18753   ins_encode %{
18754     int vlen_enc = vector_length_encoding(this);
18755     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18756       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18757     } else {
18758       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18759     }
18760   %}
18761   ins_pipe( fpu_reg_reg );
18762 %}
18763 
18764 instruct ReplI_M1(vec dst, immI_M1 con) %{
18765   predicate(Matcher::is_non_long_integral_vector(n));
18766   match(Set dst (Replicate con));
18767   format %{ "vallones $dst" %}
18768   ins_encode %{
18769     int vector_len = vector_length_encoding(this);
18770     __ vallones($dst$$XMMRegister, vector_len);
18771   %}
18772   ins_pipe( pipe_slow );
18773 %}
18774 
18775 // ====================ReplicateL=======================================
18776 
18777 // Replicate long (8 byte) scalar to be vector
18778 instruct ReplL_reg(vec dst, rRegL src) %{
18779   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18780   match(Set dst (Replicate src));
18781   format %{ "replicateL $dst,$src" %}
18782   ins_encode %{
18783     int vlen = Matcher::vector_length(this);
18784     int vlen_enc = vector_length_encoding(this);
18785     if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18786       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18787     } else if (VM_Version::supports_avx2()) {
18788       __ movdq($dst$$XMMRegister, $src$$Register);
18789       __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18790     } else {
18791       __ movdq($dst$$XMMRegister, $src$$Register);
18792       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18793     }
18794   %}
18795   ins_pipe( pipe_slow );
18796 %}
18797 
18798 instruct ReplL_mem(vec dst, memory mem) %{
18799   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18800   match(Set dst (Replicate (LoadL mem)));
18801   format %{ "replicateL $dst,$mem" %}
18802   ins_encode %{
18803     int vlen_enc = vector_length_encoding(this);
18804     if (VM_Version::supports_avx2()) {
18805       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18806     } else if (VM_Version::supports_sse3()) {
18807       __ movddup($dst$$XMMRegister, $mem$$Address);
18808     } else {
18809       __ movq($dst$$XMMRegister, $mem$$Address);
18810       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18811     }
18812   %}
18813   ins_pipe( pipe_slow );
18814 %}
18815 
18816 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18817 instruct ReplL_imm(vec dst, immL con) %{
18818   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18819   match(Set dst (Replicate con));
18820   format %{ "replicateL $dst,$con" %}
18821   ins_encode %{
18822     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18823     int vlen = Matcher::vector_length_in_bytes(this);
18824     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18825   %}
18826   ins_pipe( pipe_slow );
18827 %}
18828 
18829 instruct ReplL_zero(vec dst, immL0 zero) %{
18830   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18831   match(Set dst (Replicate zero));
18832   format %{ "replicateL $dst,$zero" %}
18833   ins_encode %{
18834     int vlen_enc = vector_length_encoding(this);
18835     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18836       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18837     } else {
18838       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18839     }
18840   %}
18841   ins_pipe( fpu_reg_reg );
18842 %}
18843 
18844 instruct ReplL_M1(vec dst, immL_M1 con) %{
18845   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18846   match(Set dst (Replicate con));
18847   format %{ "vallones $dst" %}
18848   ins_encode %{
18849     int vector_len = vector_length_encoding(this);
18850     __ vallones($dst$$XMMRegister, vector_len);
18851   %}
18852   ins_pipe( pipe_slow );
18853 %}
18854 
18855 // ====================ReplicateF=======================================
18856 
18857 instruct vReplF_reg(vec dst, vlRegF src) %{
18858   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18859   match(Set dst (Replicate src));
18860   format %{ "replicateF $dst,$src" %}
18861   ins_encode %{
18862     uint vlen = Matcher::vector_length(this);
18863     int vlen_enc = vector_length_encoding(this);
18864     if (vlen <= 4) {
18865       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18866     } else if (VM_Version::supports_avx2()) {
18867       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18868     } else {
18869       assert(vlen == 8, "sanity");
18870       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18871       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18872     }
18873   %}
18874   ins_pipe( pipe_slow );
18875 %}
18876 
18877 instruct ReplF_reg(vec dst, vlRegF src) %{
18878   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18879   match(Set dst (Replicate src));
18880   format %{ "replicateF $dst,$src" %}
18881   ins_encode %{
18882     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18883   %}
18884   ins_pipe( pipe_slow );
18885 %}
18886 
18887 instruct ReplF_mem(vec dst, memory mem) %{
18888   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18889   match(Set dst (Replicate (LoadF mem)));
18890   format %{ "replicateF $dst,$mem" %}
18891   ins_encode %{
18892     int vlen_enc = vector_length_encoding(this);
18893     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18894   %}
18895   ins_pipe( pipe_slow );
18896 %}
18897 
18898 // Replicate float scalar immediate to be vector by loading from const table.
18899 instruct ReplF_imm(vec dst, immF con) %{
18900   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18901   match(Set dst (Replicate con));
18902   format %{ "replicateF $dst,$con" %}
18903   ins_encode %{
18904     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18905                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18906     int vlen = Matcher::vector_length_in_bytes(this);
18907     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18908   %}
18909   ins_pipe( pipe_slow );
18910 %}
18911 
18912 instruct ReplF_zero(vec dst, immF0 zero) %{
18913   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18914   match(Set dst (Replicate zero));
18915   format %{ "replicateF $dst,$zero" %}
18916   ins_encode %{
18917     int vlen_enc = vector_length_encoding(this);
18918     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18919       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18920     } else {
18921       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18922     }
18923   %}
18924   ins_pipe( fpu_reg_reg );
18925 %}
18926 
18927 // ====================ReplicateD=======================================
18928 
18929 // Replicate double (8 bytes) scalar to be vector
18930 instruct vReplD_reg(vec dst, vlRegD src) %{
18931   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18932   match(Set dst (Replicate src));
18933   format %{ "replicateD $dst,$src" %}
18934   ins_encode %{
18935     uint vlen = Matcher::vector_length(this);
18936     int vlen_enc = vector_length_encoding(this);
18937     if (vlen <= 2) {
18938       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18939     } else if (VM_Version::supports_avx2()) {
18940       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18941     } else {
18942       assert(vlen == 4, "sanity");
18943       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18944       __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18945     }
18946   %}
18947   ins_pipe( pipe_slow );
18948 %}
18949 
18950 instruct ReplD_reg(vec dst, vlRegD src) %{
18951   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18952   match(Set dst (Replicate src));
18953   format %{ "replicateD $dst,$src" %}
18954   ins_encode %{
18955     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18956   %}
18957   ins_pipe( pipe_slow );
18958 %}
18959 
18960 instruct ReplD_mem(vec dst, memory mem) %{
18961   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18962   match(Set dst (Replicate (LoadD mem)));
18963   format %{ "replicateD $dst,$mem" %}
18964   ins_encode %{
18965     if (Matcher::vector_length(this) >= 4) {
18966       int vlen_enc = vector_length_encoding(this);
18967       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18968     } else {
18969       __ movddup($dst$$XMMRegister, $mem$$Address);
18970     }
18971   %}
18972   ins_pipe( pipe_slow );
18973 %}
18974 
18975 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
18976 instruct ReplD_imm(vec dst, immD con) %{
18977   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18978   match(Set dst (Replicate con));
18979   format %{ "replicateD $dst,$con" %}
18980   ins_encode %{
18981     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18982     int vlen = Matcher::vector_length_in_bytes(this);
18983     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
18984   %}
18985   ins_pipe( pipe_slow );
18986 %}
18987 
18988 instruct ReplD_zero(vec dst, immD0 zero) %{
18989   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18990   match(Set dst (Replicate zero));
18991   format %{ "replicateD $dst,$zero" %}
18992   ins_encode %{
18993     int vlen_enc = vector_length_encoding(this);
18994     if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18995       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18996     } else {
18997       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18998     }
18999   %}
19000   ins_pipe( fpu_reg_reg );
19001 %}
19002 
19003 // ====================VECTOR INSERT=======================================
19004 
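// Element insertion into vectors wider than 128 bits is a three-step dance:
// extract the 128-bit lane containing the element, insert the scalar into
// that lane, then write the lane back (see insert32/insert64 below).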
19005 instruct insert(vec dst, rRegI val, immU8 idx) %{
19006   predicate(Matcher::vector_length_in_bytes(n) < 32);
19007   match(Set dst (VectorInsert (Binary dst val) idx));
19008   format %{ "vector_insert $dst,$val,$idx" %}
19009   ins_encode %{
19010     assert(UseSSE >= 4, "required");
19011     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19012 
19013     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19014 
19015     assert(is_integral_type(elem_bt), "");
19016     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19017 
19018     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19019   %}
19020   ins_pipe( pipe_slow );
19021 %}
19022 
19023 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19024   predicate(Matcher::vector_length_in_bytes(n) == 32);
19025   match(Set dst (VectorInsert (Binary src val) idx));
19026   effect(TEMP vtmp);
19027   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19028   ins_encode %{
19029     int vlen_enc = Assembler::AVX_256bit;
19030     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19031     int elem_per_lane = 16/type2aelembytes(elem_bt);
19032     int log2epr = log2(elem_per_lane);
19033 
19034     assert(is_integral_type(elem_bt), "sanity");
19035     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19036 
19037     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19038     uint y_idx = ($idx$$constant >> log2epr) & 1;
19039     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19040     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19041     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19042   %}
19043   ins_pipe( pipe_slow );
19044 %}
19045 
19046 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19047   predicate(Matcher::vector_length_in_bytes(n) == 64);
19048   match(Set dst (VectorInsert (Binary src val) idx));
19049   effect(TEMP vtmp);
19050   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19051   ins_encode %{
19052     assert(UseAVX > 2, "sanity");
19053 
19054     BasicType elem_bt = Matcher::vector_element_basic_type(this);
19055     int elem_per_lane = 16/type2aelembytes(elem_bt);
19056     int log2epr = log2(elem_per_lane);
19057 
19058     assert(is_integral_type(elem_bt), "");
19059     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19060 
19061     uint x_idx = $idx$$constant & right_n_bits(log2epr);
19062     uint y_idx = ($idx$$constant >> log2epr) & 3;
19063     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19064     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19065     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19066   %}
19067   ins_pipe( pipe_slow );
19068 %}
19069 
19070 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19071   predicate(Matcher::vector_length(n) == 2);
19072   match(Set dst (VectorInsert (Binary dst val) idx));
19073   format %{ "vector_insert $dst,$val,$idx" %}
19074   ins_encode %{
19075     assert(UseSSE >= 4, "required");
19076     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19077     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19078 
19079     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19080   %}
19081   ins_pipe( pipe_slow );
19082 %}
19083 
19084 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19085   predicate(Matcher::vector_length(n) == 4);
19086   match(Set dst (VectorInsert (Binary src val) idx));
19087   effect(TEMP vtmp);
19088   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19089   ins_encode %{
19090     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19091     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19092 
19093     uint x_idx = $idx$$constant & right_n_bits(1);
19094     uint y_idx = ($idx$$constant >> 1) & 1;
19095     int vlen_enc = Assembler::AVX_256bit;
19096     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19097     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19098     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19099   %}
19100   ins_pipe( pipe_slow );
19101 %}
19102 
19103 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19104   predicate(Matcher::vector_length(n) == 8);
19105   match(Set dst (VectorInsert (Binary src val) idx));
19106   effect(TEMP vtmp);
19107   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19108   ins_encode %{
19109     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19110     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19111 
19112     uint x_idx = $idx$$constant & right_n_bits(1);
19113     uint y_idx = ($idx$$constant >> 1) & 3;
19114     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19115     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19116     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19117   %}
19118   ins_pipe( pipe_slow );
19119 %}
19120 
19121 instruct insertF(vec dst, regF val, immU8 idx) %{
19122   predicate(Matcher::vector_length(n) < 8);
19123   match(Set dst (VectorInsert (Binary dst val) idx));
19124   format %{ "vector_insert $dst,$val,$idx" %}
19125   ins_encode %{
19126     assert(UseSSE >= 4, "sanity");
19127 
19128     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19129     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19130 
19131     uint x_idx = $idx$$constant & right_n_bits(2);
19132     __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19133   %}
19134   ins_pipe( pipe_slow );
19135 %}
19136 
19137 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19138   predicate(Matcher::vector_length(n) >= 8);
19139   match(Set dst (VectorInsert (Binary src val) idx));
19140   effect(TEMP vtmp);
19141   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19142   ins_encode %{
19143     assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19144     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19145 
19146     int vlen = Matcher::vector_length(this);
19147     uint x_idx = $idx$$constant & right_n_bits(2);
19148     if (vlen == 8) {
19149       uint y_idx = ($idx$$constant >> 2) & 1;
19150       int vlen_enc = Assembler::AVX_256bit;
19151       __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19152       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19153       __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19154     } else {
19155       assert(vlen == 16, "sanity");
19156       uint y_idx = ($idx$$constant >> 2) & 3;
19157       __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19158       __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19159       __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19160     }
19161   %}
19162   ins_pipe( pipe_slow );
19163 %}
19164 
19165 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19166   predicate(Matcher::vector_length(n) == 2);
19167   match(Set dst (VectorInsert (Binary dst val) idx));
19168   effect(TEMP tmp);
19169   format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19170   ins_encode %{
19171     assert(UseSSE >= 4, "sanity");
19172     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19173     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19174 
19175     __ movq($tmp$$Register, $val$$XMMRegister);
19176     __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19177   %}
19178   ins_pipe( pipe_slow );
19179 %}
19180 
19181 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19182   predicate(Matcher::vector_length(n) == 4);
19183   match(Set dst (VectorInsert (Binary src val) idx));
19184   effect(TEMP vtmp, TEMP tmp);
19185   format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19186   ins_encode %{
19187     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19188     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19189 
19190     uint x_idx = $idx$$constant & right_n_bits(1);
19191     uint y_idx = ($idx$$constant >> 1) & 1;
19192     int vlen_enc = Assembler::AVX_256bit;
19193     __ movq($tmp$$Register, $val$$XMMRegister);
19194     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19195     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19196     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19197   %}
19198   ins_pipe( pipe_slow );
19199 %}
19200 
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19202   predicate(Matcher::vector_length(n) == 8);
19203   match(Set dst (VectorInsert (Binary src val) idx));
19204   effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19206   ins_encode %{
19207     assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19208     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19209 
19210     uint x_idx = $idx$$constant & right_n_bits(1);
19211     uint y_idx = ($idx$$constant >> 1) & 3;
19212     __ movq($tmp$$Register, $val$$XMMRegister);
19213     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19214     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19215     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19216   %}
19217   ins_pipe( pipe_slow );
19218 %}
19219 
19220 // ====================REDUCTION ARITHMETIC=======================================
19221 
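// Reductions fold all lanes of the vector input into a scalar and combine it
// with the scalar input. For floating point, the strictly ordered rules
// (required by auto-vectorized code) accumulate lane by lane in order, while
// the unordered rules below them serve the Vector API, which permits
// reassociation; the split is driven by requires_strict_order().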
19222 // =======================Int Reduction==========================================
19223 
19224 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19225   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19226   match(Set dst (AddReductionVI src1 src2));
19227   match(Set dst (MulReductionVI src1 src2));
19228   match(Set dst (AndReductionV  src1 src2));
19229   match(Set dst ( OrReductionV  src1 src2));
19230   match(Set dst (XorReductionV  src1 src2));
19231   match(Set dst (MinReductionV  src1 src2));
19232   match(Set dst (MaxReductionV  src1 src2));
19233   effect(TEMP vtmp1, TEMP vtmp2);
19234   format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19235   ins_encode %{
19236     int opcode = this->ideal_Opcode();
19237     int vlen = Matcher::vector_length(this, $src2);
19238     __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19239   %}
19240   ins_pipe( pipe_slow );
19241 %}
19242 
19243 // =======================Long Reduction==========================================
19244 
19245 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19246   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19247   match(Set dst (AddReductionVL src1 src2));
19248   match(Set dst (MulReductionVL src1 src2));
19249   match(Set dst (AndReductionV  src1 src2));
19250   match(Set dst ( OrReductionV  src1 src2));
19251   match(Set dst (XorReductionV  src1 src2));
19252   match(Set dst (MinReductionV  src1 src2));
19253   match(Set dst (MaxReductionV  src1 src2));
19254   effect(TEMP vtmp1, TEMP vtmp2);
19255   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19256   ins_encode %{
19257     int opcode = this->ideal_Opcode();
19258     int vlen = Matcher::vector_length(this, $src2);
19259     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19260   %}
19261   ins_pipe( pipe_slow );
19262 %}
19263 
19264 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19265   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19266   match(Set dst (AddReductionVL src1 src2));
19267   match(Set dst (MulReductionVL src1 src2));
19268   match(Set dst (AndReductionV  src1 src2));
19269   match(Set dst ( OrReductionV  src1 src2));
19270   match(Set dst (XorReductionV  src1 src2));
19271   match(Set dst (MinReductionV  src1 src2));
19272   match(Set dst (MaxReductionV  src1 src2));
19273   effect(TEMP vtmp1, TEMP vtmp2);
19274   format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19275   ins_encode %{
19276     int opcode = this->ideal_Opcode();
19277     int vlen = Matcher::vector_length(this, $src2);
19278     __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19279   %}
19280   ins_pipe( pipe_slow );
19281 %}
19282 
19283 // =======================Float Reduction==========================================
19284 
19285 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19286   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19287   match(Set dst (AddReductionVF dst src));
19288   match(Set dst (MulReductionVF dst src));
19289   effect(TEMP dst, TEMP vtmp);
19290   format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
19291   ins_encode %{
19292     int opcode = this->ideal_Opcode();
19293     int vlen = Matcher::vector_length(this, $src);
19294     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19295   %}
19296   ins_pipe( pipe_slow );
19297 %}
19298 
19299 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19300   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19301   match(Set dst (AddReductionVF dst src));
19302   match(Set dst (MulReductionVF dst src));
19303   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19304   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19305   ins_encode %{
19306     int opcode = this->ideal_Opcode();
19307     int vlen = Matcher::vector_length(this, $src);
19308     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19309   %}
19310   ins_pipe( pipe_slow );
19311 %}
19312 
19313 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19314   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19315   match(Set dst (AddReductionVF dst src));
19316   match(Set dst (MulReductionVF dst src));
19317   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19318   format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19319   ins_encode %{
19320     int opcode = this->ideal_Opcode();
19321     int vlen = Matcher::vector_length(this, $src);
19322     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19323   %}
19324   ins_pipe( pipe_slow );
19325 %}
19326 
19327 
19328 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19329   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19330   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19331   // src1 contains reduction identity
19332   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19333   match(Set dst (AddReductionVF src1 src2));
19334   match(Set dst (MulReductionVF src1 src2));
19335   effect(TEMP dst);
19336   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19337   ins_encode %{
19338     int opcode = this->ideal_Opcode();
19339     int vlen = Matcher::vector_length(this, $src2);
19340     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19341   %}
19342   ins_pipe( pipe_slow );
19343 %}
19344 
19345 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19346   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19347   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19348   // src1 contains reduction identity
19349   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19350   match(Set dst (AddReductionVF src1 src2));
19351   match(Set dst (MulReductionVF src1 src2));
19352   effect(TEMP dst, TEMP vtmp);
19353   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19354   ins_encode %{
19355     int opcode = this->ideal_Opcode();
19356     int vlen = Matcher::vector_length(this, $src2);
19357     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19358   %}
19359   ins_pipe( pipe_slow );
19360 %}
19361 
19362 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19363   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19364   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19365   // src1 contains reduction identity
19366   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19367   match(Set dst (AddReductionVF src1 src2));
19368   match(Set dst (MulReductionVF src1 src2));
19369   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19370   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19371   ins_encode %{
19372     int opcode = this->ideal_Opcode();
19373     int vlen = Matcher::vector_length(this, $src2);
19374     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19375   %}
19376   ins_pipe( pipe_slow );
19377 %}
19378 
19379 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19380   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19381   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19382   // src1 contains reduction identity
19383   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19384   match(Set dst (AddReductionVF src1 src2));
19385   match(Set dst (MulReductionVF src1 src2));
19386   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19387   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19388   ins_encode %{
19389     int opcode = this->ideal_Opcode();
19390     int vlen = Matcher::vector_length(this, $src2);
19391     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19392   %}
19393   ins_pipe( pipe_slow );
19394 %}
19395 
19396 // =======================Double Reduction==========================================
19397 
19398 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19399   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19400   match(Set dst (AddReductionVD dst src));
19401   match(Set dst (MulReductionVD dst src));
19402   effect(TEMP dst, TEMP vtmp);
19403   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19404   ins_encode %{
19405     int opcode = this->ideal_Opcode();
19406     int vlen = Matcher::vector_length(this, $src);
19407     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
19409   ins_pipe( pipe_slow );
19410 %}
19411 
19412 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19413   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19414   match(Set dst (AddReductionVD dst src));
19415   match(Set dst (MulReductionVD dst src));
19416   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19417   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19418   ins_encode %{
19419     int opcode = this->ideal_Opcode();
19420     int vlen = Matcher::vector_length(this, $src);
19421     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19422   %}
19423   ins_pipe( pipe_slow );
19424 %}
19425 
19426 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19427   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19428   match(Set dst (AddReductionVD dst src));
19429   match(Set dst (MulReductionVD dst src));
19430   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19431   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19432   ins_encode %{
19433     int opcode = this->ideal_Opcode();
19434     int vlen = Matcher::vector_length(this, $src);
19435     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19436   %}
19437   ins_pipe( pipe_slow );
19438 %}
19439 
19440 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19441   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19442   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19443   // src1 contains reduction identity
19444   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19445   match(Set dst (AddReductionVD src1 src2));
19446   match(Set dst (MulReductionVD src1 src2));
19447   effect(TEMP dst);
19448   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19449   ins_encode %{
19450     int opcode = this->ideal_Opcode();
19451     int vlen = Matcher::vector_length(this, $src2);
19452     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
19454   ins_pipe( pipe_slow );
19455 %}
19456 
19457 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19458   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19459   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19460   // src1 contains reduction identity
19461   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19462   match(Set dst (AddReductionVD src1 src2));
19463   match(Set dst (MulReductionVD src1 src2));
19464   effect(TEMP dst, TEMP vtmp);
19465   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19466   ins_encode %{
19467     int opcode = this->ideal_Opcode();
19468     int vlen = Matcher::vector_length(this, $src2);
19469     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19470   %}
19471   ins_pipe( pipe_slow );
19472 %}
19473 
19474 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19475   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19476   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19477   // src1 contains reduction identity
19478   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19479   match(Set dst (AddReductionVD src1 src2));
19480   match(Set dst (MulReductionVD src1 src2));
19481   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19482   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19483   ins_encode %{
19484     int opcode = this->ideal_Opcode();
19485     int vlen = Matcher::vector_length(this, $src2);
19486     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19487   %}
19488   ins_pipe( pipe_slow );
19489 %}
19490 
19491 // =======================Byte Reduction==========================================
19492 
19493 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19494   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19495   match(Set dst (AddReductionVI src1 src2));
19496   match(Set dst (AndReductionV  src1 src2));
19497   match(Set dst ( OrReductionV  src1 src2));
19498   match(Set dst (XorReductionV  src1 src2));
19499   match(Set dst (MinReductionV  src1 src2));
19500   match(Set dst (MaxReductionV  src1 src2));
19501   effect(TEMP vtmp1, TEMP vtmp2);
19502   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19503   ins_encode %{
19504     int opcode = this->ideal_Opcode();
19505     int vlen = Matcher::vector_length(this, $src2);
19506     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19507   %}
19508   ins_pipe( pipe_slow );
19509 %}
19510 
19511 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19512   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19513   match(Set dst (AddReductionVI src1 src2));
19514   match(Set dst (AndReductionV  src1 src2));
19515   match(Set dst ( OrReductionV  src1 src2));
19516   match(Set dst (XorReductionV  src1 src2));
19517   match(Set dst (MinReductionV  src1 src2));
19518   match(Set dst (MaxReductionV  src1 src2));
19519   effect(TEMP vtmp1, TEMP vtmp2);
19520   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19521   ins_encode %{
19522     int opcode = this->ideal_Opcode();
19523     int vlen = Matcher::vector_length(this, $src2);
19524     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19525   %}
19526   ins_pipe( pipe_slow );
19527 %}
19528 
19529 // =======================Short Reduction==========================================
19530 
19531 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19532   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19533   match(Set dst (AddReductionVI src1 src2));
19534   match(Set dst (MulReductionVI src1 src2));
19535   match(Set dst (AndReductionV  src1 src2));
19536   match(Set dst ( OrReductionV  src1 src2));
19537   match(Set dst (XorReductionV  src1 src2));
19538   match(Set dst (MinReductionV  src1 src2));
19539   match(Set dst (MaxReductionV  src1 src2));
19540   effect(TEMP vtmp1, TEMP vtmp2);
19541   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19542   ins_encode %{
19543     int opcode = this->ideal_Opcode();
19544     int vlen = Matcher::vector_length(this, $src2);
19545     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19546   %}
19547   ins_pipe( pipe_slow );
19548 %}
19549 
19550 // =======================Mul Reduction==========================================
19551 
19552 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19553   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19554             Matcher::vector_length(n->in(2)) <= 32); // src2
19555   match(Set dst (MulReductionVI src1 src2));
19556   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19557   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19558   ins_encode %{
19559     int opcode = this->ideal_Opcode();
19560     int vlen = Matcher::vector_length(this, $src2);
19561     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19562   %}
19563   ins_pipe( pipe_slow );
19564 %}
19565 
19566 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19567   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19568             Matcher::vector_length(n->in(2)) == 64); // src2
19569   match(Set dst (MulReductionVI src1 src2));
19570   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19571   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19572   ins_encode %{
19573     int opcode = this->ideal_Opcode();
19574     int vlen = Matcher::vector_length(this, $src2);
19575     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19576   %}
19577   ins_pipe( pipe_slow );
19578 %}
19579 
19580 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
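//
// src1 holds the reduction identity: +Inf for a min reduction (min(+Inf, x) == x
// for every x) and -Inf for max, which is exactly what the POS_INF/NEG_INF
// predicate terms below check. A scalar sketch (illustrative C++; java_min is a
// hypothetical helper with Java Math.min semantics):
//
//   float acc = std::numeric_limits<float>::infinity();    // identity for min
//   for (int i = 0; i < n; i++) acc = java_min(acc, v[i]);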
19582 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19583                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19584   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19585             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19586              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19587             Matcher::vector_length(n->in(2)) == 2);
19588   match(Set dst (MinReductionV src1 src2));
19589   match(Set dst (MaxReductionV src1 src2));
19590   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19591   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19592   ins_encode %{
19593     assert(UseAVX > 0, "sanity");
19594 
19595     int opcode = this->ideal_Opcode();
19596     int vlen = Matcher::vector_length(this, $src2);
19597     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19598                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19599   %}
19600   ins_pipe( pipe_slow );
19601 %}
19602 
19603 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19604                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19605   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19606             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19607              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19608             Matcher::vector_length(n->in(2)) >= 4);
19609   match(Set dst (MinReductionV src1 src2));
19610   match(Set dst (MaxReductionV src1 src2));
19611   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19612   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19613   ins_encode %{
19614     assert(UseAVX > 0, "sanity");
19615 
19616     int opcode = this->ideal_Opcode();
19617     int vlen = Matcher::vector_length(this, $src2);
19618     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19619                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19620   %}
19621   ins_pipe( pipe_slow );
19622 %}
19623 
19624 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19625                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19626   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19627             Matcher::vector_length(n->in(2)) == 2);
19628   match(Set dst (MinReductionV dst src));
19629   match(Set dst (MaxReductionV dst src));
19630   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19631   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19632   ins_encode %{
19633     assert(UseAVX > 0, "sanity");
19634 
19635     int opcode = this->ideal_Opcode();
19636     int vlen = Matcher::vector_length(this, $src);
19637     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19638                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19639   %}
19640   ins_pipe( pipe_slow );
19641 %}
19642 
19643 
19644 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19645                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19646   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19647             Matcher::vector_length(n->in(2)) >= 4);
19648   match(Set dst (MinReductionV dst src));
19649   match(Set dst (MaxReductionV dst src));
19650   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19651   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19652   ins_encode %{
19653     assert(UseAVX > 0, "sanity");
19654 
19655     int opcode = this->ideal_Opcode();
19656     int vlen = Matcher::vector_length(this, $src);
19657     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19658                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19659   %}
19660   ins_pipe( pipe_slow );
19661 %}
19662 
19663 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19664   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19665             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19666              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19667             Matcher::vector_length(n->in(2)) == 2);
19668   match(Set dst (MinReductionV src1 src2));
19669   match(Set dst (MaxReductionV src1 src2));
19670   effect(TEMP dst, TEMP xtmp1);
19671   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19672   ins_encode %{
19673     int opcode = this->ideal_Opcode();
19674     int vlen = Matcher::vector_length(this, $src2);
19675     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19676                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19677   %}
19678   ins_pipe( pipe_slow );
19679 %}
19680 
19681 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19682   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19683             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19684              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19685             Matcher::vector_length(n->in(2)) >= 4);
19686   match(Set dst (MinReductionV src1 src2));
19687   match(Set dst (MaxReductionV src1 src2));
19688   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19689   format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19690   ins_encode %{
19691     int opcode = this->ideal_Opcode();
19692     int vlen = Matcher::vector_length(this, $src2);
19693     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19694                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19695   %}
19696   ins_pipe( pipe_slow );
19697 %}
19698 
19699 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19700   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19701             Matcher::vector_length(n->in(2)) == 2);
19702   match(Set dst (MinReductionV dst src));
19703   match(Set dst (MaxReductionV dst src));
19704   effect(TEMP dst, TEMP xtmp1);
19705   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19706   ins_encode %{
19707     int opcode = this->ideal_Opcode();
19708     int vlen = Matcher::vector_length(this, $src);
19709     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19710                          $xtmp1$$XMMRegister);
19711   %}
19712   ins_pipe( pipe_slow );
19713 %}
19714 
19715 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19716   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19717             Matcher::vector_length(n->in(2)) >= 4);
19718   match(Set dst (MinReductionV dst src));
19719   match(Set dst (MaxReductionV dst src));
19720   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19721   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19722   ins_encode %{
19723     int opcode = this->ideal_Opcode();
19724     int vlen = Matcher::vector_length(this, $src);
19725     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19726                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19727   %}
19728   ins_pipe( pipe_slow );
19729 %}
19730 
//--------------------Min/Max Double Reduction --------------------
19732 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19733                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19734   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19735             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19736              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19737             Matcher::vector_length(n->in(2)) == 2);
19738   match(Set dst (MinReductionV src1 src2));
19739   match(Set dst (MaxReductionV src1 src2));
19740   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19741   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19742   ins_encode %{
19743     assert(UseAVX > 0, "sanity");
19744 
19745     int opcode = this->ideal_Opcode();
19746     int vlen = Matcher::vector_length(this, $src2);
19747     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19748                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19749   %}
19750   ins_pipe( pipe_slow );
19751 %}
19752 
19753 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19754                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19755   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19756             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19757              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19758             Matcher::vector_length(n->in(2)) >= 4);
19759   match(Set dst (MinReductionV src1 src2));
19760   match(Set dst (MaxReductionV src1 src2));
19761   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19762   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19763   ins_encode %{
19764     assert(UseAVX > 0, "sanity");
19765 
19766     int opcode = this->ideal_Opcode();
19767     int vlen = Matcher::vector_length(this, $src2);
19768     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19769                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19770   %}
19771   ins_pipe( pipe_slow );
19772 %}
19773 
19774 
19775 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19776                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19777   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19778             Matcher::vector_length(n->in(2)) == 2);
19779   match(Set dst (MinReductionV dst src));
19780   match(Set dst (MaxReductionV dst src));
19781   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19782   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19783   ins_encode %{
19784     assert(UseAVX > 0, "sanity");
19785 
19786     int opcode = this->ideal_Opcode();
19787     int vlen = Matcher::vector_length(this, $src);
19788     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19789                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19790   %}
19791   ins_pipe( pipe_slow );
19792 %}
19793 
19794 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19795                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19796   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19797             Matcher::vector_length(n->in(2)) >= 4);
19798   match(Set dst (MinReductionV dst src));
19799   match(Set dst (MaxReductionV dst src));
19800   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19801   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19802   ins_encode %{
19803     assert(UseAVX > 0, "sanity");
19804 
19805     int opcode = this->ideal_Opcode();
19806     int vlen = Matcher::vector_length(this, $src);
19807     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19808                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19809   %}
19810   ins_pipe( pipe_slow );
19811 %}
19812 
19813 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19814   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19815             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19816              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19817             Matcher::vector_length(n->in(2)) == 2);
19818   match(Set dst (MinReductionV src1 src2));
19819   match(Set dst (MaxReductionV src1 src2));
19820   effect(TEMP dst, TEMP xtmp1);
19821   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19822   ins_encode %{
19823     int opcode = this->ideal_Opcode();
19824     int vlen = Matcher::vector_length(this, $src2);
19825     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19826                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
19827   %}
19828   ins_pipe( pipe_slow );
19829 %}
19830 
19831 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19832   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19833             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19834              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19835             Matcher::vector_length(n->in(2)) >= 4);
19836   match(Set dst (MinReductionV src1 src2));
19837   match(Set dst (MaxReductionV src1 src2));
19838   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19839   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19840   ins_encode %{
19841     int opcode = this->ideal_Opcode();
19842     int vlen = Matcher::vector_length(this, $src2);
19843     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19844                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19845   %}
19846   ins_pipe( pipe_slow );
19847 %}
19848 
19849 
19850 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
19851   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19852             Matcher::vector_length(n->in(2)) == 2);
19853   match(Set dst (MinReductionV dst src));
19854   match(Set dst (MaxReductionV dst src));
19855   effect(TEMP dst, TEMP xtmp1);
19856   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19857   ins_encode %{
19858     int opcode = this->ideal_Opcode();
19859     int vlen = Matcher::vector_length(this, $src);
19860     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19861                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19862   %}
19863   ins_pipe( pipe_slow );
19864 %}
19865 
19866 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19867   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19868             Matcher::vector_length(n->in(2)) >= 4);
19869   match(Set dst (MinReductionV dst src));
19870   match(Set dst (MaxReductionV dst src));
19871   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19872   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19873   ins_encode %{
19874     int opcode = this->ideal_Opcode();
19875     int vlen = Matcher::vector_length(this, $src);
19876     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19877                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19878   %}
19879   ins_pipe( pipe_slow );
19880 %}
19881 
19882 // ====================VECTOR ARITHMETIC=======================================
19883 
19884 // --------------------------------- ADD --------------------------------------
19885 
19886 // Bytes vector add
19887 instruct vaddB(vec dst, vec src) %{
19888   predicate(UseAVX == 0);
19889   match(Set dst (AddVB dst src));
19890   format %{ "paddb   $dst,$src\t! add packedB" %}
19891   ins_encode %{
19892     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19893   %}
19894   ins_pipe( pipe_slow );
19895 %}
19896 
19897 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19898   predicate(UseAVX > 0);
19899   match(Set dst (AddVB src1 src2));
19900   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
19901   ins_encode %{
19902     int vlen_enc = vector_length_encoding(this);
19903     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19904   %}
19905   ins_pipe( pipe_slow );
19906 %}
19907 
19908 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19909   predicate((UseAVX > 0) &&
19910             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19911   match(Set dst (AddVB src (LoadVector mem)));
19912   format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
19913   ins_encode %{
19914     int vlen_enc = vector_length_encoding(this);
19915     __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19916   %}
19917   ins_pipe( pipe_slow );
19918 %}
19919 
19920 // Shorts/Chars vector add
19921 instruct vaddS(vec dst, vec src) %{
19922   predicate(UseAVX == 0);
19923   match(Set dst (AddVS dst src));
19924   format %{ "paddw   $dst,$src\t! add packedS" %}
19925   ins_encode %{
19926     __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19927   %}
19928   ins_pipe( pipe_slow );
19929 %}
19930 
19931 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19932   predicate(UseAVX > 0);
19933   match(Set dst (AddVS src1 src2));
19934   format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
19935   ins_encode %{
19936     int vlen_enc = vector_length_encoding(this);
19937     __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19938   %}
19939   ins_pipe( pipe_slow );
19940 %}
19941 
19942 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19943   predicate((UseAVX > 0) &&
19944             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19945   match(Set dst (AddVS src (LoadVector mem)));
19946   format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
19947   ins_encode %{
19948     int vlen_enc = vector_length_encoding(this);
19949     __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19950   %}
19951   ins_pipe( pipe_slow );
19952 %}
19953 
19954 // Integers vector add
19955 instruct vaddI(vec dst, vec src) %{
19956   predicate(UseAVX == 0);
19957   match(Set dst (AddVI dst src));
19958   format %{ "paddd   $dst,$src\t! add packedI" %}
19959   ins_encode %{
19960     __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19961   %}
19962   ins_pipe( pipe_slow );
19963 %}
19964 
19965 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19966   predicate(UseAVX > 0);
19967   match(Set dst (AddVI src1 src2));
19968   format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
19969   ins_encode %{
19970     int vlen_enc = vector_length_encoding(this);
19971     __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19972   %}
19973   ins_pipe( pipe_slow );
19974 %}
19975 
19976 
19977 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19978   predicate((UseAVX > 0) &&
19979             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19980   match(Set dst (AddVI src (LoadVector mem)));
19981   format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
19982   ins_encode %{
19983     int vlen_enc = vector_length_encoding(this);
19984     __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19985   %}
19986   ins_pipe( pipe_slow );
19987 %}
19988 
19989 // Longs vector add
19990 instruct vaddL(vec dst, vec src) %{
19991   predicate(UseAVX == 0);
19992   match(Set dst (AddVL dst src));
19993   format %{ "paddq   $dst,$src\t! add packedL" %}
19994   ins_encode %{
19995     __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19996   %}
19997   ins_pipe( pipe_slow );
19998 %}
19999 
20000 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20001   predicate(UseAVX > 0);
20002   match(Set dst (AddVL src1 src2));
20003   format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
20004   ins_encode %{
20005     int vlen_enc = vector_length_encoding(this);
20006     __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20007   %}
20008   ins_pipe( pipe_slow );
20009 %}
20010 
20011 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20012   predicate((UseAVX > 0) &&
20013             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20014   match(Set dst (AddVL src (LoadVector mem)));
20015   format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
20016   ins_encode %{
20017     int vlen_enc = vector_length_encoding(this);
20018     __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20019   %}
20020   ins_pipe( pipe_slow );
20021 %}
20022 
20023 // Floats vector add
20024 instruct vaddF(vec dst, vec src) %{
20025   predicate(UseAVX == 0);
20026   match(Set dst (AddVF dst src));
20027   format %{ "addps   $dst,$src\t! add packedF" %}
20028   ins_encode %{
20029     __ addps($dst$$XMMRegister, $src$$XMMRegister);
20030   %}
20031   ins_pipe( pipe_slow );
20032 %}
20033 
20034 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20035   predicate(UseAVX > 0);
20036   match(Set dst (AddVF src1 src2));
20037   format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
20038   ins_encode %{
20039     int vlen_enc = vector_length_encoding(this);
20040     __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20041   %}
20042   ins_pipe( pipe_slow );
20043 %}
20044 
20045 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20046   predicate((UseAVX > 0) &&
20047             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20048   match(Set dst (AddVF src (LoadVector mem)));
20049   format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
20050   ins_encode %{
20051     int vlen_enc = vector_length_encoding(this);
20052     __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20053   %}
20054   ins_pipe( pipe_slow );
20055 %}
20056 
20057 // Doubles vector add
20058 instruct vaddD(vec dst, vec src) %{
20059   predicate(UseAVX == 0);
20060   match(Set dst (AddVD dst src));
20061   format %{ "addpd   $dst,$src\t! add packedD" %}
20062   ins_encode %{
20063     __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20064   %}
20065   ins_pipe( pipe_slow );
20066 %}
20067 
20068 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20069   predicate(UseAVX > 0);
20070   match(Set dst (AddVD src1 src2));
20071   format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
20072   ins_encode %{
20073     int vlen_enc = vector_length_encoding(this);
20074     __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20075   %}
20076   ins_pipe( pipe_slow );
20077 %}
20078 
20079 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20080   predicate((UseAVX > 0) &&
20081             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20082   match(Set dst (AddVD src (LoadVector mem)));
20083   format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
20084   ins_encode %{
20085     int vlen_enc = vector_length_encoding(this);
20086     __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20087   %}
20088   ins_pipe( pipe_slow );
20089 %}
20090 
20091 // --------------------------------- SUB --------------------------------------
20092 
20093 // Bytes vector sub
20094 instruct vsubB(vec dst, vec src) %{
20095   predicate(UseAVX == 0);
20096   match(Set dst (SubVB dst src));
20097   format %{ "psubb   $dst,$src\t! sub packedB" %}
20098   ins_encode %{
20099     __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20100   %}
20101   ins_pipe( pipe_slow );
20102 %}
20103 
20104 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20105   predicate(UseAVX > 0);
20106   match(Set dst (SubVB src1 src2));
20107   format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
20108   ins_encode %{
20109     int vlen_enc = vector_length_encoding(this);
20110     __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20111   %}
20112   ins_pipe( pipe_slow );
20113 %}
20114 
20115 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20116   predicate((UseAVX > 0) &&
20117             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20118   match(Set dst (SubVB src (LoadVector mem)));
20119   format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
20120   ins_encode %{
20121     int vlen_enc = vector_length_encoding(this);
20122     __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20123   %}
20124   ins_pipe( pipe_slow );
20125 %}
20126 
20127 // Shorts/Chars vector sub
20128 instruct vsubS(vec dst, vec src) %{
20129   predicate(UseAVX == 0);
20130   match(Set dst (SubVS dst src));
20131   format %{ "psubw   $dst,$src\t! sub packedS" %}
20132   ins_encode %{
20133     __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20134   %}
20135   ins_pipe( pipe_slow );
20136 %}
20137 
20138 
20139 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20140   predicate(UseAVX > 0);
20141   match(Set dst (SubVS src1 src2));
20142   format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
20143   ins_encode %{
20144     int vlen_enc = vector_length_encoding(this);
20145     __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20146   %}
20147   ins_pipe( pipe_slow );
20148 %}
20149 
20150 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20151   predicate((UseAVX > 0) &&
20152             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20153   match(Set dst (SubVS src (LoadVector mem)));
20154   format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
20155   ins_encode %{
20156     int vlen_enc = vector_length_encoding(this);
20157     __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20158   %}
20159   ins_pipe( pipe_slow );
20160 %}
20161 
20162 // Integers vector sub
20163 instruct vsubI(vec dst, vec src) %{
20164   predicate(UseAVX == 0);
20165   match(Set dst (SubVI dst src));
20166   format %{ "psubd   $dst,$src\t! sub packedI" %}
20167   ins_encode %{
20168     __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20169   %}
20170   ins_pipe( pipe_slow );
20171 %}
20172 
20173 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20174   predicate(UseAVX > 0);
20175   match(Set dst (SubVI src1 src2));
20176   format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
20177   ins_encode %{
20178     int vlen_enc = vector_length_encoding(this);
20179     __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20180   %}
20181   ins_pipe( pipe_slow );
20182 %}
20183 
20184 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20185   predicate((UseAVX > 0) &&
20186             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20187   match(Set dst (SubVI src (LoadVector mem)));
20188   format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
20189   ins_encode %{
20190     int vlen_enc = vector_length_encoding(this);
20191     __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20192   %}
20193   ins_pipe( pipe_slow );
20194 %}
20195 
20196 // Longs vector sub
20197 instruct vsubL(vec dst, vec src) %{
20198   predicate(UseAVX == 0);
20199   match(Set dst (SubVL dst src));
20200   format %{ "psubq   $dst,$src\t! sub packedL" %}
20201   ins_encode %{
20202     __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20203   %}
20204   ins_pipe( pipe_slow );
20205 %}
20206 
20207 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20208   predicate(UseAVX > 0);
20209   match(Set dst (SubVL src1 src2));
20210   format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
20211   ins_encode %{
20212     int vlen_enc = vector_length_encoding(this);
20213     __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20214   %}
20215   ins_pipe( pipe_slow );
20216 %}
20217 
20218 
20219 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20220   predicate((UseAVX > 0) &&
20221             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20222   match(Set dst (SubVL src (LoadVector mem)));
20223   format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
20224   ins_encode %{
20225     int vlen_enc = vector_length_encoding(this);
20226     __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20227   %}
20228   ins_pipe( pipe_slow );
20229 %}
20230 
20231 // Floats vector sub
20232 instruct vsubF(vec dst, vec src) %{
20233   predicate(UseAVX == 0);
20234   match(Set dst (SubVF dst src));
20235   format %{ "subps   $dst,$src\t! sub packedF" %}
20236   ins_encode %{
20237     __ subps($dst$$XMMRegister, $src$$XMMRegister);
20238   %}
20239   ins_pipe( pipe_slow );
20240 %}
20241 
20242 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20243   predicate(UseAVX > 0);
20244   match(Set dst (SubVF src1 src2));
20245   format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
20246   ins_encode %{
20247     int vlen_enc = vector_length_encoding(this);
20248     __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20249   %}
20250   ins_pipe( pipe_slow );
20251 %}
20252 
20253 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20254   predicate((UseAVX > 0) &&
20255             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20256   match(Set dst (SubVF src (LoadVector mem)));
20257   format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
20258   ins_encode %{
20259     int vlen_enc = vector_length_encoding(this);
20260     __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20261   %}
20262   ins_pipe( pipe_slow );
20263 %}
20264 
20265 // Doubles vector sub
20266 instruct vsubD(vec dst, vec src) %{
20267   predicate(UseAVX == 0);
20268   match(Set dst (SubVD dst src));
20269   format %{ "subpd   $dst,$src\t! sub packedD" %}
20270   ins_encode %{
20271     __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20272   %}
20273   ins_pipe( pipe_slow );
20274 %}
20275 
20276 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20277   predicate(UseAVX > 0);
20278   match(Set dst (SubVD src1 src2));
20279   format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
20280   ins_encode %{
20281     int vlen_enc = vector_length_encoding(this);
20282     __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20283   %}
20284   ins_pipe( pipe_slow );
20285 %}
20286 
20287 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20288   predicate((UseAVX > 0) &&
20289             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20290   match(Set dst (SubVD src (LoadVector mem)));
20291   format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
20292   ins_encode %{
20293     int vlen_enc = vector_length_encoding(this);
20294     __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20295   %}
20296   ins_pipe( pipe_slow );
20297 %}
20298 
20299 // --------------------------------- MUL --------------------------------------
20300 
20301 // Byte vector mul
20302 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20303   predicate(Matcher::vector_length_in_bytes(n) <= 8);
20304   match(Set dst (MulVB src1 src2));
20305   effect(TEMP dst, TEMP xtmp);
20306   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20307   ins_encode %{
20308     assert(UseSSE > 3, "required");
20309     __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20310     __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20311     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20312     __ psllw($dst$$XMMRegister, 8);
20313     __ psrlw($dst$$XMMRegister, 8);
20314     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20315   %}
20316   ins_pipe( pipe_slow );
20317 %}
20318 
20319 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20320   predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20321   match(Set dst (MulVB src1 src2));
20322   effect(TEMP dst, TEMP xtmp);
20323   format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20324   ins_encode %{
20325     assert(UseSSE > 3, "required");
20326     // Odd-index elements
20327     __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20328     __ psrlw($dst$$XMMRegister, 8);
20329     __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20330     __ psrlw($xtmp$$XMMRegister, 8);
20331     __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20332     __ psllw($dst$$XMMRegister, 8);
20333     // Even-index elements
20334     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20335     __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20336     __ psllw($xtmp$$XMMRegister, 8);
20337     __ psrlw($xtmp$$XMMRegister, 8);
20338     // Combine
20339     __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20340   %}
20341   ins_pipe( pipe_slow );
20342 %}
20343 
20344 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20345   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20346   match(Set dst (MulVB src1 src2));
20347   effect(TEMP xtmp1, TEMP xtmp2);
20348   format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20349   ins_encode %{
20350     int vlen_enc = vector_length_encoding(this);
20351     // Odd-index elements
20352     __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20353     __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20354     __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20355     __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20356     // Even-index elements
20357     __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20358     __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20359     __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20360     // Combine
20361     __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20362   %}
20363   ins_pipe( pipe_slow );
20364 %}
20365 
20366 // Shorts/Chars vector mul
20367 instruct vmulS(vec dst, vec src) %{
20368   predicate(UseAVX == 0);
20369   match(Set dst (MulVS dst src));
20370   format %{ "pmullw  $dst,$src\t! mul packedS" %}
20371   ins_encode %{
20372     __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20373   %}
20374   ins_pipe( pipe_slow );
20375 %}
20376 
20377 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20378   predicate(UseAVX > 0);
20379   match(Set dst (MulVS src1 src2));
20380   format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20381   ins_encode %{
20382     int vlen_enc = vector_length_encoding(this);
20383     __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20384   %}
20385   ins_pipe( pipe_slow );
20386 %}
20387 
20388 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20389   predicate((UseAVX > 0) &&
20390             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20391   match(Set dst (MulVS src (LoadVector mem)));
20392   format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20393   ins_encode %{
20394     int vlen_enc = vector_length_encoding(this);
20395     __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20396   %}
20397   ins_pipe( pipe_slow );
20398 %}
20399 
20400 // Integers vector mul
20401 instruct vmulI(vec dst, vec src) %{
20402   predicate(UseAVX == 0);
20403   match(Set dst (MulVI dst src));
20404   format %{ "pmulld  $dst,$src\t! mul packedI" %}
20405   ins_encode %{
20406     assert(UseSSE > 3, "required");
20407     __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20408   %}
20409   ins_pipe( pipe_slow );
20410 %}
20411 
20412 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20413   predicate(UseAVX > 0);
20414   match(Set dst (MulVI src1 src2));
20415   format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20416   ins_encode %{
20417     int vlen_enc = vector_length_encoding(this);
20418     __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20419   %}
20420   ins_pipe( pipe_slow );
20421 %}
20422 
20423 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20424   predicate((UseAVX > 0) &&
20425             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20426   match(Set dst (MulVI src (LoadVector mem)));
20427   format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20428   ins_encode %{
20429     int vlen_enc = vector_length_encoding(this);
20430     __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20431   %}
20432   ins_pipe( pipe_slow );
20433 %}
20434 
20435 // Longs vector mul
20436 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20437   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20438              VM_Version::supports_avx512dq()) ||
20439             VM_Version::supports_avx512vldq());
20440   match(Set dst (MulVL src1 src2));
20441   ins_cost(500);
20442   format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20443   ins_encode %{
20444     assert(UseAVX > 2, "required");
20445     int vlen_enc = vector_length_encoding(this);
20446     __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20447   %}
20448   ins_pipe( pipe_slow );
20449 %}
20450 
20451 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20452   predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20453              VM_Version::supports_avx512dq()) ||
20454             (Matcher::vector_length_in_bytes(n) > 8 &&
20455              VM_Version::supports_avx512vldq()));
20456   match(Set dst (MulVL src (LoadVector mem)));
20457   format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20458   ins_cost(500);
20459   ins_encode %{
20460     assert(UseAVX > 2, "required");
20461     int vlen_enc = vector_length_encoding(this);
20462     __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20463   %}
20464   ins_pipe( pipe_slow );
20465 %}
20466 
20467 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20468   predicate(UseAVX == 0);
20469   match(Set dst (MulVL src1 src2));
20470   ins_cost(500);
20471   effect(TEMP dst, TEMP xtmp);
20472   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20473   ins_encode %{
20474     assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only the lower 32 bits of each product are needed
20476     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20477     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20478     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20479     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20480     __ psllq($dst$$XMMRegister, 32);
20481     // Get the lo-lo products
20482     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20483     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20484     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20485   %}
20486   ins_pipe( pipe_slow );
20487 %}
20488 
20489 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20490   predicate(UseAVX > 0 &&
20491             ((Matcher::vector_length_in_bytes(n) == 64 &&
20492               !VM_Version::supports_avx512dq()) ||
20493              (Matcher::vector_length_in_bytes(n) < 64 &&
20494               !VM_Version::supports_avx512vldq())));
20495   match(Set dst (MulVL src1 src2));
20496   effect(TEMP xtmp1, TEMP xtmp2);
20497   ins_cost(500);
20498   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20499   ins_encode %{
20500     int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only the lower 32 bits of each product are needed
20502     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20503     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20504     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20505     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20506     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20507     // Get the lo-lo products
20508     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20509     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20510   %}
20511   ins_pipe( pipe_slow );
20512 %}
20513 
20514 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20515   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20516   match(Set dst (MulVL src1 src2));
20517   ins_cost(100);
20518   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20519   ins_encode %{
20520     int vlen_enc = vector_length_encoding(this);
20521     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20522   %}
20523   ins_pipe( pipe_slow );
20524 %}
20525 
20526 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20527   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20528   match(Set dst (MulVL src1 src2));
20529   ins_cost(100);
20530   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20531   ins_encode %{
20532     int vlen_enc = vector_length_encoding(this);
20533     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20534   %}
20535   ins_pipe( pipe_slow );
20536 %}
20537 
20538 // Floats vector mul
20539 instruct vmulF(vec dst, vec src) %{
20540   predicate(UseAVX == 0);
20541   match(Set dst (MulVF dst src));
20542   format %{ "mulps   $dst,$src\t! mul packedF" %}
20543   ins_encode %{
20544     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20545   %}
20546   ins_pipe( pipe_slow );
20547 %}
20548 
20549 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20550   predicate(UseAVX > 0);
20551   match(Set dst (MulVF src1 src2));
20552   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20553   ins_encode %{
20554     int vlen_enc = vector_length_encoding(this);
20555     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20556   %}
20557   ins_pipe( pipe_slow );
20558 %}
20559 
20560 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20561   predicate((UseAVX > 0) &&
20562             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20563   match(Set dst (MulVF src (LoadVector mem)));
20564   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20565   ins_encode %{
20566     int vlen_enc = vector_length_encoding(this);
20567     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20568   %}
20569   ins_pipe( pipe_slow );
20570 %}
20571 
20572 // Doubles vector mul
20573 instruct vmulD(vec dst, vec src) %{
20574   predicate(UseAVX == 0);
20575   match(Set dst (MulVD dst src));
20576   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20577   ins_encode %{
20578     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20579   %}
20580   ins_pipe( pipe_slow );
20581 %}
20582 
20583 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20584   predicate(UseAVX > 0);
20585   match(Set dst (MulVD src1 src2));
20586   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20587   ins_encode %{
20588     int vlen_enc = vector_length_encoding(this);
20589     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20590   %}
20591   ins_pipe( pipe_slow );
20592 %}
20593 
20594 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20595   predicate((UseAVX > 0) &&
20596             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20597   match(Set dst (MulVD src (LoadVector mem)));
20598   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20599   ins_encode %{
20600     int vlen_enc = vector_length_encoding(this);
20601     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20602   %}
20603   ins_pipe( pipe_slow );
20604 %}
20605 
20606 // --------------------------------- DIV --------------------------------------
20607 
20608 // Floats vector div
20609 instruct vdivF(vec dst, vec src) %{
20610   predicate(UseAVX == 0);
20611   match(Set dst (DivVF dst src));
20612   format %{ "divps   $dst,$src\t! div packedF" %}
20613   ins_encode %{
20614     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20615   %}
20616   ins_pipe( pipe_slow );
20617 %}
20618 
20619 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20620   predicate(UseAVX > 0);
20621   match(Set dst (DivVF src1 src2));
20622   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20623   ins_encode %{
20624     int vlen_enc = vector_length_encoding(this);
20625     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20626   %}
20627   ins_pipe( pipe_slow );
20628 %}
20629 
20630 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20631   predicate((UseAVX > 0) &&
20632             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20633   match(Set dst (DivVF src (LoadVector mem)));
20634   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20635   ins_encode %{
20636     int vlen_enc = vector_length_encoding(this);
20637     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20638   %}
20639   ins_pipe( pipe_slow );
20640 %}
20641 
20642 // Doubles vector div
20643 instruct vdivD(vec dst, vec src) %{
20644   predicate(UseAVX == 0);
20645   match(Set dst (DivVD dst src));
20646   format %{ "divpd   $dst,$src\t! div packedD" %}
20647   ins_encode %{
20648     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20649   %}
20650   ins_pipe( pipe_slow );
20651 %}
20652 
20653 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20654   predicate(UseAVX > 0);
20655   match(Set dst (DivVD src1 src2));
20656   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20657   ins_encode %{
20658     int vlen_enc = vector_length_encoding(this);
20659     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20660   %}
20661   ins_pipe( pipe_slow );
20662 %}
20663 
20664 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20665   predicate((UseAVX > 0) &&
20666             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20667   match(Set dst (DivVD src (LoadVector mem)));
20668   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20669   ins_encode %{
20670     int vlen_enc = vector_length_encoding(this);
20671     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20672   %}
20673   ins_pipe( pipe_slow );
20674 %}
20675 
20676 // ------------------------------ MinMax ---------------------------------------
20677 
20678 // Byte, Short, Int vector Min/Max
20679 instruct minmax_reg_sse(vec dst, vec src) %{
20680   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20681             UseAVX == 0);
20682   match(Set dst (MinV dst src));
20683   match(Set dst (MaxV dst src));
20684   format %{ "vector_minmax  $dst,$src\t!  " %}
20685   ins_encode %{
20686     assert(UseSSE >= 4, "required");
20687 
20688     int opcode = this->ideal_Opcode();
20689     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20690     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20691   %}
20692   ins_pipe( pipe_slow );
20693 %}
20694 
20695 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20696   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20697             UseAVX > 0);
20698   match(Set dst (MinV src1 src2));
20699   match(Set dst (MaxV src1 src2));
20700   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20701   ins_encode %{
20702     int opcode = this->ideal_Opcode();
20703     int vlen_enc = vector_length_encoding(this);
20704     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20705 
20706     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20707   %}
20708   ins_pipe( pipe_slow );
20709 %}
20710 
20711 // Long vector Min/Max
20712 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20713   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20714             UseAVX == 0);
20715   match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
20717   effect(TEMP dst, TEMP tmp);
20718   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20719   ins_encode %{
20720     assert(UseSSE >= 4, "required");
20721 
20722     int opcode = this->ideal_Opcode();
20723     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20724     assert(elem_bt == T_LONG, "sanity");
20725 
20726     __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20727   %}
20728   ins_pipe( pipe_slow );
20729 %}
20730 
20731 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20732   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20733             UseAVX > 0 && !VM_Version::supports_avx512vl());
20734   match(Set dst (MinV src1 src2));
20735   match(Set dst (MaxV src1 src2));
20736   effect(TEMP dst);
20737   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20738   ins_encode %{
20739     int vlen_enc = vector_length_encoding(this);
20740     int opcode = this->ideal_Opcode();
20741     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20742     assert(elem_bt == T_LONG, "sanity");
20743 
20744     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20745   %}
20746   ins_pipe( pipe_slow );
20747 %}
20748 
20749 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20750   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20751             Matcher::vector_element_basic_type(n) == T_LONG);
20752   match(Set dst (MinV src1 src2));
20753   match(Set dst (MaxV src1 src2));
20754   format %{ "vector_minmaxL  $dst,$src1,src2\t! " %}
20755   ins_encode %{
20756     assert(UseAVX > 2, "required");
20757 
20758     int vlen_enc = vector_length_encoding(this);
20759     int opcode = this->ideal_Opcode();
20760     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20761     assert(elem_bt == T_LONG, "sanity");
20762 
20763     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20764   %}
20765   ins_pipe( pipe_slow );
20766 %}
20767 
20768 // Float/Double vector Min/Max
20769 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20770   predicate(VM_Version::supports_avx10_2() &&
20771             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20772   match(Set dst (MinV a b));
20773   match(Set dst (MaxV a b));
20774   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20775   ins_encode %{
20776     int vlen_enc = vector_length_encoding(this);
20777     int opcode = this->ideal_Opcode();
20778     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20779     __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20780   %}
20781   ins_pipe( pipe_slow );
20782 %}
20783 
20784 // Float/Double vector Min/Max
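// Java semantics require that NaN propagate to the result and that
// min(-0.0, +0.0) == -0.0 (symmetrically, max(-0.0, +0.0) == +0.0).
// Plain vminps/vmaxps return the second source operand when an input is
// NaN and do not order -0.0 before +0.0, so the patterns below blend the
// operands through temporaries to repair the NaN and signed-zero cases.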
20785 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20786   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20787             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20788             UseAVX > 0);
20789   match(Set dst (MinV a b));
20790   match(Set dst (MaxV a b));
20791   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20792   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20793   ins_encode %{
20794     assert(UseAVX > 0, "required");
20795 
20796     int opcode = this->ideal_Opcode();
20797     int vlen_enc = vector_length_encoding(this);
20798     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20799 
20800     __ vminmax_fp(opcode, elem_bt,
20801                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20802                   $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20803   %}
20804   ins_pipe( pipe_slow );
20805 %}
20806 
20807 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20808   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20809             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20810   match(Set dst (MinV a b));
20811   match(Set dst (MaxV a b));
20812   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20813   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20814   ins_encode %{
20815     assert(UseAVX > 2, "required");
20816 
20817     int opcode = this->ideal_Opcode();
20818     int vlen_enc = vector_length_encoding(this);
20819     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20820 
20821     __ evminmax_fp(opcode, elem_bt,
20822                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20823                    $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20824   %}
20825   ins_pipe( pipe_slow );
20826 %}
20827 
20828 // ------------------------------ Unsigned vector Min/Max ----------------------
20829 
20830 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20831   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20832   match(Set dst (UMinV a b));
20833   match(Set dst (UMaxV a b));
20834   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20835   ins_encode %{
20836     int opcode = this->ideal_Opcode();
20837     int vlen_enc = vector_length_encoding(this);
20838     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20839     assert(is_integral_type(elem_bt), "");
20840     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20841   %}
20842   ins_pipe( pipe_slow );
20843 %}
20844 
20845 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20846   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20847   match(Set dst (UMinV a (LoadVector b)));
20848   match(Set dst (UMaxV a (LoadVector b)));
20849   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20850   ins_encode %{
20851     int opcode = this->ideal_Opcode();
20852     int vlen_enc = vector_length_encoding(this);
20853     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20854     assert(is_integral_type(elem_bt), "");
20855     __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20856   %}
20857   ins_pipe( pipe_slow );
20858 %}
20859 
20860 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20861   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20862   match(Set dst (UMinV a b));
20863   match(Set dst (UMaxV a b));
20864   effect(TEMP xtmp1, TEMP xtmp2);
20865   format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
20866   ins_encode %{
20867     int opcode = this->ideal_Opcode();
20868     int vlen_enc = vector_length_encoding(this);
20869     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20870   %}
20871   ins_pipe( pipe_slow );
20872 %}
20873 
20874 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20875   match(Set dst (UMinV (Binary dst src2) mask));
20876   match(Set dst (UMaxV (Binary dst src2) mask));
20877   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20878   ins_encode %{
20879     int vlen_enc = vector_length_encoding(this);
20880     BasicType bt = Matcher::vector_element_basic_type(this);
20881     int opc = this->ideal_Opcode();
20882     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20883                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20884   %}
20885   ins_pipe( pipe_slow );
20886 %}
20887 
20888 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20889   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20890   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20891   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20892   ins_encode %{
20893     int vlen_enc = vector_length_encoding(this);
20894     BasicType bt = Matcher::vector_element_basic_type(this);
20895     int opc = this->ideal_Opcode();
20896     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20897                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20898   %}
20899   ins_pipe( pipe_slow );
20900 %}
20901 
20902 // --------------------------------- Signum/CopySign ---------------------------
20903 
20904 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20905   match(Set dst (SignumF dst (Binary zero one)));
20906   effect(KILL cr);
20907   format %{ "signumF $dst, $dst" %}
20908   ins_encode %{
20909     int opcode = this->ideal_Opcode();
20910     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20911   %}
20912   ins_pipe( pipe_slow );
20913 %}
20914 
20915 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20916   match(Set dst (SignumD dst (Binary zero one)));
20917   effect(KILL cr);
20918   format %{ "signumD $dst, $dst" %}
20919   ins_encode %{
20920     int opcode = this->ideal_Opcode();
20921     __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20922   %}
20923   ins_pipe( pipe_slow );
20924 %}
20925 
20926 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20927   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20928   match(Set dst (SignumVF src (Binary zero one)));
20929   match(Set dst (SignumVD src (Binary zero one)));
20930   effect(TEMP dst, TEMP xtmp1);
20931   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20932   ins_encode %{
20933     int opcode = this->ideal_Opcode();
20934     int vec_enc = vector_length_encoding(this);
20935     __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20936                          $xtmp1$$XMMRegister, vec_enc);
20937   %}
20938   ins_pipe( pipe_slow );
20939 %}
20940 
20941 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20942   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20943   match(Set dst (SignumVF src (Binary zero one)));
20944   match(Set dst (SignumVD src (Binary zero one)));
20945   effect(TEMP dst, TEMP ktmp1);
20946   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20947   ins_encode %{
20948     int opcode = this->ideal_Opcode();
20949     int vec_enc = vector_length_encoding(this);
20950     __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20951                           $ktmp1$$KRegister, vec_enc);
20952   %}
20953   ins_pipe( pipe_slow );
20954 %}
20955 
20956 // ---------------------------------------
20957 // For copySign use 0xE4 as writemask for vpternlog
20958 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20959 // C (xmm2) is set to 0x7FFFFFFF
20960 // Wherever xmm2 is 0, we want to pick from B (sign)
20961 // Wherever xmm2 is 1, we want to pick from A (src)
20962 //
20963 // A B C Result
20964 // 0 0 0 0
20965 // 0 0 1 0
20966 // 0 1 0 1
20967 // 0 1 1 0
20968 // 1 0 0 0
20969 // 1 0 1 1
20970 // 1 1 0 1
20971 // 1 1 1 1
20972 //
20973 // Result going from high bit to low bit is 0b11100100 = 0xE4
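//
// The immediate can be derived mechanically: bit i of the vpternlog
// immediate is the value of the desired function when the bits of i,
// read as (A,B,C), are fed in. A minimal illustrative sketch in C++
// (not part of the build; the helper name is made up):
//
//   #include <cstdint>
//   static uint8_t copysign_ternlog_imm() {
//     uint8_t imm = 0;
//     for (int i = 0; i < 8; i++) {
//       int a = (i >> 2) & 1, b = (i >> 1) & 1, c = i & 1;
//       int f = c ? a : b;          // pick from A (src) where C is 1, else from B (sign)
//       imm |= (uint8_t)(f << i);   // bit i = f(A,B,C)
//     }
//     return imm;                   // yields 0xE4
//   }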
20974 // ---------------------------------------
20975 
20976 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20977   match(Set dst (CopySignF dst src));
20978   effect(TEMP tmp1, TEMP tmp2);
20979   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20980   ins_encode %{
20981     __ movl($tmp2$$Register, 0x7FFFFFFF);
20982     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20983     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20984   %}
20985   ins_pipe( pipe_slow );
20986 %}
20987 
20988 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20989   match(Set dst (CopySignD dst (Binary src zero)));
20990   ins_cost(100);
20991   effect(TEMP tmp1, TEMP tmp2);
20992   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20993   ins_encode %{
20994     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20995     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20996     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20997   %}
20998   ins_pipe( pipe_slow );
20999 %}
21000 
21001 //----------------------------- CompressBits/ExpandBits ------------------------
21002 
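// BMI2 semantics, for reference: pext gathers the src bits selected by the
// mask into the low-order bits of the result, and pdep scatters the
// low-order src bits out to the positions selected by the mask. A worked
// example with illustrative values:
//   pext(src = 0b10110010, mask = 0b11001100) == 0b1000
//     (src bits 7,6,3,2 = 1,0,0,0, packed to the low end)
//   pdep(src = 0b1011,     mask = 0b11001100) == 0b10001100
//     (src bits 0..3 = 1,1,0,1 land in mask positions 2,3,6,7)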
21003 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21004   predicate(n->bottom_type()->isa_int());
21005   match(Set dst (CompressBits src mask));
21006   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21007   ins_encode %{
21008     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21009   %}
21010   ins_pipe( pipe_slow );
21011 %}
21012 
21013 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21014   predicate(n->bottom_type()->isa_int());
21015   match(Set dst (ExpandBits src mask));
21016   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21017   ins_encode %{
21018     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21019   %}
21020   ins_pipe( pipe_slow );
21021 %}
21022 
21023 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21024   predicate(n->bottom_type()->isa_int());
21025   match(Set dst (CompressBits src (LoadI mask)));
21026   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
21027   ins_encode %{
21028     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21029   %}
21030   ins_pipe( pipe_slow );
21031 %}
21032 
21033 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21034   predicate(n->bottom_type()->isa_int());
21035   match(Set dst (ExpandBits src (LoadI mask)));
21036   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
21037   ins_encode %{
21038     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21039   %}
21040   ins_pipe( pipe_slow );
21041 %}
21042 
21043 // --------------------------------- Sqrt --------------------------------------
21044 
21045 instruct vsqrtF_reg(vec dst, vec src) %{
21046   match(Set dst (SqrtVF src));
21047   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
21048   ins_encode %{
21049     assert(UseAVX > 0, "required");
21050     int vlen_enc = vector_length_encoding(this);
21051     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21052   %}
21053   ins_pipe( pipe_slow );
21054 %}
21055 
21056 instruct vsqrtF_mem(vec dst, memory mem) %{
21057   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21058   match(Set dst (SqrtVF (LoadVector mem)));
21059   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
21060   ins_encode %{
21061     assert(UseAVX > 0, "required");
21062     int vlen_enc = vector_length_encoding(this);
21063     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21064   %}
21065   ins_pipe( pipe_slow );
21066 %}
21067 
21068 // Floating point vector sqrt
21069 instruct vsqrtD_reg(vec dst, vec src) %{
21070   match(Set dst (SqrtVD src));
21071   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
21072   ins_encode %{
21073     assert(UseAVX > 0, "required");
21074     int vlen_enc = vector_length_encoding(this);
21075     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21076   %}
21077   ins_pipe( pipe_slow );
21078 %}
21079 
21080 instruct vsqrtD_mem(vec dst, memory mem) %{
21081   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21082   match(Set dst (SqrtVD (LoadVector mem)));
21083   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
21084   ins_encode %{
21085     assert(UseAVX > 0, "required");
21086     int vlen_enc = vector_length_encoding(this);
21087     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21088   %}
21089   ins_pipe( pipe_slow );
21090 %}
21091 
21092 // ------------------------------ Shift ---------------------------------------
21093 
21094 // Left and right shift count vectors are the same on x86
21095 // (only lowest bits of xmm reg are used for count).
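// The SSE/AVX packed shifts (e.g. psllw/pslld/psllq) take the count from
// the low 64 bits of the xmm operand regardless of element size, so a
// single movdl-loaded count works for every element width.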
21096 instruct vshiftcnt(vec dst, rRegI cnt) %{
21097   match(Set dst (LShiftCntV cnt));
21098   match(Set dst (RShiftCntV cnt));
21099   format %{ "movdl    $dst,$cnt\t! load shift count" %}
21100   ins_encode %{
21101     __ movdl($dst$$XMMRegister, $cnt$$Register);
21102   %}
21103   ins_pipe( pipe_slow );
21104 %}
21105 
21106 // Byte vector shift
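// There is no byte-granularity vector shift on x86, so byte shifts are
// synthesized: extend the bytes to words, shift the words, mask away the
// high byte of each word, and pack the result back down to bytes.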
21107 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21108   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21109   match(Set dst ( LShiftVB src shift));
21110   match(Set dst ( RShiftVB src shift));
21111   match(Set dst (URShiftVB src shift));
21112   effect(TEMP dst, USE src, USE shift, TEMP tmp);
21113   format %{"vector_byte_shift $dst,$src,$shift" %}
21114   ins_encode %{
21115     assert(UseSSE > 3, "required");
21116     int opcode = this->ideal_Opcode();
21117     bool sign = (opcode != Op_URShiftVB);
21118     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21119     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21120     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21121     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21122     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21123   %}
21124   ins_pipe( pipe_slow );
21125 %}
21126 
21127 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21128   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21129             UseAVX <= 1);
21130   match(Set dst ( LShiftVB src shift));
21131   match(Set dst ( RShiftVB src shift));
21132   match(Set dst (URShiftVB src shift));
21133   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21134   format %{"vector_byte_shift $dst,$src,$shift" %}
21135   ins_encode %{
21136     assert(UseSSE > 3, "required");
21137     int opcode = this->ideal_Opcode();
21138     bool sign = (opcode != Op_URShiftVB);
21139     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21140     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21141     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21142     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21143     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21144     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21145     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21146     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21147     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21148   %}
21149   ins_pipe( pipe_slow );
21150 %}
21151 
21152 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21153   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21154             UseAVX > 1);
21155   match(Set dst ( LShiftVB src shift));
21156   match(Set dst ( RShiftVB src shift));
21157   match(Set dst (URShiftVB src shift));
21158   effect(TEMP dst, TEMP tmp);
21159   format %{"vector_byte_shift $dst,$src,$shift" %}
21160   ins_encode %{
21161     int opcode = this->ideal_Opcode();
21162     bool sign = (opcode != Op_URShiftVB);
21163     int vlen_enc = Assembler::AVX_256bit;
21164     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21165     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21166     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21167     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21168     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21169   %}
21170   ins_pipe( pipe_slow );
21171 %}
21172 
21173 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21174   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21175   match(Set dst ( LShiftVB src shift));
21176   match(Set dst ( RShiftVB src shift));
21177   match(Set dst (URShiftVB src shift));
21178   effect(TEMP dst, TEMP tmp);
21179   format %{"vector_byte_shift $dst,$src,$shift" %}
21180   ins_encode %{
21181     assert(UseAVX > 1, "required");
21182     int opcode = this->ideal_Opcode();
21183     bool sign = (opcode != Op_URShiftVB);
21184     int vlen_enc = Assembler::AVX_256bit;
21185     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21186     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21187     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21188     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21189     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21190     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21191     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21192     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21193     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21194   %}
21195   ins_pipe( pipe_slow );
21196 %}
21197 
21198 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21199   predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21200   match(Set dst ( LShiftVB src shift));
21201   match(Set dst ( RShiftVB src shift));
21202   match(Set dst (URShiftVB src shift));
21203   effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21204   format %{ "vector_byte_shift $dst,$src,$shift" %}
21205   ins_encode %{
21206     assert(UseAVX > 2, "required");
21207     int opcode = this->ideal_Opcode();
21208     bool sign = (opcode != Op_URShiftVB);
21209     int vlen_enc = Assembler::AVX_512bit;
21210     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21211     __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21212     __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21213     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21214     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21215     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21216     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21217     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21218     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21219     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21220     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21221     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21222   %}
21223   ins_pipe( pipe_slow );
21224 %}
21225 
21226 // A logical right shift of a short vector produces an incorrect Java
21227 // result for negative data, because Java code converts a short value to
21228 // an int with sign extension before shifting. Char vectors are fine,
21229 // since chars are unsigned values.
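// For example, with short s = (short)0x8000, Java computes s >>> 4 as
// 0xFFFF8000 >>> 4 == 0x0FFFF800 (low 16 bits 0xF800), while a 16-bit
// lane-wise logical shift would yield 0x0800; only left and arithmetic
// right shifts agree lane-wise.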
21230 // Shorts/Chars vector left shift
21231 instruct vshiftS(vec dst, vec src, vec shift) %{
21232   predicate(!n->as_ShiftV()->is_var_shift());
21233   match(Set dst ( LShiftVS src shift));
21234   match(Set dst ( RShiftVS src shift));
21235   match(Set dst (URShiftVS src shift));
21236   effect(TEMP dst, USE src, USE shift);
21237   format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
21238   ins_encode %{
21239     int opcode = this->ideal_Opcode();
21240     if (UseAVX > 0) {
21241       int vlen_enc = vector_length_encoding(this);
21242       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21243     } else {
21244       int vlen = Matcher::vector_length(this);
21245       if (vlen == 2) {
21246         __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21247         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21248       } else if (vlen == 4) {
21249         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21250         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21251       } else {
21252         assert (vlen == 8, "sanity");
21253         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21254         __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21255       }
21256     }
21257   %}
21258   ins_pipe( pipe_slow );
21259 %}
21260 
21261 // Integers vector left shift
21262 instruct vshiftI(vec dst, vec src, vec shift) %{
21263   predicate(!n->as_ShiftV()->is_var_shift());
21264   match(Set dst ( LShiftVI src shift));
21265   match(Set dst ( RShiftVI src shift));
21266   match(Set dst (URShiftVI src shift));
21267   effect(TEMP dst, USE src, USE shift);
21268   format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
21269   ins_encode %{
21270     int opcode = this->ideal_Opcode();
21271     if (UseAVX > 0) {
21272       int vlen_enc = vector_length_encoding(this);
21273       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21274     } else {
21275       int vlen = Matcher::vector_length(this);
21276       if (vlen == 2) {
21277         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21278         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21279       } else {
21280         assert(vlen == 4, "sanity");
21281         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21282         __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21283       }
21284     }
21285   %}
21286   ins_pipe( pipe_slow );
21287 %}
21288 
21289 // Integers vector left constant shift
21290 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21291   match(Set dst (LShiftVI src (LShiftCntV shift)));
21292   match(Set dst (RShiftVI src (RShiftCntV shift)));
21293   match(Set dst (URShiftVI src (RShiftCntV shift)));
21294   format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
21295   ins_encode %{
21296     int opcode = this->ideal_Opcode();
21297     if (UseAVX > 0) {
21298       int vector_len = vector_length_encoding(this);
21299       __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21300     } else {
21301       int vlen = Matcher::vector_length(this);
21302       if (vlen == 2) {
21303         __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21304         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21305       } else {
21306         assert(vlen == 4, "sanity");
21307         __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21308         __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21309       }
21310     }
21311   %}
21312   ins_pipe( pipe_slow );
21313 %}
21314 
21315 // Longs vector shift
21316 instruct vshiftL(vec dst, vec src, vec shift) %{
21317   predicate(!n->as_ShiftV()->is_var_shift());
21318   match(Set dst ( LShiftVL src shift));
21319   match(Set dst (URShiftVL src shift));
21320   effect(TEMP dst, USE src, USE shift);
21321   format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
21322   ins_encode %{
21323     int opcode = this->ideal_Opcode();
21324     if (UseAVX > 0) {
21325       int vlen_enc = vector_length_encoding(this);
21326       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21327     } else {
21328       assert(Matcher::vector_length(this) == 2, "");
21329       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21330       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21331     }
21332   %}
21333   ins_pipe( pipe_slow );
21334 %}
21335 
21336 // Longs vector constant shift
21337 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21338   match(Set dst (LShiftVL src (LShiftCntV shift)));
21339   match(Set dst (URShiftVL src (RShiftCntV shift)));
21340   format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
21341   ins_encode %{
21342     int opcode = this->ideal_Opcode();
21343     if (UseAVX > 0) {
21344       int vector_len = vector_length_encoding(this);
21345       __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21346     } else {
21347       assert(Matcher::vector_length(this) == 2, "");
21348       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21349       __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21350     }
21351   %}
21352   ins_pipe( pipe_slow );
21353 %}
21354 
21355 // -------------------ArithmeticRightShift -----------------------------------
21356 // Long vector arithmetic right shift
21357 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21358   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21359   match(Set dst (RShiftVL src shift));
21360   effect(TEMP dst, TEMP tmp);
21361   format %{ "vshiftq $dst,$src,$shift" %}
21362   ins_encode %{
21363     uint vlen = Matcher::vector_length(this);
21364     if (vlen == 2) {
21365       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21366       __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21367       __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21368       __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21369       __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21370       __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21371     } else {
21372       assert(vlen == 4, "sanity");
21373       assert(UseAVX > 1, "required");
21374       int vlen_enc = Assembler::AVX_256bit;
21375       __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21376       __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21377       __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21378       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21379       __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21380     }
21381   %}
21382   ins_pipe( pipe_slow );
21383 %}
21384 
21385 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21386   predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21387   match(Set dst (RShiftVL src shift));
21388   format %{ "vshiftq $dst,$src,$shift" %}
21389   ins_encode %{
21390     int vlen_enc = vector_length_encoding(this);
21391     __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21392   %}
21393   ins_pipe( pipe_slow );
21394 %}
21395 
21396 // ------------------- Variable Shift -----------------------------
21397 // Byte variable shift
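// AVX2 provides per-element variable shift counts only at dword and qword
// granularity (vpsllvd/vpsrlvd/vpsravd and the q forms), so the sub-word
// variants below widen the lanes before shifting and narrow the result
// back afterwards.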
21398 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21399   predicate(Matcher::vector_length(n) <= 8 &&
21400             n->as_ShiftV()->is_var_shift() &&
21401             !VM_Version::supports_avx512bw());
21402   match(Set dst ( LShiftVB src shift));
21403   match(Set dst ( RShiftVB src shift));
21404   match(Set dst (URShiftVB src shift));
21405   effect(TEMP dst, TEMP vtmp);
21406   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21407   ins_encode %{
21408     assert(UseAVX >= 2, "required");
21409 
21410     int opcode = this->ideal_Opcode();
21411     int vlen_enc = Assembler::AVX_128bit;
21412     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21413     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21414   %}
21415   ins_pipe( pipe_slow );
21416 %}
21417 
21418 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21419   predicate(Matcher::vector_length(n) == 16 &&
21420             n->as_ShiftV()->is_var_shift() &&
21421             !VM_Version::supports_avx512bw());
21422   match(Set dst ( LShiftVB src shift));
21423   match(Set dst ( RShiftVB src shift));
21424   match(Set dst (URShiftVB src shift));
21425   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21426   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21427   ins_encode %{
21428     assert(UseAVX >= 2, "required");
21429 
21430     int opcode = this->ideal_Opcode();
21431     int vlen_enc = Assembler::AVX_128bit;
21432     // Shift lower half and get word result in dst
21433     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21434 
21435     // Shift upper half and get word result in vtmp1
21436     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21437     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21438     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21439 
21440     // Merge and down convert the two word results to byte in dst
21441     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21442   %}
21443   ins_pipe( pipe_slow );
21444 %}
21445 
21446 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21447   predicate(Matcher::vector_length(n) == 32 &&
21448             n->as_ShiftV()->is_var_shift() &&
21449             !VM_Version::supports_avx512bw());
21450   match(Set dst ( LShiftVB src shift));
21451   match(Set dst ( RShiftVB src shift));
21452   match(Set dst (URShiftVB src shift));
21453   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21454   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21455   ins_encode %{
21456     assert(UseAVX >= 2, "required");
21457 
21458     int opcode = this->ideal_Opcode();
21459     int vlen_enc = Assembler::AVX_128bit;
21460     // Process lower 128 bits and get result in dst
21461     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21462     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21463     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21464     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21465     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21466 
21467     // Process higher 128 bits and get result in vtmp3
21468     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21469     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21470     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21471     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21472     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21473     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21474     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21475 
21476     // Merge the two results in dst
21477     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21478   %}
21479   ins_pipe( pipe_slow );
21480 %}
21481 
21482 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21483   predicate(Matcher::vector_length(n) <= 32 &&
21484             n->as_ShiftV()->is_var_shift() &&
21485             VM_Version::supports_avx512bw());
21486   match(Set dst ( LShiftVB src shift));
21487   match(Set dst ( RShiftVB src shift));
21488   match(Set dst (URShiftVB src shift));
21489   effect(TEMP dst, TEMP vtmp);
21490   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21491   ins_encode %{
21492     assert(UseAVX > 2, "required");
21493 
21494     int opcode = this->ideal_Opcode();
21495     int vlen_enc = vector_length_encoding(this);
21496     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21497   %}
21498   ins_pipe( pipe_slow );
21499 %}
21500 
21501 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21502   predicate(Matcher::vector_length(n) == 64 &&
21503             n->as_ShiftV()->is_var_shift() &&
21504             VM_Version::supports_avx512bw());
21505   match(Set dst ( LShiftVB src shift));
21506   match(Set dst ( RShiftVB src shift));
21507   match(Set dst (URShiftVB src shift));
21508   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21509   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21510   ins_encode %{
21511     assert(UseAVX > 2, "required");
21512 
21513     int opcode = this->ideal_Opcode();
21514     int vlen_enc = Assembler::AVX_256bit;
21515     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21516     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21517     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21518     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21519     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21520   %}
21521   ins_pipe( pipe_slow );
21522 %}
21523 
21524 // Short variable shift
21525 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21526   predicate(Matcher::vector_length(n) <= 8 &&
21527             n->as_ShiftV()->is_var_shift() &&
21528             !VM_Version::supports_avx512bw());
21529   match(Set dst ( LShiftVS src shift));
21530   match(Set dst ( RShiftVS src shift));
21531   match(Set dst (URShiftVS src shift));
21532   effect(TEMP dst, TEMP vtmp);
21533   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21534   ins_encode %{
21535     assert(UseAVX >= 2, "required");
21536 
21537     int opcode = this->ideal_Opcode();
21538     bool sign = (opcode != Op_URShiftVS);
21539     int vlen_enc = Assembler::AVX_256bit;
21540     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21541     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21542     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21543     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21544     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21545     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21546   %}
21547   ins_pipe( pipe_slow );
21548 %}
21549 
21550 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21551   predicate(Matcher::vector_length(n) == 16 &&
21552             n->as_ShiftV()->is_var_shift() &&
21553             !VM_Version::supports_avx512bw());
21554   match(Set dst ( LShiftVS src shift));
21555   match(Set dst ( RShiftVS src shift));
21556   match(Set dst (URShiftVS src shift));
21557   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21558   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21559   ins_encode %{
21560     assert(UseAVX >= 2, "required");
21561 
21562     int opcode = this->ideal_Opcode();
21563     bool sign = (opcode != Op_URShiftVS);
21564     int vlen_enc = Assembler::AVX_256bit;
21565     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21566     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21567     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21568     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21569     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21570 
21571     // Shift upper half, with result in dst using vtmp1 as TEMP
21572     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21573     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21574     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21575     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21576     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21577     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21578 
21579     // Merge lower and upper half result into dst
21580     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21581     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21582   %}
21583   ins_pipe( pipe_slow );
21584 %}
21585 
21586 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21587   predicate(n->as_ShiftV()->is_var_shift() &&
21588             VM_Version::supports_avx512bw());
21589   match(Set dst ( LShiftVS src shift));
21590   match(Set dst ( RShiftVS src shift));
21591   match(Set dst (URShiftVS src shift));
21592   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21593   ins_encode %{
21594     assert(UseAVX > 2, "required");
21595 
21596     int opcode = this->ideal_Opcode();
21597     int vlen_enc = vector_length_encoding(this);
21598     if (!VM_Version::supports_avx512vl()) {
21599       vlen_enc = Assembler::AVX_512bit;
21600     }
21601     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21602   %}
21603   ins_pipe( pipe_slow );
21604 %}
21605 
21606 // Integer variable shift
21607 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21608   predicate(n->as_ShiftV()->is_var_shift());
21609   match(Set dst ( LShiftVI src shift));
21610   match(Set dst ( RShiftVI src shift));
21611   match(Set dst (URShiftVI src shift));
21612   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21613   ins_encode %{
21614     assert(UseAVX >= 2, "required");
21615 
21616     int opcode = this->ideal_Opcode();
21617     int vlen_enc = vector_length_encoding(this);
21618     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21619   %}
21620   ins_pipe( pipe_slow );
21621 %}
21622 
21623 // Long variable shift
21624 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21625   predicate(n->as_ShiftV()->is_var_shift());
21626   match(Set dst ( LShiftVL src shift));
21627   match(Set dst (URShiftVL src shift));
21628   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21629   ins_encode %{
21630     assert(UseAVX >= 2, "required");
21631 
21632     int opcode = this->ideal_Opcode();
21633     int vlen_enc = vector_length_encoding(this);
21634     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21635   %}
21636   ins_pipe( pipe_slow );
21637 %}
21638 
21639 // Long variable arithmetic right shift
21640 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21641   predicate(Matcher::vector_length(n) <= 4 &&
21642             n->as_ShiftV()->is_var_shift() &&
21643             UseAVX == 2);
21644   match(Set dst (RShiftVL src shift));
21645   effect(TEMP dst, TEMP vtmp);
21646   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21647   ins_encode %{
21648     int opcode = this->ideal_Opcode();
21649     int vlen_enc = vector_length_encoding(this);
21650     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21651                  $vtmp$$XMMRegister);
21652   %}
21653   ins_pipe( pipe_slow );
21654 %}
21655 
21656 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21657   predicate(n->as_ShiftV()->is_var_shift() &&
21658             UseAVX > 2);
21659   match(Set dst (RShiftVL src shift));
21660   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21661   ins_encode %{
21662     int opcode = this->ideal_Opcode();
21663     int vlen_enc = vector_length_encoding(this);
21664     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21665   %}
21666   ins_pipe( pipe_slow );
21667 %}
21668 
21669 // --------------------------------- AND --------------------------------------
21670 
21671 instruct vand(vec dst, vec src) %{
21672   predicate(UseAVX == 0);
21673   match(Set dst (AndV dst src));
21674   format %{ "pand    $dst,$src\t! and vectors" %}
21675   ins_encode %{
21676     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21677   %}
21678   ins_pipe( pipe_slow );
21679 %}
21680 
21681 instruct vand_reg(vec dst, vec src1, vec src2) %{
21682   predicate(UseAVX > 0);
21683   match(Set dst (AndV src1 src2));
21684   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21685   ins_encode %{
21686     int vlen_enc = vector_length_encoding(this);
21687     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21688   %}
21689   ins_pipe( pipe_slow );
21690 %}
21691 
21692 instruct vand_mem(vec dst, vec src, memory mem) %{
21693   predicate((UseAVX > 0) &&
21694             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21695   match(Set dst (AndV src (LoadVector mem)));
21696   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21697   ins_encode %{
21698     int vlen_enc = vector_length_encoding(this);
21699     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21700   %}
21701   ins_pipe( pipe_slow );
21702 %}
21703 
21704 // --------------------------------- OR ---------------------------------------
21705 
21706 instruct vor(vec dst, vec src) %{
21707   predicate(UseAVX == 0);
21708   match(Set dst (OrV dst src));
21709   format %{ "por     $dst,$src\t! or vectors" %}
21710   ins_encode %{
21711     __ por($dst$$XMMRegister, $src$$XMMRegister);
21712   %}
21713   ins_pipe( pipe_slow );
21714 %}
21715 
21716 instruct vor_reg(vec dst, vec src1, vec src2) %{
21717   predicate(UseAVX > 0);
21718   match(Set dst (OrV src1 src2));
21719   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21720   ins_encode %{
21721     int vlen_enc = vector_length_encoding(this);
21722     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21723   %}
21724   ins_pipe( pipe_slow );
21725 %}
21726 
21727 instruct vor_mem(vec dst, vec src, memory mem) %{
21728   predicate((UseAVX > 0) &&
21729             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21730   match(Set dst (OrV src (LoadVector mem)));
21731   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21732   ins_encode %{
21733     int vlen_enc = vector_length_encoding(this);
21734     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21735   %}
21736   ins_pipe( pipe_slow );
21737 %}
21738 
21739 // --------------------------------- XOR --------------------------------------
21740 
21741 instruct vxor(vec dst, vec src) %{
21742   predicate(UseAVX == 0);
21743   match(Set dst (XorV dst src));
21744   format %{ "pxor    $dst,$src\t! xor vectors" %}
21745   ins_encode %{
21746     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21747   %}
21748   ins_pipe( pipe_slow );
21749 %}
21750 
21751 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21752   predicate(UseAVX > 0);
21753   match(Set dst (XorV src1 src2));
21754   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21755   ins_encode %{
21756     int vlen_enc = vector_length_encoding(this);
21757     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21758   %}
21759   ins_pipe( pipe_slow );
21760 %}
21761 
21762 instruct vxor_mem(vec dst, vec src, memory mem) %{
21763   predicate((UseAVX > 0) &&
21764             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21765   match(Set dst (XorV src (LoadVector mem)));
21766   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21767   ins_encode %{
21768     int vlen_enc = vector_length_encoding(this);
21769     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21770   %}
21771   ins_pipe( pipe_slow );
21772 %}
21773 
21774 // --------------------------------- VectorCast --------------------------------------
21775 
21776 instruct vcastBtoX(vec dst, vec src) %{
21777   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21778   match(Set dst (VectorCastB2X src));
21779   format %{ "vector_cast_b2x $dst,$src\t!" %}
21780   ins_encode %{
21781     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21782     int vlen_enc = vector_length_encoding(this);
21783     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21784   %}
21785   ins_pipe( pipe_slow );
21786 %}
21787 
21788 instruct vcastBtoD(legVec dst, legVec src) %{
21789   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21790   match(Set dst (VectorCastB2X src));
21791   format %{ "vector_cast_b2x $dst,$src\t!" %}
21792   ins_encode %{
21793     int vlen_enc = vector_length_encoding(this);
21794     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21795   %}
21796   ins_pipe( pipe_slow );
21797 %}
21798 
21799 instruct castStoX(vec dst, vec src) %{
21800   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21801             Matcher::vector_length(n->in(1)) <= 8 && // src
21802             Matcher::vector_element_basic_type(n) == T_BYTE);
21803   match(Set dst (VectorCastS2X src));
21804   format %{ "vector_cast_s2x $dst,$src" %}
21805   ins_encode %{
21806     assert(UseAVX > 0, "required");
21807 
21808     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21809     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21810   %}
21811   ins_pipe( pipe_slow );
21812 %}
21813 
21814 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21815   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21816             Matcher::vector_length(n->in(1)) == 16 && // src
21817             Matcher::vector_element_basic_type(n) == T_BYTE);
21818   effect(TEMP dst, TEMP vtmp);
21819   match(Set dst (VectorCastS2X src));
21820   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21821   ins_encode %{
21822     assert(UseAVX > 0, "required");
21823 
21824     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21825     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21826     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21827     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21828   %}
21829   ins_pipe( pipe_slow );
21830 %}
21831 
21832 instruct vcastStoX_evex(vec dst, vec src) %{
21833   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21834             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21835   match(Set dst (VectorCastS2X src));
21836   format %{ "vector_cast_s2x $dst,$src\t!" %}
21837   ins_encode %{
21838     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21839     int src_vlen_enc = vector_length_encoding(this, $src);
21840     int vlen_enc = vector_length_encoding(this);
21841     switch (to_elem_bt) {
21842       case T_BYTE:
21843         if (!VM_Version::supports_avx512vl()) {
21844           vlen_enc = Assembler::AVX_512bit;
21845         }
21846         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21847         break;
21848       case T_INT:
21849         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21850         break;
21851       case T_FLOAT:
21852         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21853         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21854         break;
21855       case T_LONG:
21856         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21857         break;
21858       case T_DOUBLE: {
21859         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21860         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21861         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21862         break;
21863       }
21864       default:
21865         ShouldNotReachHere();
21866     }
21867   %}
21868   ins_pipe( pipe_slow );
21869 %}
21870 
21871 instruct castItoX(vec dst, vec src) %{
21872   predicate(UseAVX <= 2 &&
21873             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21874             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21875   match(Set dst (VectorCastI2X src));
21876   format %{ "vector_cast_i2x $dst,$src" %}
21877   ins_encode %{
21878     assert(UseAVX > 0, "required");
21879 
21880     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21881     int vlen_enc = vector_length_encoding(this, $src);
21882 
21883     if (to_elem_bt == T_BYTE) {
21884       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21885       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21886       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21887     } else {
21888       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21889       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21890       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21891     }
21892   %}
21893   ins_pipe( pipe_slow );
21894 %}
21895 
21896 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21897   predicate(UseAVX <= 2 &&
21898             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21899             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21900   match(Set dst (VectorCastI2X src));
21901   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21902   effect(TEMP dst, TEMP vtmp);
21903   ins_encode %{
21904     assert(UseAVX > 0, "required");
21905 
21906     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21907     int vlen_enc = vector_length_encoding(this, $src);
21908 
21909     if (to_elem_bt == T_BYTE) {
21910       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21911       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21912       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21913       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21914     } else {
21915       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21916       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21917       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21918       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21919     }
21920   %}
21921   ins_pipe( pipe_slow );
21922 %}
21923 
21924 instruct vcastItoX_evex(vec dst, vec src) %{
21925   predicate(UseAVX > 2 ||
21926             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21927   match(Set dst (VectorCastI2X src));
21928   format %{ "vector_cast_i2x $dst,$src\t!" %}
21929   ins_encode %{
21930     assert(UseAVX > 0, "required");
21931 
21932     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21933     int src_vlen_enc = vector_length_encoding(this, $src);
21934     int dst_vlen_enc = vector_length_encoding(this);
21935     switch (dst_elem_bt) {
21936       case T_BYTE:
21937         if (!VM_Version::supports_avx512vl()) {
21938           src_vlen_enc = Assembler::AVX_512bit;
21939         }
21940         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21941         break;
21942       case T_SHORT:
21943         if (!VM_Version::supports_avx512vl()) {
21944           src_vlen_enc = Assembler::AVX_512bit;
21945         }
21946         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21947         break;
21948       case T_FLOAT:
21949         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21950         break;
21951       case T_LONG:
21952         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21953         break;
21954       case T_DOUBLE:
21955         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21956         break;
21957       default:
21958         ShouldNotReachHere();
21959     }
21960   %}
21961   ins_pipe( pipe_slow );
21962 %}
21963 
21964 instruct vcastLtoBS(vec dst, vec src) %{
21965   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21966             UseAVX <= 2);
21967   match(Set dst (VectorCastL2X src));
21968   format %{ "vector_cast_l2x  $dst,$src" %}
21969   ins_encode %{
21970     assert(UseAVX > 0, "required");
21971 
21972     int vlen = Matcher::vector_length_in_bytes(this, $src);
21973     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
21974     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21975                                                       : ExternalAddress(vector_int_to_short_mask());
21976     if (vlen <= 16) {
21977       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21978       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21979       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21980     } else {
21981       assert(vlen <= 32, "required");
21982       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21983       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21984       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21985       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21986     }
21987     if (to_elem_bt == T_BYTE) {
21988       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21989     }
21990   %}
21991   ins_pipe( pipe_slow );
21992 %}
21993 
21994 instruct vcastLtoX_evex(vec dst, vec src) %{
21995   predicate(UseAVX > 2 ||
21996             (Matcher::vector_element_basic_type(n) == T_INT ||
21997              Matcher::vector_element_basic_type(n) == T_FLOAT ||
21998              Matcher::vector_element_basic_type(n) == T_DOUBLE));
21999   match(Set dst (VectorCastL2X src));
22000   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22001   ins_encode %{
22002     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22003     int vlen = Matcher::vector_length_in_bytes(this, $src);
22004     int vlen_enc = vector_length_encoding(this, $src);
22005     switch (to_elem_bt) {
22006       case T_BYTE:
22007         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22008           vlen_enc = Assembler::AVX_512bit;
22009         }
22010         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22011         break;
22012       case T_SHORT:
22013         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22014           vlen_enc = Assembler::AVX_512bit;
22015         }
22016         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22017         break;
22018       case T_INT:
22019         if (vlen == 8) {
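          // A single long narrows to a single int; the low 4 bytes are already in
          // place, so only a register move may be needed.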
22020           if ($dst$$XMMRegister != $src$$XMMRegister) {
22021             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22022           }
22023         } else if (vlen == 16) {
22024           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22025         } else if (vlen == 32) {
22026           if (UseAVX > 2) {
22027             if (!VM_Version::supports_avx512vl()) {
22028               vlen_enc = Assembler::AVX_512bit;
22029             }
22030             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22031           } else {
22032             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22033             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22034           }
22035         } else { // vlen == 64
22036           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22037         }
22038         break;
22039       case T_FLOAT:
22040         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22041         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22042         break;
22043       case T_DOUBLE:
22044         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22045         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22046         break;
22047 
22048       default: assert(false, "%s", type2name(to_elem_bt));
22049     }
22050   %}
22051   ins_pipe( pipe_slow );
22052 %}
22053 
22054 instruct vcastFtoD_reg(vec dst, vec src) %{
22055   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22056   match(Set dst (VectorCastF2X src));
22057   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22058   ins_encode %{
22059     int vlen_enc = vector_length_encoding(this);
22060     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22061   %}
22062   ins_pipe( pipe_slow );
22063 %}
22064 
22065 
22066 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22067   predicate(!VM_Version::supports_avx10_2() &&
22068             !VM_Version::supports_avx512vl() &&
22069             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22070             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22071             is_integral_type(Matcher::vector_element_basic_type(n)));
22072   match(Set dst (VectorCastF2X src));
22073   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22074   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22075   ins_encode %{
22076     int vlen_enc = vector_length_encoding(this, $src);
22077     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register to load addresses
    // wider than 32 bits in register-indirect addressing mode, since stub constants
    // live in the code cache and ReservedCodeCacheSize is currently capped at 2G.
    // Targets are free to raise that limit, but a code cache larger than 2G is
    // unrealistic in practice; on the flip side, with the given cap we save a
    // temporary register allocation, which in the limiting case can prevent spilling
    // in blocks with high register pressure.
22085     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22086                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22087                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22088   %}
22089   ins_pipe( pipe_slow );
22090 %}
22091 
22092 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22093   predicate(!VM_Version::supports_avx10_2() &&
22094             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22095             is_integral_type(Matcher::vector_element_basic_type(n)));
22096   match(Set dst (VectorCastF2X src));
22097   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22098   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22099   ins_encode %{
22100     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22101     if (to_elem_bt == T_LONG) {
22102       int vlen_enc = vector_length_encoding(this);
22103       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22104                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22105                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22106     } else {
22107       int vlen_enc = vector_length_encoding(this, $src);
22108       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22109                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22110                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22111     }
22112   %}
22113   ins_pipe( pipe_slow );
22114 %}
22115 
22116 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22117   predicate(VM_Version::supports_avx10_2() &&
22118             is_integral_type(Matcher::vector_element_basic_type(n)));
22119   match(Set dst (VectorCastF2X src));
22120   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22121   ins_encode %{
22122     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
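    // F2L widens, so the encoding is sized by the destination; the other integral
    // targets are no wider than the float source, so size by the source instead.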
22123     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22124     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22125   %}
22126   ins_pipe( pipe_slow );
22127 %}
22128 
22129 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22130   predicate(VM_Version::supports_avx10_2() &&
22131             is_integral_type(Matcher::vector_element_basic_type(n)));
22132   match(Set dst (VectorCastF2X (LoadVector src)));
22133   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22134   ins_encode %{
22135     int vlen = Matcher::vector_length(this);
22136     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22137     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22138     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22139   %}
22140   ins_pipe( pipe_slow );
22141 %}
22142 
22143 instruct vcastDtoF_reg(vec dst, vec src) %{
22144   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22145   match(Set dst (VectorCastD2X src));
22146   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22147   ins_encode %{
22148     int vlen_enc = vector_length_encoding(this, $src);
22149     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22150   %}
22151   ins_pipe( pipe_slow );
22152 %}
22153 
22154 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22155   predicate(!VM_Version::supports_avx10_2() &&
22156             !VM_Version::supports_avx512vl() &&
22157             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22158             is_integral_type(Matcher::vector_element_basic_type(n)));
22159   match(Set dst (VectorCastD2X src));
22160   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22161   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22162   ins_encode %{
22163     int vlen_enc = vector_length_encoding(this, $src);
22164     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22165     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22166                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22167                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22168   %}
22169   ins_pipe( pipe_slow );
22170 %}
22171 
22172 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22173   predicate(!VM_Version::supports_avx10_2() &&
22174             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22175             is_integral_type(Matcher::vector_element_basic_type(n)));
22176   match(Set dst (VectorCastD2X src));
22177   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22178   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22179   ins_encode %{
22180     int vlen_enc = vector_length_encoding(this, $src);
22181     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
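    // The double-to-long path exists only with AVX512DQ and needs the 64-bit
    // sign-flip constant; without DQ only 32-bit-or-narrower targets reach here,
    // so the float constant suffices.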
22182     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22183                               ExternalAddress(vector_float_signflip());
22184     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22185                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22186   %}
22187   ins_pipe( pipe_slow );
22188 %}
22189 
22190 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22191   predicate(VM_Version::supports_avx10_2() &&
22192             is_integral_type(Matcher::vector_element_basic_type(n)));
22193   match(Set dst (VectorCastD2X src));
22194   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22195   ins_encode %{
22196     int vlen_enc = vector_length_encoding(this, $src);
22197     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22198     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22199   %}
22200   ins_pipe( pipe_slow );
22201 %}
22202 
22203 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22204   predicate(VM_Version::supports_avx10_2() &&
22205             is_integral_type(Matcher::vector_element_basic_type(n)));
22206   match(Set dst (VectorCastD2X (LoadVector src)));
22207   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22208   ins_encode %{
22209     int vlen = Matcher::vector_length(this);
22210     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22211     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22212     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22213   %}
22214   ins_pipe( pipe_slow );
22215 %}
22216 
22217 instruct vucast(vec dst, vec src) %{
22218   match(Set dst (VectorUCastB2X src));
22219   match(Set dst (VectorUCastS2X src));
22220   match(Set dst (VectorUCastI2X src));
22221   format %{ "vector_ucast $dst,$src\t!" %}
22222   ins_encode %{
22223     assert(UseAVX > 0, "required");
22224 
22225     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22226     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22227     int vlen_enc = vector_length_encoding(this);
22228     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22229   %}
22230   ins_pipe( pipe_slow );
22231 %}
22232 
22233 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22234   predicate(!VM_Version::supports_avx512vl() &&
22235             Matcher::vector_length_in_bytes(n) < 64 &&
22236             Matcher::vector_element_basic_type(n) == T_INT);
22237   match(Set dst (RoundVF src));
22238   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22239   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22240   ins_encode %{
22241     int vlen_enc = vector_length_encoding(this);
22242     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
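    // 0x3F80 = all FP exceptions masked, RC = 01 (round toward negative infinity);
    // the EnableX86ECoreOpts value (0x3FBF) additionally pre-sets the exception
    // status flags. The helper swaps MXCSR to this value around the conversion.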
22243     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22244                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22245                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22246   %}
22247   ins_pipe( pipe_slow );
22248 %}
22249 
22250 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22251   predicate((VM_Version::supports_avx512vl() ||
22252              Matcher::vector_length_in_bytes(n) == 64) &&
22253              Matcher::vector_element_basic_type(n) == T_INT);
22254   match(Set dst (RoundVF src));
22255   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22256   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22257   ins_encode %{
22258     int vlen_enc = vector_length_encoding(this);
22259     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22260     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22261                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22262                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22263   %}
22264   ins_pipe( pipe_slow );
22265 %}
22266 
22267 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22268   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22269   match(Set dst (RoundVD src));
22270   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
22271   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22272   ins_encode %{
22273     int vlen_enc = vector_length_encoding(this);
22274     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22275     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22276                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22277                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22278   %}
22279   ins_pipe( pipe_slow );
22280 %}
22281 
22282 // --------------------------------- VectorMaskCmp --------------------------------------
22283 
22284 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22285   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22286             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22287             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22288             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22289   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22290   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22291   ins_encode %{
22292     int vlen_enc = vector_length_encoding(this, $src1);
22293     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22294     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22295       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22296     } else {
22297       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22298     }
22299   %}
22300   ins_pipe( pipe_slow );
22301 %}
22302 
22303 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22304   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22305             n->bottom_type()->isa_vectmask() == nullptr &&
22306             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22307   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22308   effect(TEMP ktmp);
22309   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22310   ins_encode %{
22311     int vlen_enc = Assembler::AVX_512bit;
22312     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22313     KRegister mask = k0; // The comparison itself is not being masked.
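    // The AVX-512 compare produces a k-mask; expand it back into a -1/0 vector
    // via a zeroing masked load of the all-bits-set constant.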
22314     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22315       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22316       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22317     } else {
22318       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22319       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22320     }
22321   %}
22322   ins_pipe( pipe_slow );
22323 %}
22324 
22325 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22326   predicate(n->bottom_type()->isa_vectmask() &&
22327             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22328   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22329   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22330   ins_encode %{
22331     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22332     int vlen_enc = vector_length_encoding(this, $src1);
22333     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22334     KRegister mask = k0; // The comparison itself is not being masked.
22335     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22336       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22337     } else {
22338       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22339     }
22340   %}
22341   ins_pipe( pipe_slow );
22342 %}
22343 
22344 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22345   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22346             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22347             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22348             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22349             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22350             (n->in(2)->get_int() == BoolTest::eq ||
22351              n->in(2)->get_int() == BoolTest::lt ||
22352              n->in(2)->get_int() == BoolTest::gt)); // cond
22353   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22354   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22355   ins_encode %{
22356     int vlen_enc = vector_length_encoding(this, $src1);
22357     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22358     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22359     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22360   %}
22361   ins_pipe( pipe_slow );
22362 %}
22363 
22364 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22365   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22366             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22367             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22368             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22369             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22370             (n->in(2)->get_int() == BoolTest::ne ||
22371              n->in(2)->get_int() == BoolTest::le ||
22372              n->in(2)->get_int() == BoolTest::ge)); // cond
22373   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22374   effect(TEMP dst, TEMP xtmp);
22375   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22376   ins_encode %{
22377     int vlen_enc = vector_length_encoding(this, $src1);
22378     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22379     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
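    // ne/le/ge have no direct packed-compare encoding; they are computed as the
    // negation of eq/gt/lt, which is why this variant needs $xtmp (the direct
    // eq/lt/gt variant above passes xnoreg).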
22380     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22381   %}
22382   ins_pipe( pipe_slow );
22383 %}
22384 
22385 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22386   predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22387             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22388             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22389             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22390             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22391   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22392   effect(TEMP dst, TEMP xtmp);
22393   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22394   ins_encode %{
22395     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22396     int vlen_enc = vector_length_encoding(this, $src1);
22397     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22398     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22399 
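    // Unsigned compare via sign-bit flipping: x <u y  iff  (x ^ SIGN_MIN) <s (y ^ SIGN_MIN).
    // Broadcast the per-element high-bit constant, flip both operands, then do a
    // signed compare.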
22400     if (vlen_enc == Assembler::AVX_128bit) {
22401       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22402     } else {
22403       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22404     }
22405     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22406     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22407     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22408   %}
22409   ins_pipe( pipe_slow );
22410 %}
22411 
22412 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22413   predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22414              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22415              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22416   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22417   effect(TEMP ktmp);
22418   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22419   ins_encode %{
22420     assert(UseAVX > 2, "required");
22421 
22422     int vlen_enc = vector_length_encoding(this, $src1);
22423     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22424     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22425     KRegister mask = k0; // The comparison itself is not being masked.
22426     bool merge = false;
22427     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22428 
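    // AVX-512 compares produce a k-mask; expand it back to a -1/0 vector with a
    // zeroing masked load of the all-bits-set constant (merge == false).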
22429     switch (src1_elem_bt) {
22430       case T_INT: {
22431         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22432         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22433         break;
22434       }
22435       case T_LONG: {
22436         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22437         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22438         break;
22439       }
22440       default: assert(false, "%s", type2name(src1_elem_bt));
22441     }
22442   %}
22443   ins_pipe( pipe_slow );
22444 %}
22445 
22446 
22447 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22448   predicate(n->bottom_type()->isa_vectmask() &&
22449             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22450   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22451   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22452   ins_encode %{
22453     assert(UseAVX > 2, "required");
22454     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22455 
22456     int vlen_enc = vector_length_encoding(this, $src1);
22457     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22458     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22459     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22460 
    // Dispatch the compare on src1's element type.
22462     switch (src1_elem_bt) {
22463       case T_BYTE: {
22464         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22465         break;
22466       }
22467       case T_SHORT: {
22468         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22469         break;
22470       }
22471       case T_INT: {
22472         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22473         break;
22474       }
22475       case T_LONG: {
22476         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22477         break;
22478       }
22479       default: assert(false, "%s", type2name(src1_elem_bt));
22480     }
22481   %}
22482   ins_pipe( pipe_slow );
22483 %}
22484 
22485 // Extract
22486 
22487 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22488   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22489   match(Set dst (ExtractI src idx));
22490   match(Set dst (ExtractS src idx));
22491   match(Set dst (ExtractB src idx));
22492   format %{ "extractI $dst,$src,$idx\t!" %}
22493   ins_encode %{
22494     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22495 
22496     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22497     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22498   %}
22499   ins_pipe( pipe_slow );
22500 %}
22501 
22502 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22503   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22504             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22505   match(Set dst (ExtractI src idx));
22506   match(Set dst (ExtractS src idx));
22507   match(Set dst (ExtractB src idx));
22508   effect(TEMP vtmp);
22509   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22510   ins_encode %{
22511     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22512 
22513     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
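    // First extract the 128-bit lane containing element $idx into $vtmp, then
    // extract the element from that lane.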
22514     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22515     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22516   %}
22517   ins_pipe( pipe_slow );
22518 %}
22519 
22520 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22521   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22522   match(Set dst (ExtractL src idx));
22523   format %{ "extractL $dst,$src,$idx\t!" %}
22524   ins_encode %{
22525     assert(UseSSE >= 4, "required");
22526     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22527 
22528     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22529   %}
22530   ins_pipe( pipe_slow );
22531 %}
22532 
22533 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22534   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22535             Matcher::vector_length(n->in(1)) == 8);  // src
22536   match(Set dst (ExtractL src idx));
22537   effect(TEMP vtmp);
22538   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22539   ins_encode %{
22540     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22541 
22542     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22543     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22544   %}
22545   ins_pipe( pipe_slow );
22546 %}
22547 
22548 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22549   predicate(Matcher::vector_length(n->in(1)) <= 4);
22550   match(Set dst (ExtractF src idx));
22551   effect(TEMP dst, TEMP vtmp);
22552   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22553   ins_encode %{
22554     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22555 
22556     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22557   %}
22558   ins_pipe( pipe_slow );
22559 %}
22560 
22561 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 8 ||  // src
            Matcher::vector_length(n->in(1)) == 16);  // src
22564   match(Set dst (ExtractF src idx));
22565   effect(TEMP vtmp);
22566   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22567   ins_encode %{
22568     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22569 
22570     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22571     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22572   %}
22573   ins_pipe( pipe_slow );
22574 %}
22575 
22576 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22577   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22578   match(Set dst (ExtractD src idx));
22579   format %{ "extractD $dst,$src,$idx\t!" %}
22580   ins_encode %{
22581     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22582 
22583     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22584   %}
22585   ins_pipe( pipe_slow );
22586 %}
22587 
22588 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22589   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22590             Matcher::vector_length(n->in(1)) == 8);  // src
22591   match(Set dst (ExtractD src idx));
22592   effect(TEMP vtmp);
22593   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22594   ins_encode %{
22595     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22596 
22597     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22598     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22599   %}
22600   ins_pipe( pipe_slow );
22601 %}
22602 
22603 // --------------------------------- Vector Blend --------------------------------------
22604 
22605 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22606   predicate(UseAVX == 0);
22607   match(Set dst (VectorBlend (Binary dst src) mask));
22608   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22609   effect(TEMP tmp);
22610   ins_encode %{
22611     assert(UseSSE >= 4, "required");
22612 
22613     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22614       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22615     }
22616     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22617   %}
22618   ins_pipe( pipe_slow );
22619 %}
22620 
22621 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22622   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22623             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22624             Matcher::vector_length_in_bytes(n) <= 32 &&
22625             is_integral_type(Matcher::vector_element_basic_type(n)));
22626   match(Set dst (VectorBlend (Binary src1 src2) mask));
22627   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22628   ins_encode %{
22629     int vlen_enc = vector_length_encoding(this);
22630     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22631   %}
22632   ins_pipe( pipe_slow );
22633 %}
22634 
22635 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22636   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22637             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22638             Matcher::vector_length_in_bytes(n) <= 32 &&
22639             !is_integral_type(Matcher::vector_element_basic_type(n)));
22640   match(Set dst (VectorBlend (Binary src1 src2) mask));
22641   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22642   ins_encode %{
22643     int vlen_enc = vector_length_encoding(this);
22644     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22645   %}
22646   ins_pipe( pipe_slow );
22647 %}
22648 
22649 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22650   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22651             n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22652             Matcher::vector_length_in_bytes(n) <= 32);
22653   match(Set dst (VectorBlend (Binary src1 src2) mask));
22654   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22655   effect(TEMP vtmp, TEMP dst);
22656   ins_encode %{
22657     int vlen_enc = vector_length_encoding(this);
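    // With EnableX86ECoreOpts, avoid the variable-blend instructions and compute
    // dst = ($mask & $src2) | (~$mask & $src1) with plain bitwise ops.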
22658     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22659     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22660     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22661   %}
22662   ins_pipe( pipe_slow );
22663 %}
22664 
22665 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22666   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22667             n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22668   match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22670   effect(TEMP ktmp);
22671   ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
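    // Convert the -1/0 vector mask into a k-register (lanes that equal all-ones),
    // then blend: lanes with the k-bit set take $src2, the rest keep $src1.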
22674     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22675     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22676   %}
22677   ins_pipe( pipe_slow );
22678 %}
22679 
22680 
22681 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22682   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22683             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22684              VM_Version::supports_avx512bw()));
22685   match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22687   ins_encode %{
22688     int vlen_enc = vector_length_encoding(this);
22689     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22690     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22691   %}
22692   ins_pipe( pipe_slow );
22693 %}
22694 
22695 // --------------------------------- ABS --------------------------------------
22696 // a = |a|
22697 instruct vabsB_reg(vec dst, vec src) %{
22698   match(Set dst (AbsVB  src));
22699   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22700   ins_encode %{
22701     uint vlen = Matcher::vector_length(this);
22702     if (vlen <= 16) {
22703       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22704     } else {
22705       int vlen_enc = vector_length_encoding(this);
22706       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22707     }
22708   %}
22709   ins_pipe( pipe_slow );
22710 %}
22711 
22712 instruct vabsS_reg(vec dst, vec src) %{
22713   match(Set dst (AbsVS  src));
22714   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22715   ins_encode %{
22716     uint vlen = Matcher::vector_length(this);
22717     if (vlen <= 8) {
22718       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22719     } else {
22720       int vlen_enc = vector_length_encoding(this);
22721       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22722     }
22723   %}
22724   ins_pipe( pipe_slow );
22725 %}
22726 
22727 instruct vabsI_reg(vec dst, vec src) %{
22728   match(Set dst (AbsVI  src));
22729   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22730   ins_encode %{
22731     uint vlen = Matcher::vector_length(this);
22732     if (vlen <= 4) {
22733       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22734     } else {
22735       int vlen_enc = vector_length_encoding(this);
22736       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22737     }
22738   %}
22739   ins_pipe( pipe_slow );
22740 %}
22741 
22742 instruct vabsL_reg(vec dst, vec src) %{
22743   match(Set dst (AbsVL  src));
22744   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22745   ins_encode %{
22746     assert(UseAVX > 2, "required");
22747     int vlen_enc = vector_length_encoding(this);
22748     if (!VM_Version::supports_avx512vl()) {
22749       vlen_enc = Assembler::AVX_512bit;
22750     }
22751     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22752   %}
22753   ins_pipe( pipe_slow );
22754 %}
22755 
22756 // --------------------------------- ABSNEG --------------------------------------
22757 
22758 instruct vabsnegF(vec dst, vec src) %{
22759   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22760   match(Set dst (AbsVF src));
22761   match(Set dst (NegVF src));
22762   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22763   ins_cost(150);
22764   ins_encode %{
22765     int opcode = this->ideal_Opcode();
22766     int vlen = Matcher::vector_length(this);
22767     if (vlen == 2) {
22768       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22769     } else {
22770       assert(vlen == 8 || vlen == 16, "required");
22771       int vlen_enc = vector_length_encoding(this);
22772       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22773     }
22774   %}
22775   ins_pipe( pipe_slow );
22776 %}
22777 
22778 instruct vabsneg4F(vec dst) %{
22779   predicate(Matcher::vector_length(n) == 4);
22780   match(Set dst (AbsVF dst));
22781   match(Set dst (NegVF dst));
22782   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22783   ins_cost(150);
22784   ins_encode %{
22785     int opcode = this->ideal_Opcode();
22786     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22787   %}
22788   ins_pipe( pipe_slow );
22789 %}
22790 
22791 instruct vabsnegD(vec dst, vec src) %{
22792   match(Set dst (AbsVD  src));
22793   match(Set dst (NegVD  src));
22794   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22795   ins_encode %{
22796     int opcode = this->ideal_Opcode();
22797     uint vlen = Matcher::vector_length(this);
22798     if (vlen == 2) {
22799       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22800     } else {
22801       int vlen_enc = vector_length_encoding(this);
22802       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22803     }
22804   %}
22805   ins_pipe( pipe_slow );
22806 %}
22807 
22808 //------------------------------------- VectorTest --------------------------------------------
22809 
22810 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22811   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22812   match(Set cr (VectorTest src1 src2));
22813   effect(TEMP vtmp);
22814   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22815   ins_encode %{
22816     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22817     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22818     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22819   %}
22820   ins_pipe( pipe_slow );
22821 %}
22822 
22823 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22824   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22825   match(Set cr (VectorTest src1 src2));
22826   format %{ "vptest_ge16  $src1, $src2\n\t" %}
22827   ins_encode %{
22828     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22829     int vlen = Matcher::vector_length_in_bytes(this, $src1);
22830     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22831   %}
22832   ins_pipe( pipe_slow );
22833 %}
22834 
22835 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22836   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22837              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22838             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22839   match(Set cr (VectorTest src1 src2));
22840   effect(TEMP tmp);
22841   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22842   ins_encode %{
22843     uint masklen = Matcher::vector_length(this, $src1);
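    // All-true: keep only the masklen live bits and compare against all-ones;
    // the flags then report equality iff every lane of the mask is set.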
22844     __ kmovwl($tmp$$Register, $src1$$KRegister);
22845     __ andl($tmp$$Register, (1 << masklen) - 1);
22846     __ cmpl($tmp$$Register, (1 << masklen) - 1);
22847   %}
22848   ins_pipe( pipe_slow );
22849 %}
22850 
22851 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22852   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22853              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22854             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22855   match(Set cr (VectorTest src1 src2));
22856   effect(TEMP tmp);
22857   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22858   ins_encode %{
22859     uint masklen = Matcher::vector_length(this, $src1);
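    // Any-true: ZF is cleared iff at least one of the masklen live bits is set.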
22860     __ kmovwl($tmp$$Register, $src1$$KRegister);
22861     __ andl($tmp$$Register, (1 << masklen) - 1);
22862   %}
22863   ins_pipe( pipe_slow );
22864 %}
22865 
22866 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
22867   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22868             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22869   match(Set cr (VectorTest src1 src2));
22870   format %{ "ktest_ge8  $src1, $src2\n\t" %}
22871   ins_encode %{
22872     uint masklen = Matcher::vector_length(this, $src1);
22873     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22874   %}
22875   ins_pipe( pipe_slow );
22876 %}
22877 
22878 //------------------------------------- LoadMask --------------------------------------------
22879 
22880 instruct loadMask(legVec dst, legVec src) %{
22881   predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22882   match(Set dst (VectorLoadMask src));
22883   effect(TEMP dst);
22884   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
22885   ins_encode %{
22886     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22887     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22888     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22889   %}
22890   ins_pipe( pipe_slow );
22891 %}
22892 
22893 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
22894   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22895   match(Set dst (VectorLoadMask src));
22896   effect(TEMP xtmp);
22897   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22898   ins_encode %{
22899     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22900                         true, Assembler::AVX_512bit);
22901   %}
22902   ins_pipe( pipe_slow );
22903 %}
22904 
22905 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
22906   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22907   match(Set dst (VectorLoadMask src));
22908   effect(TEMP xtmp);
22909   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22910   ins_encode %{
22911     int vlen_enc = vector_length_encoding(in(1));
22912     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22913                         false, vlen_enc);
22914   %}
22915   ins_pipe( pipe_slow );
22916 %}
22917 
22918 //------------------------------------- StoreMask --------------------------------------------
22919 
22920 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
22921   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22922   match(Set dst (VectorStoreMask src size));
22923   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22924   ins_encode %{
22925     int vlen = Matcher::vector_length(this);
22926     if (vlen <= 16 && UseAVX <= 2) {
22927       assert(UseSSE >= 3, "required");
22928       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22929     } else {
22930       assert(UseAVX > 0, "required");
22931       int src_vlen_enc = vector_length_encoding(this, $src);
22932       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22933     }
22934   %}
22935   ins_pipe( pipe_slow );
22936 %}
22937 
22938 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
22939   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22940   match(Set dst (VectorStoreMask src size));
22941   effect(TEMP_DEF dst, TEMP xtmp);
22942   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22943   ins_encode %{
22944     int vlen_enc = Assembler::AVX_128bit;
22945     int vlen = Matcher::vector_length(this);
22946     if (vlen <= 8) {
22947       assert(UseSSE >= 3, "required");
22948       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22949       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22950       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22951     } else {
22952       assert(UseAVX > 0, "required");
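      // 256-bit short mask: fold the high 128 bits down, pack words to bytes with
      // signed saturation (-1 stays -1), then pabsb normalizes to 1/0.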
22953       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22954       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22955       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22956     }
22957   %}
22958   ins_pipe( pipe_slow );
22959 %}
22960 
22961 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
22962   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22963   match(Set dst (VectorStoreMask src size));
22964   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22965   effect(TEMP_DEF dst, TEMP xtmp);
22966   ins_encode %{
22967     int vlen_enc = Assembler::AVX_128bit;
22968     int vlen = Matcher::vector_length(this);
22969     if (vlen <= 4) {
22970       assert(UseSSE >= 3, "required");
22971       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22972       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22973       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22974       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22975     } else {
22976       assert(UseAVX > 0, "required");
22977       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22978       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22979       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22980       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22981       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22982     }
22983   %}
22984   ins_pipe( pipe_slow );
22985 %}
22986 
22987 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
22988   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
22989   match(Set dst (VectorStoreMask src size));
22990   effect(TEMP_DEF dst, TEMP xtmp);
22991   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22992   ins_encode %{
22993     assert(UseSSE >= 3, "required");
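    // Narrow a 2-long -1/0 mask to bytes: pshufd 0x8 gathers the low dword of each
    // long, pabsd turns -1 into 1, and the packs narrow dword -> word -> byte.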
22994     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22995     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
22996     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
22997     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22998     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22999   %}
23000   ins_pipe( pipe_slow );
23001 %}
23002 
23003 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23004   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23005   match(Set dst (VectorStoreMask src size));
23006   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23007   effect(TEMP_DEF dst, TEMP vtmp);
23008   ins_encode %{
23009     int vlen_enc = Assembler::AVX_128bit;
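    // 4-long mask: vshufps 0x88 gathers the low dword of each long within each
    // 128-bit lane; the extract/blend collapses both lanes into the low half
    // before the dword -> word -> byte packs and the final normalize to 1/0.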
23010     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23011     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23012     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23013     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23014     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23015     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23016     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23017   %}
23018   ins_pipe( pipe_slow );
23019 %}
23020 
23021 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23022   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23023   match(Set dst (VectorStoreMask src size));
23024   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23025   ins_encode %{
23026     int src_vlen_enc = vector_length_encoding(this, $src);
23027     int dst_vlen_enc = vector_length_encoding(this);
23028     if (!VM_Version::supports_avx512vl()) {
23029       src_vlen_enc = Assembler::AVX_512bit;
23030     }
23031     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23032     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23033   %}
23034   ins_pipe( pipe_slow );
23035 %}
23036 
23037 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23038   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23039   match(Set dst (VectorStoreMask src size));
23040   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23041   ins_encode %{
23042     int src_vlen_enc = vector_length_encoding(this, $src);
23043     int dst_vlen_enc = vector_length_encoding(this);
23044     if (!VM_Version::supports_avx512vl()) {
23045       src_vlen_enc = Assembler::AVX_512bit;
23046     }
23047     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23048     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23049   %}
23050   ins_pipe( pipe_slow );
23051 %}
23052 
23053 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23054   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23055   match(Set dst (VectorStoreMask mask size));
23056   effect(TEMP_DEF dst);
23057   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23058   ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "required");
23060     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23061                  false, Assembler::AVX_512bit, noreg);
23062     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23063   %}
23064   ins_pipe( pipe_slow );
23065 %}
23066 
23067 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23068   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23069   match(Set dst (VectorStoreMask mask size));
23070   effect(TEMP_DEF dst);
23071   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23072   ins_encode %{
23073     int dst_vlen_enc = vector_length_encoding(this);
23074     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23075     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23076   %}
23077   ins_pipe( pipe_slow );
23078 %}
23079 
23080 instruct vmaskcast_evex(kReg dst) %{
23081   match(Set dst (VectorMaskCast dst));
23082   ins_cost(0);
23083   format %{ "vector_mask_cast $dst" %}
23084   ins_encode %{
23085     // empty
23086   %}
23087   ins_pipe(empty);
23088 %}
23089 
23090 instruct vmaskcast(vec dst) %{
23091   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23092   match(Set dst (VectorMaskCast dst));
23093   ins_cost(0);
23094   format %{ "vector_mask_cast $dst" %}
23095   ins_encode %{
23096     // empty
23097   %}
23098   ins_pipe(empty);
23099 %}
23100 
23101 instruct vmaskcast_avx(vec dst, vec src) %{
23102   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23103   match(Set dst (VectorMaskCast src));
23104   format %{ "vector_mask_cast $dst, $src" %}
23105   ins_encode %{
23106     int vlen = Matcher::vector_length(this);
23107     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23108     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23109     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23110   %}
23111   ins_pipe(pipe_slow);
23112 %}
23113 
23114 //-------------------------------- Load Iota Indices ----------------------------------
23115 
23116 instruct loadIotaIndices(vec dst, immI_0 src) %{
23117   match(Set dst (VectorLoadConst src));
23118   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23119   ins_encode %{
23120      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23121      BasicType bt = Matcher::vector_element_basic_type(this);
23122      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23123   %}
23124   ins_pipe( pipe_slow );
23125 %}
23126 
23127 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23128   match(Set dst (PopulateIndex src1 src2));
23129   effect(TEMP dst, TEMP vtmp);
23130   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23131   ins_encode %{
23132      assert($src2$$constant == 1, "required");
23133      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23134      int vlen_enc = vector_length_encoding(this);
23135      BasicType elem_bt = Matcher::vector_element_basic_type(this);
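     // dst[i] = $src1 + i: broadcast the scalar start value, load the iota
     // constant {0, 1, 2, ...}, and add the two vectors.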
23136      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23137      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23138      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23139   %}
23140   ins_pipe( pipe_slow );
23141 %}
23142 
23143 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23144   match(Set dst (PopulateIndex src1 src2));
23145   effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst, $src1, $src2\t! using $vtmp as TEMP" %}
23147   ins_encode %{
23148      assert($src2$$constant == 1, "required");
23149      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23150      int vlen_enc = vector_length_encoding(this);
23151      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23152      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23153      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23154      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23155   %}
23156   ins_pipe( pipe_slow );
23157 %}
23158 
23159 //-------------------------------- Rearrange ----------------------------------
23160 
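// VectorRearrange computes dst[i] = src[shuffle[i]] for every lane; the shuffle
// vector is expected to hold in-range lane indices for the source vector.
// VectorLoadShuffle converts such an index vector into the element-size-specific
// shuffle format the underlying hardware instruction expects.
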
23161 // LoadShuffle/Rearrange for Byte
23162 instruct rearrangeB(vec dst, vec shuffle) %{
23163   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23164             Matcher::vector_length(n) < 32);
23165   match(Set dst (VectorRearrange dst shuffle));
23166   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23167   ins_encode %{
23168     assert(UseSSE >= 4, "required");
23169     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23170   %}
23171   ins_pipe( pipe_slow );
23172 %}
23173 
23174 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23175   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23176             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23177   match(Set dst (VectorRearrange src shuffle));
23178   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23179   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23180   ins_encode %{
23181     assert(UseAVX >= 2, "required");
23182     // Swap src into vtmp1
23183     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to get entries from the other 128-bit lane
23185     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to get entries from its own 128-bit lane
23187     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries coming from the other lane
23189     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23190     // Perform the blend
23191     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23192   %}
23193   ins_pipe( pipe_slow );
23194 %}
23195 
23196 
23197 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23198   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23199             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23200   match(Set dst (VectorRearrange src shuffle));
23201   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23203   ins_encode %{
23204     int vlen_enc = vector_length_encoding(this);
23205     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23206                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23207                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23208   %}
23209   ins_pipe( pipe_slow );
23210 %}
23211 
23212 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23213   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23214             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23215   match(Set dst (VectorRearrange src shuffle));
23216   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23217   ins_encode %{
23218     int vlen_enc = vector_length_encoding(this);
23219     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23220   %}
23221   ins_pipe( pipe_slow );
23222 %}
23223 
23224 // LoadShuffle/Rearrange for Short
23225 
23226 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23227   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23228             !VM_Version::supports_avx512bw());
23229   match(Set dst (VectorLoadShuffle src));
23230   effect(TEMP dst, TEMP vtmp);
23231   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23232   ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask, since
    // only a byte shuffle instruction is available on these platforms.
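    // Illustrative sketch: a short shuffle index i expands to the byte pair
    // (2*i, 2*i+1), so a shuffle of [2, 0, ...] becomes the byte shuffle
    // [4, 5, 0, 1, ...] (assuming vector_short_shufflemask supplies the
    // alternating 0/1 byte increments, as the steps below indicate).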
23235     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23236     if (UseAVX == 0) {
23237       assert(vlen_in_bytes <= 16, "required");
23238       // Multiply each shuffle by two to get byte index
23239       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23240       __ psllw($vtmp$$XMMRegister, 1);
23241 
23242       // Duplicate to create 2 copies of byte index
23243       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23244       __ psllw($dst$$XMMRegister, 8);
23245       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23246 
23247       // Add one to get alternate byte index
23248       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23249       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23250     } else {
23251       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23252       int vlen_enc = vector_length_encoding(this);
23253       // Multiply each shuffle by two to get byte index
23254       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23255 
23256       // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23258       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23259 
23260       // Add one to get alternate byte index
23261       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23262     }
23263   %}
23264   ins_pipe( pipe_slow );
23265 %}
23266 
23267 instruct rearrangeS(vec dst, vec shuffle) %{
23268   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23269             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23270   match(Set dst (VectorRearrange dst shuffle));
23271   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23272   ins_encode %{
23273     assert(UseSSE >= 4, "required");
23274     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23275   %}
23276   ins_pipe( pipe_slow );
23277 %}
23278 
23279 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23280   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23281             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23282   match(Set dst (VectorRearrange src shuffle));
23283   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23284   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23285   ins_encode %{
23286     assert(UseAVX >= 2, "required");
23287     // Swap src into vtmp1
23288     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle the swapped src to get entries from the other 128-bit lane
23290     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle the original src to get entries from its own 128-bit lane
23292     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries coming from the other lane
23294     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23295     // Perform the blend
23296     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23297   %}
23298   ins_pipe( pipe_slow );
23299 %}
23300 
23301 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23302   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23303             VM_Version::supports_avx512bw());
23304   match(Set dst (VectorRearrange src shuffle));
23305   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23306   ins_encode %{
23307     int vlen_enc = vector_length_encoding(this);
23308     if (!VM_Version::supports_avx512vl()) {
23309       vlen_enc = Assembler::AVX_512bit;
23310     }
23311     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23312   %}
23313   ins_pipe( pipe_slow );
23314 %}
23315 
23316 // LoadShuffle/Rearrange for Integer and Float
23317 
23318 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23319   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23320             Matcher::vector_length(n) == 4 && UseAVX == 0);
23321   match(Set dst (VectorLoadShuffle src));
23322   effect(TEMP dst, TEMP vtmp);
23323   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23324   ins_encode %{
23325     assert(UseSSE >= 4, "required");
23326 
    // Create a byte shuffle mask from the int shuffle mask, since
    // only a byte shuffle instruction is available on these platforms
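    // Illustrative sketch: an int shuffle index i expands to the four byte
    // indices (4*i .. 4*i+3), so a shuffle of [1, 0, ...] becomes the byte
    // shuffle [4, 5, 6, 7, 0, 1, 2, 3, ...] (assuming vector_int_shufflemask
    // supplies the repeating 0,1,2,3 byte increments).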
23329 
23330     // Duplicate and multiply each shuffle by 4
23331     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23332     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23333     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23334     __ psllw($vtmp$$XMMRegister, 2);
23335 
23336     // Duplicate again to create 4 copies of byte index
23337     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23338     __ psllw($dst$$XMMRegister, 8);
23339     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23340 
23341     // Add 3,2,1,0 to get alternate byte index
23342     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23343     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23344   %}
23345   ins_pipe( pipe_slow );
23346 %}
23347 
23348 instruct rearrangeI(vec dst, vec shuffle) %{
23349   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23350             UseAVX == 0);
23351   match(Set dst (VectorRearrange dst shuffle));
23352   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23353   ins_encode %{
23354     assert(UseSSE >= 4, "required");
23355     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23356   %}
23357   ins_pipe( pipe_slow );
23358 %}
23359 
23360 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23361   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23362             UseAVX > 0);
23363   match(Set dst (VectorRearrange src shuffle));
23364   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23365   ins_encode %{
23366     int vlen_enc = vector_length_encoding(this);
23367     BasicType bt = Matcher::vector_element_basic_type(this);
23368     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23369   %}
23370   ins_pipe( pipe_slow );
23371 %}
23372 
23373 // LoadShuffle/Rearrange for Long and Double
23374 
23375 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23376   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23377             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23378   match(Set dst (VectorLoadShuffle src));
23379   effect(TEMP dst, TEMP vtmp);
23380   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23381   ins_encode %{
23382     assert(UseAVX >= 2, "required");
23383 
23384     int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask, since
    // only a double word shuffle instruction is available on these platforms
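    // Illustrative sketch: a long shuffle index i expands to the double word
    // pair (2*i, 2*i+1), so a shuffle of [1, 0] becomes [2, 3, 0, 1] (assuming
    // vector_long_shufflemask supplies the alternating 0/1 increments).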
23387 
23388     // Multiply each shuffle by two to get double word index
23389     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23390 
23391     // Duplicate each double word shuffle
23392     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23393     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23394 
23395     // Add one to get alternate double word index
23396     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23397   %}
23398   ins_pipe( pipe_slow );
23399 %}
23400 
23401 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23402   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23403             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23404   match(Set dst (VectorRearrange src shuffle));
23405   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23406   ins_encode %{
23407     assert(UseAVX >= 2, "required");
23408 
23409     int vlen_enc = vector_length_encoding(this);
23410     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23411   %}
23412   ins_pipe( pipe_slow );
23413 %}
23414 
23415 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23416   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23417             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23418   match(Set dst (VectorRearrange src shuffle));
23419   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23420   ins_encode %{
23421     assert(UseAVX > 2, "required");
23422 
23423     int vlen_enc = vector_length_encoding(this);
23424     if (vlen_enc == Assembler::AVX_128bit) {
23425       vlen_enc = Assembler::AVX_256bit;
23426     }
23427     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23428   %}
23429   ins_pipe( pipe_slow );
23430 %}
23431 
23432 // --------------------------------- FMA --------------------------------------
23433 // a * b + c
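// The multiply and add are fused: the product is not rounded before the add,
// so the whole expression incurs a single rounding step.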
23434 
23435 instruct vfmaF_reg(vec a, vec b, vec c) %{
23436   match(Set c (FmaVF  c (Binary a b)));
23437   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23438   ins_cost(150);
23439   ins_encode %{
23440     assert(UseFMA, "not enabled");
23441     int vlen_enc = vector_length_encoding(this);
23442     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23443   %}
23444   ins_pipe( pipe_slow );
23445 %}
23446 
23447 instruct vfmaF_mem(vec a, memory b, vec c) %{
23448   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23449   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23450   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23451   ins_cost(150);
23452   ins_encode %{
23453     assert(UseFMA, "not enabled");
23454     int vlen_enc = vector_length_encoding(this);
23455     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23456   %}
23457   ins_pipe( pipe_slow );
23458 %}
23459 
23460 instruct vfmaD_reg(vec a, vec b, vec c) %{
23461   match(Set c (FmaVD  c (Binary a b)));
23462   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23463   ins_cost(150);
23464   ins_encode %{
23465     assert(UseFMA, "not enabled");
23466     int vlen_enc = vector_length_encoding(this);
23467     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23468   %}
23469   ins_pipe( pipe_slow );
23470 %}
23471 
23472 instruct vfmaD_mem(vec a, memory b, vec c) %{
23473   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23474   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23475   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23476   ins_cost(150);
23477   ins_encode %{
23478     assert(UseFMA, "not enabled");
23479     int vlen_enc = vector_length_encoding(this);
23480     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23481   %}
23482   ins_pipe( pipe_slow );
23483 %}
23484 
23485 // --------------------------------- Vector Multiply Add --------------------------------------
23486 
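// pmaddwd multiplies adjacent pairs of signed 16-bit lanes and sums each pair
// into one 32-bit lane: dst[i] = src1[2i] * src2[2i] + src1[2i+1] * src2[2i+1].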
23487 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23488   predicate(UseAVX == 0);
23489   match(Set dst (MulAddVS2VI dst src1));
23490   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23491   ins_encode %{
23492     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23493   %}
23494   ins_pipe( pipe_slow );
23495 %}
23496 
23497 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23498   predicate(UseAVX > 0);
23499   match(Set dst (MulAddVS2VI src1 src2));
23500   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23501   ins_encode %{
23502     int vlen_enc = vector_length_encoding(this);
23503     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23504   %}
23505   ins_pipe( pipe_slow );
23506 %}
23507 
23508 // --------------------------------- Vector Multiply Add Add ----------------------------------
23509 
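// The VNNI form additionally accumulates into the destination:
// dst[i] += src1[2i] * src2[2i] + src1[2i+1] * src2[2i+1], fusing the
// multiply-add pair with the following vector add in a single instruction.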
23510 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23511   predicate(VM_Version::supports_avx512_vnni());
23512   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23513   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23514   ins_encode %{
23515     assert(UseAVX > 2, "required");
23516     int vlen_enc = vector_length_encoding(this);
23517     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23518   %}
23519   ins_pipe( pipe_slow );
23520   ins_cost(10);
23521 %}
23522 
23523 // --------------------------------- PopCount --------------------------------------
23524 
23525 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23526   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23527   match(Set dst (PopCountVI src));
23528   match(Set dst (PopCountVL src));
23529   format %{ "vector_popcount_integral $dst, $src" %}
23530   ins_encode %{
23531     int opcode = this->ideal_Opcode();
23532     int vlen_enc = vector_length_encoding(this, $src);
23533     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23534     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23535   %}
23536   ins_pipe( pipe_slow );
23537 %}
23538 
23539 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23540   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23541   match(Set dst (PopCountVI src mask));
23542   match(Set dst (PopCountVL src mask));
23543   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23544   ins_encode %{
23545     int vlen_enc = vector_length_encoding(this, $src);
23546     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23547     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23548     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23549   %}
23550   ins_pipe( pipe_slow );
23551 %}
23552 
23553 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23554   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23555   match(Set dst (PopCountVI src));
23556   match(Set dst (PopCountVL src));
23557   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23558   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23559   ins_encode %{
23560     int opcode = this->ideal_Opcode();
23561     int vlen_enc = vector_length_encoding(this, $src);
23562     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23563     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23564                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23565   %}
23566   ins_pipe( pipe_slow );
23567 %}
23568 
23569 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23570 
23571 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23572   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23573                                               Matcher::vector_length_in_bytes(n->in(1))));
23574   match(Set dst (CountTrailingZerosV src));
23575   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23576   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23578   ins_encode %{
23579     int vlen_enc = vector_length_encoding(this, $src);
23580     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23581     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23582                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23583   %}
23584   ins_pipe( pipe_slow );
23585 %}
23586 
23587 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23588   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23589             VM_Version::supports_avx512cd() &&
23590             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23591   match(Set dst (CountTrailingZerosV src));
23592   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23593   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23595   ins_encode %{
23596     int vlen_enc = vector_length_encoding(this, $src);
23597     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23598     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23599                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23600   %}
23601   ins_pipe( pipe_slow );
23602 %}
23603 
23604 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23605   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23606   match(Set dst (CountTrailingZerosV src));
23607   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23608   ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23610   ins_encode %{
23611     int vlen_enc = vector_length_encoding(this, $src);
23612     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23613     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23614                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23615                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23616   %}
23617   ins_pipe( pipe_slow );
23618 %}
23619 
23620 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23621   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23622   match(Set dst (CountTrailingZerosV src));
23623   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23624   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23625   ins_encode %{
23626     int vlen_enc = vector_length_encoding(this, $src);
23627     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23628     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23629                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23630   %}
23631   ins_pipe( pipe_slow );
23632 %}
23633 
23634 
23635 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23636 
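// The immediate $func is an 8-bit truth table: for every result bit position,
// the three input bits (dst, src2, src3) form an index selecting one of its
// eight entries. E.g. func = 0x96 computes dst ^ src2 ^ src3 and func = 0xE8
// the bitwise majority of the three inputs.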
23637 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23638   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23639   effect(TEMP dst);
23640   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23641   ins_encode %{
23642     int vector_len = vector_length_encoding(this);
23643     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23644   %}
23645   ins_pipe( pipe_slow );
23646 %}
23647 
23648 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23649   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23650   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23651   effect(TEMP dst);
23652   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23653   ins_encode %{
23654     int vector_len = vector_length_encoding(this);
23655     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23656   %}
23657   ins_pipe( pipe_slow );
23658 %}
23659 
23660 // --------------------------------- Rotation Operations ----------------------------------
23661 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23662   match(Set dst (RotateLeftV src shift));
23663   match(Set dst (RotateRightV src shift));
23664   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23665   ins_encode %{
23666     int opcode      = this->ideal_Opcode();
23667     int vector_len  = vector_length_encoding(this);
23668     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23669     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23670   %}
23671   ins_pipe( pipe_slow );
23672 %}
23673 
23674 instruct vprorate(vec dst, vec src, vec shift) %{
23675   match(Set dst (RotateLeftV src shift));
23676   match(Set dst (RotateRightV src shift));
23677   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23678   ins_encode %{
23679     int opcode      = this->ideal_Opcode();
23680     int vector_len  = vector_length_encoding(this);
23681     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23682     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23683   %}
23684   ins_pipe( pipe_slow );
23685 %}
23686 
23687 // ---------------------------------- Masked Operations ------------------------------------
23688 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23689   predicate(!n->in(3)->bottom_type()->isa_vectmask());
23690   match(Set dst (LoadVectorMasked mem mask));
23691   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23692   ins_encode %{
23693     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23694     int vlen_enc = vector_length_encoding(this);
23695     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23696   %}
23697   ins_pipe( pipe_slow );
23698 %}
23699 
23700 
23701 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23702   predicate(n->in(3)->bottom_type()->isa_vectmask());
23703   match(Set dst (LoadVectorMasked mem mask));
23704   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23705   ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23707     int vector_len = vector_length_encoding(this);
23708     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23709   %}
23710   ins_pipe( pipe_slow );
23711 %}
23712 
23713 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23714   predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23715   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23716   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23717   ins_encode %{
23718     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23719     int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23721     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23722   %}
23723   ins_pipe( pipe_slow );
23724 %}
23725 
23726 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23727   predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23728   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23729   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23730   ins_encode %{
23731     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23733     int vlen_enc = vector_length_encoding(src_node);
23734     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23735   %}
23736   ins_pipe( pipe_slow );
23737 %}
23738 
23739 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23740   match(Set addr (VerifyVectorAlignment addr mask));
23741   effect(KILL cr);
23742   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23743   ins_encode %{
23744     Label Lskip;
23745     // check if masked bits of addr are zero
23746     __ testq($addr$$Register, $mask$$constant);
23747     __ jccb(Assembler::equal, Lskip);
23748     __ stop("verify_vector_alignment found a misaligned vector memory access");
23749     __ bind(Lskip);
23750   %}
23751   ins_pipe(pipe_slow);
23752 %}
23753 
23754 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23755   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23756   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23757   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23758   ins_encode %{
23759     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23760     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23761 
23762     Label DONE;
23763     int vlen_enc = vector_length_encoding(this, $src1);
23764     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23765 
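    // Sketch of the sequence below: compare the masked lanes for equality,
    // then OR the result with the complement of the mask. If every bit is set
    // (kortest reports carry) all masked lanes matched and $dst stays -1;
    // otherwise tzcnt of the first clear comparison bit yields the index of
    // the first mismatching lane.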
23766     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23767     __ mov64($dst$$Register, -1L);
23768     __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23769     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23770     __ jccb(Assembler::carrySet, DONE);
23771     __ kmovql($dst$$Register, $ktmp1$$KRegister);
23772     __ notq($dst$$Register);
23773     __ tzcntq($dst$$Register, $dst$$Register);
23774     __ bind(DONE);
23775   %}
23776   ins_pipe( pipe_slow );
23777 %}
23778 
23779 
23780 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23781   match(Set dst (VectorMaskGen len));
23782   effect(TEMP temp, KILL cr);
23783   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23784   ins_encode %{
23785     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23786   %}
23787   ins_pipe( pipe_slow );
23788 %}
23789 
23790 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23791   match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23793   effect(TEMP temp);
23794   ins_encode %{
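    // Materialize the constant prefix mask with the lowest $len bits set by
    // shifting an all-ones pattern right by (64 - len); e.g. len = 3 gives
    // 0b111, i.e. the three lowest lanes enabled.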
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23796     __ kmovql($dst$$KRegister, $temp$$Register);
23797   %}
23798   ins_pipe( pipe_slow );
23799 %}
23800 
23801 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23802   predicate(n->in(1)->bottom_type()->isa_vectmask());
23803   match(Set dst (VectorMaskToLong mask));
23804   effect(TEMP dst, KILL cr);
23805   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23806   ins_encode %{
23807     int opcode = this->ideal_Opcode();
23808     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23809     int mask_len = Matcher::vector_length(this, $mask);
23810     int mask_size = mask_len * type2aelembytes(mbt);
23811     int vlen_enc = vector_length_encoding(this, $mask);
23812     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23813                              $dst$$Register, mask_len, mask_size, vlen_enc);
23814   %}
23815   ins_pipe( pipe_slow );
23816 %}
23817 
23818 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23819   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23820   match(Set dst (VectorMaskToLong mask));
23821   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23822   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23823   ins_encode %{
23824     int opcode = this->ideal_Opcode();
23825     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23826     int mask_len = Matcher::vector_length(this, $mask);
23827     int vlen_enc = vector_length_encoding(this, $mask);
23828     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23829                              $dst$$Register, mask_len, mbt, vlen_enc);
23830   %}
23831   ins_pipe( pipe_slow );
23832 %}
23833 
23834 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23835   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23836   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23837   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23838   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23839   ins_encode %{
23840     int opcode = this->ideal_Opcode();
23841     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23842     int mask_len = Matcher::vector_length(this, $mask);
23843     int vlen_enc = vector_length_encoding(this, $mask);
23844     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23845                              $dst$$Register, mask_len, mbt, vlen_enc);
23846   %}
23847   ins_pipe( pipe_slow );
23848 %}
23849 
23850 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23851   predicate(n->in(1)->bottom_type()->isa_vectmask());
23852   match(Set dst (VectorMaskTrueCount mask));
23853   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23854   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23855   ins_encode %{
23856     int opcode = this->ideal_Opcode();
23857     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23858     int mask_len = Matcher::vector_length(this, $mask);
23859     int mask_size = mask_len * type2aelembytes(mbt);
23860     int vlen_enc = vector_length_encoding(this, $mask);
23861     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23862                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23863   %}
23864   ins_pipe( pipe_slow );
23865 %}
23866 
23867 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23868   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23869   match(Set dst (VectorMaskTrueCount mask));
23870   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23871   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23872   ins_encode %{
23873     int opcode = this->ideal_Opcode();
23874     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23875     int mask_len = Matcher::vector_length(this, $mask);
23876     int vlen_enc = vector_length_encoding(this, $mask);
23877     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23878                              $tmp$$Register, mask_len, mbt, vlen_enc);
23879   %}
23880   ins_pipe( pipe_slow );
23881 %}
23882 
23883 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23884   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23885   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23886   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23887   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23888   ins_encode %{
23889     int opcode = this->ideal_Opcode();
23890     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23891     int mask_len = Matcher::vector_length(this, $mask);
23892     int vlen_enc = vector_length_encoding(this, $mask);
23893     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23894                              $tmp$$Register, mask_len, mbt, vlen_enc);
23895   %}
23896   ins_pipe( pipe_slow );
23897 %}
23898 
23899 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23900   predicate(n->in(1)->bottom_type()->isa_vectmask());
23901   match(Set dst (VectorMaskFirstTrue mask));
23902   match(Set dst (VectorMaskLastTrue mask));
23903   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23904   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23905   ins_encode %{
23906     int opcode = this->ideal_Opcode();
23907     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23908     int mask_len = Matcher::vector_length(this, $mask);
23909     int mask_size = mask_len * type2aelembytes(mbt);
23910     int vlen_enc = vector_length_encoding(this, $mask);
23911     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23912                              $tmp$$Register, mask_len, mask_size, vlen_enc);
23913   %}
23914   ins_pipe( pipe_slow );
23915 %}
23916 
23917 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23918   predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23919   match(Set dst (VectorMaskFirstTrue mask));
23920   match(Set dst (VectorMaskLastTrue mask));
23921   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23922   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23923   ins_encode %{
23924     int opcode = this->ideal_Opcode();
23925     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23926     int mask_len = Matcher::vector_length(this, $mask);
23927     int vlen_enc = vector_length_encoding(this, $mask);
23928     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23929                              $tmp$$Register, mask_len, mbt, vlen_enc);
23930   %}
23931   ins_pipe( pipe_slow );
23932 %}
23933 
23934 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23935   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23936   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23937   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23938   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23939   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23940   ins_encode %{
23941     int opcode = this->ideal_Opcode();
23942     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23943     int mask_len = Matcher::vector_length(this, $mask);
23944     int vlen_enc = vector_length_encoding(this, $mask);
23945     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23946                              $tmp$$Register, mask_len, mbt, vlen_enc);
23947   %}
23948   ins_pipe( pipe_slow );
23949 %}
23950 
23951 // --------------------------------- Compress/Expand Operations ---------------------------
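// CompressV packs the lanes selected by the mask into consecutive low lanes,
// e.g. [a, b, c, d] with mask 0b0101 (lanes 0 and 2 set) compresses to
// [a, c, ...]; ExpandV performs the inverse scatter. In the EVEX form below
// the merge flag is false, which presumably leaves the unselected lanes zeroed.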
23952 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23953   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23954   match(Set dst (CompressV src mask));
23955   match(Set dst (ExpandV src mask));
23956   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23958   ins_encode %{
23959     int opcode = this->ideal_Opcode();
23960     int vlen_enc = vector_length_encoding(this);
23961     BasicType bt  = Matcher::vector_element_basic_type(this);
23962     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23963                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23964   %}
23965   ins_pipe( pipe_slow );
23966 %}
23967 
23968 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23969   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23970   match(Set dst (CompressV src mask));
23971   match(Set dst (ExpandV src mask));
23972   format %{ "vector_compress_expand $dst, $src, $mask" %}
23973   ins_encode %{
23974     int opcode = this->ideal_Opcode();
23975     int vector_len = vector_length_encoding(this);
23976     BasicType bt  = Matcher::vector_element_basic_type(this);
23977     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
23978   %}
23979   ins_pipe( pipe_slow );
23980 %}
23981 
23982 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
23983   match(Set dst (CompressM mask));
23984   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
23985   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
23986   ins_encode %{
23987     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
23988     int mask_len = Matcher::vector_length(this);
23989     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
23990   %}
23991   ins_pipe( pipe_slow );
23992 %}
23993 
23994 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23995 
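// ReverseV reverses the bit order within each element (e.g. the byte
// 0b00000110 becomes 0b01100000), while ReverseBytesV reverses the byte order.
// With GFNI, a single GF2P8AFFINEQB against the matrix constant
// 0x8040201008040201 (loaded below) bit-reverses every byte at once.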
23996 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23997   predicate(!VM_Version::supports_gfni());
23998   match(Set dst (ReverseV src));
23999   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24001   ins_encode %{
24002     int vec_enc = vector_length_encoding(this);
24003     BasicType bt = Matcher::vector_element_basic_type(this);
24004     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24005                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24006   %}
24007   ins_pipe( pipe_slow );
24008 %}
24009 
24010 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24011   predicate(VM_Version::supports_gfni());
24012   match(Set dst (ReverseV src));
24013   effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24015   ins_encode %{
24016     int vec_enc = vector_length_encoding(this);
24017     BasicType bt  = Matcher::vector_element_basic_type(this);
24018     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24019     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24020                                $xtmp$$XMMRegister);
24021   %}
24022   ins_pipe( pipe_slow );
24023 %}
24024 
24025 instruct vreverse_byte_reg(vec dst, vec src) %{
24026   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24027   match(Set dst (ReverseBytesV src));
24028   effect(TEMP dst);
24029   format %{ "vector_reverse_byte $dst, $src" %}
24030   ins_encode %{
24031     int vec_enc = vector_length_encoding(this);
24032     BasicType bt = Matcher::vector_element_basic_type(this);
24033     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24034   %}
24035   ins_pipe( pipe_slow );
24036 %}
24037 
24038 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24039   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24040   match(Set dst (ReverseBytesV src));
24041   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24043   ins_encode %{
24044     int vec_enc = vector_length_encoding(this);
24045     BasicType bt = Matcher::vector_element_basic_type(this);
24046     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24047                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24048   %}
24049   ins_pipe( pipe_slow );
24050 %}
24051 
24052 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24053 
24054 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24055   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24056                                               Matcher::vector_length_in_bytes(n->in(1))));
24057   match(Set dst (CountLeadingZerosV src));
24058   format %{ "vector_count_leading_zeros $dst, $src" %}
24059   ins_encode %{
24060      int vlen_enc = vector_length_encoding(this, $src);
24061      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24062      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24063                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24064   %}
24065   ins_pipe( pipe_slow );
24066 %}
24067 
24068 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24069   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24070                                               Matcher::vector_length_in_bytes(n->in(1))));
24071   match(Set dst (CountLeadingZerosV src mask));
24072   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24073   ins_encode %{
24074     int vlen_enc = vector_length_encoding(this, $src);
24075     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24076     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24077     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24078                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24079   %}
24080   ins_pipe( pipe_slow );
24081 %}
24082 
24083 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24084   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24085             VM_Version::supports_avx512cd() &&
24086             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24087   match(Set dst (CountLeadingZerosV src));
24088   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24090   ins_encode %{
24091     int vlen_enc = vector_length_encoding(this, $src);
24092     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24093     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24094                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24095   %}
24096   ins_pipe( pipe_slow );
24097 %}
24098 
24099 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24100   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24101   match(Set dst (CountLeadingZerosV src));
24102   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24104   ins_encode %{
24105     int vlen_enc = vector_length_encoding(this, $src);
24106     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24107     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24108                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24109                                        $rtmp$$Register, true, vlen_enc);
24110   %}
24111   ins_pipe( pipe_slow );
24112 %}
24113 
24114 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24115   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24116             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24117   match(Set dst (CountLeadingZerosV src));
24118   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24119   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24120   ins_encode %{
24121     int vlen_enc = vector_length_encoding(this, $src);
24122     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24123     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24124                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24125   %}
24126   ins_pipe( pipe_slow );
24127 %}
24128 
24129 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24130   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24131             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24132   match(Set dst (CountLeadingZerosV src));
24133   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24134   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24135   ins_encode %{
24136     int vlen_enc = vector_length_encoding(this, $src);
24137     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24138     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24139                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24140   %}
24141   ins_pipe( pipe_slow );
24142 %}
24143 
24144 // ---------------------------------- Vector Masked Operations ------------------------------------
24145 
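// These patterns use EVEX merge-masking (the trailing 'true' merge argument):
// dst[i] = op(dst[i], src2[i]) for lanes where the mask bit is set, while
// lanes with a clear mask bit keep their previous dst value.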
24146 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24147   match(Set dst (AddVB (Binary dst src2) mask));
24148   match(Set dst (AddVS (Binary dst src2) mask));
24149   match(Set dst (AddVI (Binary dst src2) mask));
24150   match(Set dst (AddVL (Binary dst src2) mask));
24151   match(Set dst (AddVF (Binary dst src2) mask));
24152   match(Set dst (AddVD (Binary dst src2) mask));
24153   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24154   ins_encode %{
24155     int vlen_enc = vector_length_encoding(this);
24156     BasicType bt = Matcher::vector_element_basic_type(this);
24157     int opc = this->ideal_Opcode();
24158     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24159                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24160   %}
24161   ins_pipe( pipe_slow );
24162 %}
24163 
24164 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24165   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24166   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24167   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24168   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24169   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24170   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24171   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24172   ins_encode %{
24173     int vlen_enc = vector_length_encoding(this);
24174     BasicType bt = Matcher::vector_element_basic_type(this);
24175     int opc = this->ideal_Opcode();
24176     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24177                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24178   %}
24179   ins_pipe( pipe_slow );
24180 %}
24181 
24182 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24183   match(Set dst (XorV (Binary dst src2) mask));
24184   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24185   ins_encode %{
24186     int vlen_enc = vector_length_encoding(this);
24187     BasicType bt = Matcher::vector_element_basic_type(this);
24188     int opc = this->ideal_Opcode();
24189     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24190                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24191   %}
24192   ins_pipe( pipe_slow );
24193 %}
24194 
24195 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24196   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24197   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24198   ins_encode %{
24199     int vlen_enc = vector_length_encoding(this);
24200     BasicType bt = Matcher::vector_element_basic_type(this);
24201     int opc = this->ideal_Opcode();
24202     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24203                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24204   %}
24205   ins_pipe( pipe_slow );
24206 %}
24207 
24208 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24209   match(Set dst (OrV (Binary dst src2) mask));
24210   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24211   ins_encode %{
24212     int vlen_enc = vector_length_encoding(this);
24213     BasicType bt = Matcher::vector_element_basic_type(this);
24214     int opc = this->ideal_Opcode();
24215     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24216                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24217   %}
24218   ins_pipe( pipe_slow );
24219 %}
24220 
24221 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24222   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24223   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24224   ins_encode %{
24225     int vlen_enc = vector_length_encoding(this);
24226     BasicType bt = Matcher::vector_element_basic_type(this);
24227     int opc = this->ideal_Opcode();
24228     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24229                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24230   %}
24231   ins_pipe( pipe_slow );
24232 %}
24233 
24234 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24235   match(Set dst (AndV (Binary dst src2) mask));
24236   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24237   ins_encode %{
24238     int vlen_enc = vector_length_encoding(this);
24239     BasicType bt = Matcher::vector_element_basic_type(this);
24240     int opc = this->ideal_Opcode();
24241     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24242                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24243   %}
24244   ins_pipe( pipe_slow );
24245 %}
24246 
24247 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24248   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24249   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24250   ins_encode %{
24251     int vlen_enc = vector_length_encoding(this);
24252     BasicType bt = Matcher::vector_element_basic_type(this);
24253     int opc = this->ideal_Opcode();
24254     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24255                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24256   %}
24257   ins_pipe( pipe_slow );
24258 %}
24259 
24260 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24261   match(Set dst (SubVB (Binary dst src2) mask));
24262   match(Set dst (SubVS (Binary dst src2) mask));
24263   match(Set dst (SubVI (Binary dst src2) mask));
24264   match(Set dst (SubVL (Binary dst src2) mask));
24265   match(Set dst (SubVF (Binary dst src2) mask));
24266   match(Set dst (SubVD (Binary dst src2) mask));
24267   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24268   ins_encode %{
24269     int vlen_enc = vector_length_encoding(this);
24270     BasicType bt = Matcher::vector_element_basic_type(this);
24271     int opc = this->ideal_Opcode();
24272     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24273                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24274   %}
24275   ins_pipe( pipe_slow );
24276 %}
24277 
24278 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24279   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24280   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24281   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24282   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24283   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24284   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24285   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24286   ins_encode %{
24287     int vlen_enc = vector_length_encoding(this);
24288     BasicType bt = Matcher::vector_element_basic_type(this);
24289     int opc = this->ideal_Opcode();
24290     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24291                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24292   %}
24293   ins_pipe( pipe_slow );
24294 %}
24295 
24296 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24297   match(Set dst (MulVS (Binary dst src2) mask));
24298   match(Set dst (MulVI (Binary dst src2) mask));
24299   match(Set dst (MulVL (Binary dst src2) mask));
24300   match(Set dst (MulVF (Binary dst src2) mask));
24301   match(Set dst (MulVD (Binary dst src2) mask));
24302   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24303   ins_encode %{
24304     int vlen_enc = vector_length_encoding(this);
24305     BasicType bt = Matcher::vector_element_basic_type(this);
24306     int opc = this->ideal_Opcode();
24307     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24308                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24309   %}
24310   ins_pipe( pipe_slow );
24311 %}
24312 
24313 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24314   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24315   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24316   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24317   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24318   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24319   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24320   ins_encode %{
24321     int vlen_enc = vector_length_encoding(this);
24322     BasicType bt = Matcher::vector_element_basic_type(this);
24323     int opc = this->ideal_Opcode();
24324     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24325                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24326   %}
24327   ins_pipe( pipe_slow );
24328 %}
24329 
24330 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24331   match(Set dst (SqrtVF dst mask));
24332   match(Set dst (SqrtVD dst mask));
24333   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24334   ins_encode %{
24335     int vlen_enc = vector_length_encoding(this);
24336     BasicType bt = Matcher::vector_element_basic_type(this);
24337     int opc = this->ideal_Opcode();
24338     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24339                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24340   %}
24341   ins_pipe( pipe_slow );
24342 %}
24343 
24344 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24345   match(Set dst (DivVF (Binary dst src2) mask));
24346   match(Set dst (DivVD (Binary dst src2) mask));
24347   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24348   ins_encode %{
24349     int vlen_enc = vector_length_encoding(this);
24350     BasicType bt = Matcher::vector_element_basic_type(this);
24351     int opc = this->ideal_Opcode();
24352     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24353                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24354   %}
24355   ins_pipe( pipe_slow );
24356 %}
24357 
24358 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24359   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24360   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24361   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24362   ins_encode %{
24363     int vlen_enc = vector_length_encoding(this);
24364     BasicType bt = Matcher::vector_element_basic_type(this);
24365     int opc = this->ideal_Opcode();
24366     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24367                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24368   %}
24369   ins_pipe( pipe_slow );
24370 %}
24371 
24372 
24373 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24374   match(Set dst (RotateLeftV (Binary dst shift) mask));
24375   match(Set dst (RotateRightV (Binary dst shift) mask));
24376   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24377   ins_encode %{
24378     int vlen_enc = vector_length_encoding(this);
24379     BasicType bt = Matcher::vector_element_basic_type(this);
24380     int opc = this->ideal_Opcode();
24381     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24382                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24383   %}
24384   ins_pipe( pipe_slow );
24385 %}
24386 
24387 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24388   match(Set dst (RotateLeftV (Binary dst src2) mask));
24389   match(Set dst (RotateRightV (Binary dst src2) mask));
24390   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24391   ins_encode %{
24392     int vlen_enc = vector_length_encoding(this);
24393     BasicType bt = Matcher::vector_element_basic_type(this);
24394     int opc = this->ideal_Opcode();
24395     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24396                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24397   %}
24398   ins_pipe( pipe_slow );
24399 %}
24400 
24401 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24402   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24403   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24404   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24405   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24406   ins_encode %{
24407     int vlen_enc = vector_length_encoding(this);
24408     BasicType bt = Matcher::vector_element_basic_type(this);
24409     int opc = this->ideal_Opcode();
24410     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24411                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24412   %}
24413   ins_pipe( pipe_slow );
24414 %}
24415 
24416 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24417   predicate(!n->as_ShiftV()->is_var_shift());
24418   match(Set dst (LShiftVS (Binary dst src2) mask));
24419   match(Set dst (LShiftVI (Binary dst src2) mask));
24420   match(Set dst (LShiftVL (Binary dst src2) mask));
24421   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24422   ins_encode %{
24423     int vlen_enc = vector_length_encoding(this);
24424     BasicType bt = Matcher::vector_element_basic_type(this);
24425     int opc = this->ideal_Opcode();
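    // The trailing 'false' marks this as a uniform shift (all lanes use the
    // same count); the variable per-lane variant below passes 'true' instead,
    // matching the is_var_shift predicates.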
24426     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24427                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24428   %}
24429   ins_pipe( pipe_slow );
24430 %}
24431 
24432 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24433   predicate(n->as_ShiftV()->is_var_shift());
24434   match(Set dst (LShiftVS (Binary dst src2) mask));
24435   match(Set dst (LShiftVI (Binary dst src2) mask));
24436   match(Set dst (LShiftVL (Binary dst src2) mask));
24437   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24438   ins_encode %{
24439     int vlen_enc = vector_length_encoding(this);
24440     BasicType bt = Matcher::vector_element_basic_type(this);
24441     int opc = this->ideal_Opcode();
24442     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24443                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24444   %}
24445   ins_pipe( pipe_slow );
24446 %}
24447 
24448 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24449   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24450   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24451   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24452   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24453   ins_encode %{
24454     int vlen_enc = vector_length_encoding(this);
24455     BasicType bt = Matcher::vector_element_basic_type(this);
24456     int opc = this->ideal_Opcode();
24457     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24458                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24459   %}
24460   ins_pipe( pipe_slow );
24461 %}
24462 
24463 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24464   predicate(!n->as_ShiftV()->is_var_shift());
24465   match(Set dst (RShiftVS (Binary dst src2) mask));
24466   match(Set dst (RShiftVI (Binary dst src2) mask));
24467   match(Set dst (RShiftVL (Binary dst src2) mask));
24468   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24469   ins_encode %{
24470     int vlen_enc = vector_length_encoding(this);
24471     BasicType bt = Matcher::vector_element_basic_type(this);
24472     int opc = this->ideal_Opcode();
24473     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24474                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24475   %}
24476   ins_pipe( pipe_slow );
24477 %}
24478 
24479 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24480   predicate(n->as_ShiftV()->is_var_shift());
24481   match(Set dst (RShiftVS (Binary dst src2) mask));
24482   match(Set dst (RShiftVI (Binary dst src2) mask));
24483   match(Set dst (RShiftVL (Binary dst src2) mask));
24484   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24485   ins_encode %{
24486     int vlen_enc = vector_length_encoding(this);
24487     BasicType bt = Matcher::vector_element_basic_type(this);
24488     int opc = this->ideal_Opcode();
24489     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24490                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24491   %}
24492   ins_pipe( pipe_slow );
24493 %}
24494 
24495 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24496   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24497   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24498   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24499   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24500   ins_encode %{
24501     int vlen_enc = vector_length_encoding(this);
24502     BasicType bt = Matcher::vector_element_basic_type(this);
24503     int opc = this->ideal_Opcode();
24504     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24505                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24506   %}
24507   ins_pipe( pipe_slow );
24508 %}
24509 
24510 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24511   predicate(!n->as_ShiftV()->is_var_shift());
24512   match(Set dst (URShiftVS (Binary dst src2) mask));
24513   match(Set dst (URShiftVI (Binary dst src2) mask));
24514   match(Set dst (URShiftVL (Binary dst src2) mask));
24515   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24516   ins_encode %{
24517     int vlen_enc = vector_length_encoding(this);
24518     BasicType bt = Matcher::vector_element_basic_type(this);
24519     int opc = this->ideal_Opcode();
24520     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24521                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24522   %}
24523   ins_pipe( pipe_slow );
24524 %}
24525 
24526 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24527   predicate(n->as_ShiftV()->is_var_shift());
24528   match(Set dst (URShiftVS (Binary dst src2) mask));
24529   match(Set dst (URShiftVI (Binary dst src2) mask));
24530   match(Set dst (URShiftVL (Binary dst src2) mask));
24531   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24532   ins_encode %{
24533     int vlen_enc = vector_length_encoding(this);
24534     BasicType bt = Matcher::vector_element_basic_type(this);
24535     int opc = this->ideal_Opcode();
24536     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24537                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24538   %}
24539   ins_pipe( pipe_slow );
24540 %}
24541 
24542 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24543   match(Set dst (MaxV (Binary dst src2) mask));
24544   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24545   ins_encode %{
24546     int vlen_enc = vector_length_encoding(this);
24547     BasicType bt = Matcher::vector_element_basic_type(this);
24548     int opc = this->ideal_Opcode();
24549     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24550                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24551   %}
24552   ins_pipe( pipe_slow );
24553 %}
24554 
24555 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24556   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24557   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24558   ins_encode %{
24559     int vlen_enc = vector_length_encoding(this);
24560     BasicType bt = Matcher::vector_element_basic_type(this);
24561     int opc = this->ideal_Opcode();
24562     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24563                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24564   %}
24565   ins_pipe( pipe_slow );
24566 %}
24567 
24568 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24569   match(Set dst (MinV (Binary dst src2) mask));
24570   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24571   ins_encode %{
24572     int vlen_enc = vector_length_encoding(this);
24573     BasicType bt = Matcher::vector_element_basic_type(this);
24574     int opc = this->ideal_Opcode();
24575     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24576                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24577   %}
24578   ins_pipe( pipe_slow );
24579 %}
24580 
24581 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24582   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24583   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24584   ins_encode %{
24585     int vlen_enc = vector_length_encoding(this);
24586     BasicType bt = Matcher::vector_element_basic_type(this);
24587     int opc = this->ideal_Opcode();
24588     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24589                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24590   %}
24591   ins_pipe( pipe_slow );
24592 %}
24593 
24594 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24595   match(Set dst (VectorRearrange (Binary dst src2) mask));
24596   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24597   ins_encode %{
24598     int vlen_enc = vector_length_encoding(this);
24599     BasicType bt = Matcher::vector_element_basic_type(this);
24600     int opc = this->ideal_Opcode();
24601     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24602                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24603   %}
24604   ins_pipe( pipe_slow );
24605 %}
24606 
24607 instruct vabs_masked(vec dst, kReg mask) %{
24608   match(Set dst (AbsVB dst mask));
24609   match(Set dst (AbsVS dst mask));
24610   match(Set dst (AbsVI dst mask));
24611   match(Set dst (AbsVL dst mask));
24612   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24613   ins_encode %{
24614     int vlen_enc = vector_length_encoding(this);
24615     BasicType bt = Matcher::vector_element_basic_type(this);
24616     int opc = this->ideal_Opcode();
24617     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24618                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24619   %}
24620   ins_pipe( pipe_slow );
24621 %}
24622 
24623 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24624   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24625   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24626   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24627   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24629     int vlen_enc = vector_length_encoding(this);
24630     BasicType bt = Matcher::vector_element_basic_type(this);
24631     int opc = this->ideal_Opcode();
24632     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24633                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24634   %}
24635   ins_pipe( pipe_slow );
24636 %}
24637 
24638 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24639   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24640   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24641   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24642   ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24644     int vlen_enc = vector_length_encoding(this);
24645     BasicType bt = Matcher::vector_element_basic_type(this);
24646     int opc = this->ideal_Opcode();
24647     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24648                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24649   %}
24650   ins_pipe( pipe_slow );
24651 %}
24652 
24653 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24654   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24655   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24656   ins_encode %{
24657     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24658     int vlen_enc = vector_length_encoding(this, $src1);
24659     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24660 
    // Dispatch on the element type of src1 to select the comparison instruction.
24662     switch (src1_elem_bt) {
24663       case T_BYTE: {
24664         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24665         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24666         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24667         break;
24668       }
24669       case T_SHORT: {
24670         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24671         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24672         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24673         break;
24674       }
24675       case T_INT: {
24676         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24677         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24678         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24679         break;
24680       }
24681       case T_LONG: {
24682         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24683         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24684         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24685         break;
24686       }
24687       case T_FLOAT: {
24688         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24689         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24690         break;
24691       }
24692       case T_DOUBLE: {
24693         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24694         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24695         break;
24696       }
24697       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24698     }
24699   %}
24700   ins_pipe( pipe_slow );
24701 %}
24702 
24703 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24704   predicate(Matcher::vector_length(n) <= 32);
24705   match(Set dst (MaskAll src));
24706   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24707   ins_encode %{
24708     int mask_len = Matcher::vector_length(this);
24709     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24710   %}
24711   ins_pipe( pipe_slow );
24712 %}
24713 
24714 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24715   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24716   match(Set dst (XorVMask src (MaskAll cnt)));
24717   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24718   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24719   ins_encode %{
24720     uint masklen = Matcher::vector_length(this);
24721     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24722   %}
24723   ins_pipe( pipe_slow );
24724 %}
24725 
24726 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24727   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24728             (Matcher::vector_length(n) == 16) ||
24729             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24730   match(Set dst (XorVMask src (MaskAll cnt)));
24731   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24732   ins_encode %{
24733     uint masklen = Matcher::vector_length(this);
24734     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24735   %}
24736   ins_pipe( pipe_slow );
24737 %}
24738 
24739 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24740   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24741   match(Set dst (VectorLongToMask src));
24742   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24743   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24744   ins_encode %{
24745     int mask_len = Matcher::vector_length(this);
24746     int vec_enc  = vector_length_encoding(mask_len);
24747     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24748                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24749   %}
24750   ins_pipe( pipe_slow );
24751 %}
24752 
24753 
24754 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24755   predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24756   match(Set dst (VectorLongToMask src));
24757   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24758   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24759   ins_encode %{
24760     int mask_len = Matcher::vector_length(this);
24761     assert(mask_len <= 32, "invalid mask length");
24762     int vec_enc  = vector_length_encoding(mask_len);
24763     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24764                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24765   %}
24766   ins_pipe( pipe_slow );
24767 %}
24768 
24769 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24770   predicate(n->bottom_type()->isa_vectmask());
24771   match(Set dst (VectorLongToMask src));
24772   format %{ "long_to_mask_evex $dst, $src\t!" %}
24773   ins_encode %{
24774     __ kmov($dst$$KRegister, $src$$Register);
24775   %}
24776   ins_pipe( pipe_slow );
24777 %}
24778 
24779 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24780   match(Set dst (AndVMask src1 src2));
24781   match(Set dst (OrVMask src1 src2));
24782   match(Set dst (XorVMask src1 src2));
24783   effect(TEMP kscratch);
24784   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24785   ins_encode %{
24786     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24787     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24788     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24789     uint masklen = Matcher::vector_length(this);
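    // AVX512F only provides mask instructions at 16-bit (word) granularity;
    // byte-granular forms need AVX512DQ, so shorter masks are widened to 16.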
24790     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24791     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24792   %}
24793   ins_pipe( pipe_slow );
24794 %}
24795 
24796 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24797   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24798   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24799   ins_encode %{
24800     int vlen_enc = vector_length_encoding(this);
24801     BasicType bt = Matcher::vector_element_basic_type(this);
24802     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24803                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24804   %}
24805   ins_pipe( pipe_slow );
24806 %}
24807 
24808 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24809   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24810   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24811   ins_encode %{
24812     int vlen_enc = vector_length_encoding(this);
24813     BasicType bt = Matcher::vector_element_basic_type(this);
24814     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24815                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24816   %}
24817   ins_pipe( pipe_slow );
24818 %}
24819 
24820 instruct castMM(kReg dst)
24821 %{
24822   match(Set dst (CastVV dst));
24823 
24824   size(0);
24825   format %{ "# castVV of $dst" %}
24826   ins_encode(/* empty encoding */);
24827   ins_cost(0);
24828   ins_pipe(empty);
24829 %}
24830 
24831 instruct castVV(vec dst)
24832 %{
24833   match(Set dst (CastVV dst));
24834 
24835   size(0);
24836   format %{ "# castVV of $dst" %}
24837   ins_encode(/* empty encoding */);
24838   ins_cost(0);
24839   ins_pipe(empty);
24840 %}
24841 
24842 instruct castVVLeg(legVec dst)
24843 %{
24844   match(Set dst (CastVV dst));
24845 
24846   size(0);
24847   format %{ "# castVV of $dst" %}
24848   ins_encode(/* empty encoding */);
24849   ins_cost(0);
24850   ins_pipe(empty);
24851 %}
24852 
24853 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24854 %{
24855   match(Set dst (IsInfiniteF src));
24856   effect(TEMP ktmp, KILL cr);
24857   format %{ "float_class_check $dst, $src" %}
24858   ins_encode %{
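    // imm8 0x18 sets bits 3 and 4 of the vfpclass class mask, selecting the
    // +Infinity and -Infinity classes, so $ktmp is set iff $src is infinite.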
24859     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24860     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24861   %}
24862   ins_pipe(pipe_slow);
24863 %}
24864 
24865 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24866 %{
24867   match(Set dst (IsInfiniteD src));
24868   effect(TEMP ktmp, KILL cr);
24869   format %{ "double_class_check $dst, $src" %}
24870   ins_encode %{
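    // As in the float variant above, imm8 0x18 selects the +Infinity and
    // -Infinity classes.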
24871     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24872     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24873   %}
24874   ins_pipe(pipe_slow);
24875 %}
24876 
24877 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24878 %{
24879   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24880             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24881   match(Set dst (SaturatingAddV src1 src2));
24882   match(Set dst (SaturatingSubV src1 src2));
24883   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24884   ins_encode %{
24885     int vlen_enc = vector_length_encoding(this);
24886     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24887     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24888                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24889   %}
24890   ins_pipe(pipe_slow);
24891 %}
24892 
24893 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24894 %{
24895   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24896             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24897   match(Set dst (SaturatingAddV src1 src2));
24898   match(Set dst (SaturatingSubV src1 src2));
24899   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24900   ins_encode %{
24901     int vlen_enc = vector_length_encoding(this);
24902     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24903     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24904                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24905   %}
24906   ins_pipe(pipe_slow);
24907 %}
24908 
24909 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24910 %{
24911   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24912             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24913             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24914   match(Set dst (SaturatingAddV src1 src2));
24915   match(Set dst (SaturatingSubV src1 src2));
24916   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24917   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24918   ins_encode %{
24919     int vlen_enc = vector_length_encoding(this);
24920     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24921     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24922                                         $src1$$XMMRegister, $src2$$XMMRegister,
24923                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24924                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24925   %}
24926   ins_pipe(pipe_slow);
24927 %}
24928 
24929 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24930 %{
24931   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24932             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24933             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24934   match(Set dst (SaturatingAddV src1 src2));
24935   match(Set dst (SaturatingSubV src1 src2));
24936   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24937   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24938   ins_encode %{
24939     int vlen_enc = vector_length_encoding(this);
24940     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24941     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24942                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24943                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24944   %}
24945   ins_pipe(pipe_slow);
24946 %}
24947 
24948 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24949 %{
24950   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24951             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24952             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24953   match(Set dst (SaturatingAddV src1 src2));
24954   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24955   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24956   ins_encode %{
24957     int vlen_enc = vector_length_encoding(this);
24958     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24959     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24960                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24961   %}
24962   ins_pipe(pipe_slow);
24963 %}
24964 
24965 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24966 %{
24967   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24968             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24969             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24970   match(Set dst (SaturatingAddV src1 src2));
24971   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24972   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24973   ins_encode %{
24974     int vlen_enc = vector_length_encoding(this);
24975     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24976     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24977                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24978   %}
24979   ins_pipe(pipe_slow);
24980 %}
24981 
24982 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24983 %{
24984   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24985             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24986             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24987   match(Set dst (SaturatingSubV src1 src2));
24988   effect(TEMP ktmp);
24989   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24990   ins_encode %{
24991     int vlen_enc = vector_length_encoding(this);
24992     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24993     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24994                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24995   %}
24996   ins_pipe(pipe_slow);
24997 %}
24998 
24999 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25000 %{
25001   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25002             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25003             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25004   match(Set dst (SaturatingSubV src1 src2));
25005   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25006   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25007   ins_encode %{
25008     int vlen_enc = vector_length_encoding(this);
25009     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25010     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25011                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25012   %}
25013   ins_pipe(pipe_slow);
25014 %}
25015 
25016 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25017 %{
25018   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25019             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25020   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25021   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25022   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25023   ins_encode %{
25024     int vlen_enc = vector_length_encoding(this);
25025     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25026     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25027                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25028   %}
25029   ins_pipe(pipe_slow);
25030 %}
25031 
25032 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25033 %{
25034   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25035             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25036   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25037   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25038   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25039   ins_encode %{
25040     int vlen_enc = vector_length_encoding(this);
25041     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25042     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25043                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25044   %}
25045   ins_pipe(pipe_slow);
25046 %}
25047 
25048 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25049   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25050             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25051   match(Set dst (SaturatingAddV (Binary dst src) mask));
25052   match(Set dst (SaturatingSubV (Binary dst src) mask));
25053   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25054   ins_encode %{
25055     int vlen_enc = vector_length_encoding(this);
25056     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25057     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25058                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25059   %}
25060   ins_pipe( pipe_slow );
25061 %}
25062 
25063 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25064   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25065             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25066   match(Set dst (SaturatingAddV (Binary dst src) mask));
25067   match(Set dst (SaturatingSubV (Binary dst src) mask));
25068   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25069   ins_encode %{
25070     int vlen_enc = vector_length_encoding(this);
25071     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25072     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25073                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25074   %}
25075   ins_pipe( pipe_slow );
25076 %}
25077 
25078 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25079   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25080             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25081   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25082   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25083   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25084   ins_encode %{
25085     int vlen_enc = vector_length_encoding(this);
25086     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25087     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25088                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25089   %}
25090   ins_pipe( pipe_slow );
25091 %}
25092 
25093 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25094   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25095             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25096   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25097   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25098   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25099   ins_encode %{
25100     int vlen_enc = vector_length_encoding(this);
25101     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25102     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25103                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25104   %}
25105   ins_pipe( pipe_slow );
25106 %}
25107 
25108 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25109 %{
25110   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25111   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25112   ins_encode %{
25113     int vlen_enc = vector_length_encoding(this);
25114     BasicType bt = Matcher::vector_element_basic_type(this);
25115     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25116   %}
25117   ins_pipe(pipe_slow);
25118 %}
25119 
25120 instruct reinterpretS2HF(regF dst, rRegI src)
25121 %{
25122   match(Set dst (ReinterpretS2HF src));
25123   format %{ "vmovw $dst, $src" %}
25124   ins_encode %{
25125     __ vmovw($dst$$XMMRegister, $src$$Register);
25126   %}
25127   ins_pipe(pipe_slow);
25128 %}
25129 
25130 instruct reinterpretHF2S(rRegI dst, regF src)
25131 %{
25132   match(Set dst (ReinterpretHF2S src));
25133   format %{ "vmovw $dst, $src" %}
25134   ins_encode %{
25135     __ vmovw($dst$$Register, $src$$XMMRegister);
25136   %}
25137   ins_pipe(pipe_slow);
25138 %}
25139 
25140 instruct convF2HFAndS2HF(regF dst, regF src)
25141 %{
25142   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25143   format %{ "convF2HFAndS2HF $dst, $src" %}
25144   ins_encode %{
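    // imm8 0x04 sets bit 2 of the vcvtps2ph immediate, deferring rounding to
    // MXCSR.RC, whose default (round-to-nearest-even) matches the semantics
    // of Float.floatToFloat16.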
25145     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25146   %}
25147   ins_pipe(pipe_slow);
25148 %}
25149 
25150 instruct convHF2SAndHF2F(regF dst, regF src)
25151 %{
25152   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25153   format %{ "convHF2SAndHF2F $dst, $src" %}
25154   ins_encode %{
25155     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25156   %}
25157   ins_pipe(pipe_slow);
25158 %}
25159 
25160 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25161 %{
25162   match(Set dst (SqrtHF src));
25163   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25164   ins_encode %{
25165     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25166   %}
25167   ins_pipe(pipe_slow);
25168 %}
25169 
25170 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25171 %{
25172   match(Set dst (AddHF src1 src2));
25173   match(Set dst (DivHF src1 src2));
25174   match(Set dst (MulHF src1 src2));
25175   match(Set dst (SubHF src1 src2));
25176   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25177   ins_encode %{
25178     int opcode = this->ideal_Opcode();
25179     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25180   %}
25181   ins_pipe(pipe_slow);
25182 %}
25183 
25184 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25185 %{
25186   predicate(VM_Version::supports_avx10_2());
25187   match(Set dst (MaxHF src1 src2));
25188   match(Set dst (MinHF src1 src2));
25189   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25190   ins_encode %{
25191     int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25192     __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25193   %}
25194   ins_pipe( pipe_slow );
25195 %}
25196 
25197 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25198 %{
25199   predicate(!VM_Version::supports_avx10_2());
25200   match(Set dst (MaxHF src1 src2));
25201   match(Set dst (MinHF src1 src2));
25202   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25203   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25204   ins_encode %{
25205     int opcode = this->ideal_Opcode();
25206     __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25207                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25208   %}
25209   ins_pipe( pipe_slow );
25210 %}
25211 
25212 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25213 %{
25214   match(Set dst (FmaHF  src2 (Binary dst src1)));
25215   effect(DEF dst);
25216   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25217   ins_encode %{
25218     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25219   %}
25220   ins_pipe( pipe_slow );
25221 %}
25222 
25223 
25224 instruct vector_sqrt_HF_reg(vec dst, vec src)
25225 %{
25226   match(Set dst (SqrtVHF src));
25227   format %{ "vector_sqrt_fp16 $dst, $src" %}
25228   ins_encode %{
25229     int vlen_enc = vector_length_encoding(this);
25230     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25231   %}
25232   ins_pipe(pipe_slow);
25233 %}
25234 
25235 instruct vector_sqrt_HF_mem(vec dst, memory src)
25236 %{
25237   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25238   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25239   ins_encode %{
25240     int vlen_enc = vector_length_encoding(this);
25241     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25242   %}
25243   ins_pipe(pipe_slow);
25244 %}
25245 
25246 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25247 %{
25248   match(Set dst (AddVHF src1 src2));
25249   match(Set dst (DivVHF src1 src2));
25250   match(Set dst (MulVHF src1 src2));
25251   match(Set dst (SubVHF src1 src2));
25252   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25253   ins_encode %{
25254     int vlen_enc = vector_length_encoding(this);
25255     int opcode = this->ideal_Opcode();
25256     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25257   %}
25258   ins_pipe(pipe_slow);
25259 %}
25260 
25261 
25262 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25263 %{
25264   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25265   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25266   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25267   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25268   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25269   ins_encode %{
25270     int vlen_enc = vector_length_encoding(this);
25271     int opcode = this->ideal_Opcode();
25272     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25273   %}
25274   ins_pipe(pipe_slow);
25275 %}
25276 
25277 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25278 %{
25279   match(Set dst (FmaVHF src2 (Binary dst src1)));
25280   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25281   ins_encode %{
25282     int vlen_enc = vector_length_encoding(this);
25283     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25284   %}
25285   ins_pipe( pipe_slow );
25286 %}
25287 
25288 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25289 %{
25290   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25291   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25292   ins_encode %{
25293     int vlen_enc = vector_length_encoding(this);
25294     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25295   %}
25296   ins_pipe( pipe_slow );
25297 %}
25298 
25299 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25300 %{
25301   predicate(VM_Version::supports_avx10_2());
25302   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25303   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25304   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25305   ins_encode %{
25306     int vlen_enc = vector_length_encoding(this);
25307     int function =  this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25308     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25309   %}
25310   ins_pipe( pipe_slow );
25311 %}
25312 
25313 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25314 %{
25315   predicate(VM_Version::supports_avx10_2());
25316   match(Set dst (MinVHF src1 src2));
25317   match(Set dst (MaxVHF src1 src2));
25318   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25319   ins_encode %{
25320     int vlen_enc = vector_length_encoding(this);
25321     int function =  this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25322     __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25323   %}
25324   ins_pipe( pipe_slow );
25325 %}
25326 
25327 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25328 %{
25329   predicate(!VM_Version::supports_avx10_2());
25330   match(Set dst (MinVHF src1 src2));
25331   match(Set dst (MaxVHF src1 src2));
25332   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25333   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25334   ins_encode %{
25335     int vlen_enc = vector_length_encoding(this);
25336     int opcode = this->ideal_Opcode();
25337     __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25338                            $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25339   %}
25340   ins_pipe( pipe_slow );
25341 %}
25342 
25343 //----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
25346 //
25347 // peeppredicate ( rule_predicate );
// // the predicate; the peephole rule is ignored unless it holds
25349 //
25350 // peepmatch ( root_instr_name [preceding_instruction]* );
25351 //
// peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the
// // procedure should reside in the architecture-dependent peephole file and
// // have the signature MachNode* (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...),
// // with the arguments being the basic block, the current node index inside
// // the block, the register allocator, functions that when invoked return a
// // new node defined in peepreplace, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch. The procedure returns true if
// // successful, else false.
25361 //
25362 // peepconstraint %{
25363 // (instruction_number.operand_name relational_op instruction_number.operand_name
25364 //  [, ...] );
25365 // // instruction numbers are zero-based using left to right order in peepmatch
25366 //
25367 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25368 // // provide an instruction_number.operand_name for each operand that appears
25369 // // in the replacement instruction's match rule
25370 //
25371 // ---------VM FLAGS---------------------------------------------------------
25372 //
25373 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25374 //
25375 // Each peephole rule is given an identifying number starting with zero and
25376 // increasing by one in the order seen by the parser.  An individual peephole
25377 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25378 // on the command-line.
25379 //
25380 // ---------CURRENT LIMITATIONS----------------------------------------------
25381 //
// Only transformations inside a basic block (do we need more for peephole?)
25383 //
25384 // ---------EXAMPLE----------------------------------------------------------
25385 //
25386 // // pertinent parts of existing instructions in architecture description
25387 // instruct movI(rRegI dst, rRegI src)
25388 // %{
25389 //   match(Set dst (CopyI src));
25390 // %}
25391 //
25392 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25393 // %{
25394 //   match(Set dst (AddI dst src));
25395 //   effect(KILL cr);
25396 // %}
25397 //
25398 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25399 // %{
25400 //   match(Set dst (AddI dst src));
25401 // %}
25402 //
25403 // 1. Simple replacement
25404 // - Only match adjacent instructions in same basic block
25405 // - Only equality constraints
25406 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25407 // - Only one replacement instruction
25408 //
25409 // // Change (inc mov) to lea
25410 // peephole %{
25411 //   // lea should only be emitted when beneficial
25412 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25413 //   // increment preceded by register-register move
25414 //   peepmatch ( incI_rReg movI );
25415 //   // require that the destination register of the increment
25416 //   // match the destination register of the move
25417 //   peepconstraint ( 0.dst == 1.dst );
25418 //   // construct a replacement instruction that sets
25419 //   // the destination to ( move's source register + one )
25420 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25421 // %}
25422 //
25423 // 2. Procedural replacement
// - More flexible discovery of relevant nodes
25425 // - More flexible constraints
25426 // - More flexible transformations
25427 // - May utilise architecture-dependent API more effectively
25428 // - Currently only one replacement instruction due to adlc parsing capabilities
25429 //
25430 // // Change (inc mov) to lea
25431 // peephole %{
25432 //   // lea should only be emitted when beneficial
25433 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25434 //   // the rule numbers of these nodes inside are passed into the function below
25435 //   peepmatch ( incI_rReg movI );
25436 //   // the method that takes the responsibility of transformation
25437 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the procedure above
25440 //   peepreplace ( leaI_rReg_immI() );
25441 // %}
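//
// For reference, the named procedure lives in the architecture-dependent
// peephole files (peephole_x86_64.{hpp,cpp} for this port). As a sketch,
// assuming it follows the same pattern as the procedures used further below,
// its declaration would look roughly like:
//
//   bool Peephole::inc_mov_to_lea(Block* block, int block_index, PhaseCFG* cfg_,
//                                 PhaseRegAlloc* ra_, MachNode* (*new_root)(),
//                                 uint inst0_rule);
//
// where new_root is the node-creating lambda derived from the peepreplace
// clause and inst0_rule is the rule number of the first matched node.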
25442 
// These instructions are not matched by the matcher but are used by the peephole rules below
25444 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25445 %{
25446   predicate(false);
25447   match(Set dst (AddI src1 src2));
25448   format %{ "leal    $dst, [$src1 + $src2]" %}
25449   ins_encode %{
25450     Register dst = $dst$$Register;
25451     Register src1 = $src1$$Register;
25452     Register src2 = $src2$$Register;
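    // rbp and r13 cannot be encoded as a base register without a
    // displacement byte, so when src1 is one of them, swap the roles of
    // base and index to keep the shorter encoding.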
25453     if (src1 != rbp && src1 != r13) {
25454       __ leal(dst, Address(src1, src2, Address::times_1));
25455     } else {
      assert(src2 != rbp && src2 != r13, "one source must be usable as the base register");
25457       __ leal(dst, Address(src2, src1, Address::times_1));
25458     }
25459   %}
25460   ins_pipe(ialu_reg_reg);
25461 %}
25462 
25463 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25464 %{
25465   predicate(false);
25466   match(Set dst (AddI src1 src2));
25467   format %{ "leal    $dst, [$src1 + $src2]" %}
25468   ins_encode %{
25469     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25470   %}
25471   ins_pipe(ialu_reg_reg);
25472 %}
25473 
25474 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25475 %{
25476   predicate(false);
25477   match(Set dst (LShiftI src shift));
25478   format %{ "leal    $dst, [$src << $shift]" %}
25479   ins_encode %{
25480     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25481     Register src = $src$$Register;
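    // A shift by 1 (scale of 2) can instead be computed as src + src; the
    // index-only form below has no base register and therefore needs a
    // 32-bit displacement, which lengthens the encoding. The base+index
    // form is only legal when src can serve as a base (not rbp/r13).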
25482     if (scale == Address::times_2 && src != rbp && src != r13) {
25483       __ leal($dst$$Register, Address(src, src, Address::times_1));
25484     } else {
25485       __ leal($dst$$Register, Address(noreg, src, scale));
25486     }
25487   %}
25488   ins_pipe(ialu_reg_reg);
25489 %}
25490 
25491 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25492 %{
25493   predicate(false);
25494   match(Set dst (AddL src1 src2));
25495   format %{ "leaq    $dst, [$src1 + $src2]" %}
25496   ins_encode %{
25497     Register dst = $dst$$Register;
25498     Register src1 = $src1$$Register;
25499     Register src2 = $src2$$Register;
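    // As in leaI_rReg_rReg_peep: rbp/r13 cannot be a base without a
    // displacement byte, so pick whichever source can serve as the base.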
25500     if (src1 != rbp && src1 != r13) {
25501       __ leaq(dst, Address(src1, src2, Address::times_1));
25502     } else {
      assert(src2 != rbp && src2 != r13, "one source must be usable as the base register");
25504       __ leaq(dst, Address(src2, src1, Address::times_1));
25505     }
25506   %}
25507   ins_pipe(ialu_reg_reg);
25508 %}
25509 
25510 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25511 %{
25512   predicate(false);
25513   match(Set dst (AddL src1 src2));
25514   format %{ "leaq    $dst, [$src1 + $src2]" %}
25515   ins_encode %{
25516     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25517   %}
25518   ins_pipe(ialu_reg_reg);
25519 %}
25520 
25521 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25522 %{
25523   predicate(false);
25524   match(Set dst (LShiftL src shift));
25525   format %{ "leaq    $dst, [$src << $shift]" %}
25526   ins_encode %{
25527     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25528     Register src = $src$$Register;
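    // As in leaI_rReg_immI2_peep: prefer src + src over an index-only
    // address, which would force a 32-bit displacement.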
25529     if (scale == Address::times_2 && src != rbp && src != r13) {
25530       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25531     } else {
25532       __ leaq($dst$$Register, Address(noreg, src, scale));
25533     }
25534   %}
25535   ins_pipe(ialu_reg_reg);
25536 %}
25537 
// These peephole rules replace mov + op pairs (where op is one of {add, inc,
// dec, sal}) with lea instructions. The {add, sal} rules are beneficial on
// processors with at least partial ALU support for lea
// (VM_Version::supports_fast_2op_lea()), whereas the {inc, dec} rules are
// generally only beneficial on processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and on Intel Cascade Lake.
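//
// As an illustration (register choices are arbitrary), the int reg-reg rule
// rewrites a pair such as
//   movl  eax, edi        // movI
//   addl  eax, esi        // addI_rReg
// into a single
//   leal  eax, [rdi + rsi]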
25544 
25545 peephole
25546 %{
25547   peeppredicate(VM_Version::supports_fast_2op_lea());
25548   peepmatch (addI_rReg);
25549   peepprocedure (lea_coalesce_reg);
25550   peepreplace (leaI_rReg_rReg_peep());
25551 %}
25552 
25553 peephole
25554 %{
25555   peeppredicate(VM_Version::supports_fast_2op_lea());
25556   peepmatch (addI_rReg_imm);
25557   peepprocedure (lea_coalesce_imm);
25558   peepreplace (leaI_rReg_immI_peep());
25559 %}
25560 
25561 peephole
25562 %{
25563   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25564                 VM_Version::is_intel_cascade_lake());
25565   peepmatch (incI_rReg);
25566   peepprocedure (lea_coalesce_imm);
25567   peepreplace (leaI_rReg_immI_peep());
25568 %}
25569 
25570 peephole
25571 %{
25572   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25573                 VM_Version::is_intel_cascade_lake());
25574   peepmatch (decI_rReg);
25575   peepprocedure (lea_coalesce_imm);
25576   peepreplace (leaI_rReg_immI_peep());
25577 %}
25578 
25579 peephole
25580 %{
25581   peeppredicate(VM_Version::supports_fast_2op_lea());
25582   peepmatch (salI_rReg_immI2);
25583   peepprocedure (lea_coalesce_imm);
25584   peepreplace (leaI_rReg_immI2_peep());
25585 %}
25586 
25587 peephole
25588 %{
25589   peeppredicate(VM_Version::supports_fast_2op_lea());
25590   peepmatch (addL_rReg);
25591   peepprocedure (lea_coalesce_reg);
25592   peepreplace (leaL_rReg_rReg_peep());
25593 %}
25594 
25595 peephole
25596 %{
25597   peeppredicate(VM_Version::supports_fast_2op_lea());
25598   peepmatch (addL_rReg_imm);
25599   peepprocedure (lea_coalesce_imm);
25600   peepreplace (leaL_rReg_immL32_peep());
25601 %}
25602 
25603 peephole
25604 %{
25605   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25606                 VM_Version::is_intel_cascade_lake());
25607   peepmatch (incL_rReg);
25608   peepprocedure (lea_coalesce_imm);
25609   peepreplace (leaL_rReg_immL32_peep());
25610 %}
25611 
25612 peephole
25613 %{
25614   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25615                 VM_Version::is_intel_cascade_lake());
25616   peepmatch (decL_rReg);
25617   peepprocedure (lea_coalesce_imm);
25618   peepreplace (leaL_rReg_immL32_peep());
25619 %}
25620 
25621 peephole
25622 %{
25623   peeppredicate(VM_Version::supports_fast_2op_lea());
25624   peepmatch (salL_rReg_immI2);
25625   peepprocedure (lea_coalesce_imm);
25626   peepreplace (leaL_rReg_immI2_peep());
25627 %}
25628 
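// These rules invoke lea_remove_redundant, which can elide an oop-address
// computation that turns out to be redundant after register allocation.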
25629 peephole
25630 %{
25631   peepmatch (leaPCompressedOopOffset);
25632   peepprocedure (lea_remove_redundant);
25633 %}
25634 
25635 peephole
25636 %{
25637   peepmatch (leaP8Narrow);
25638   peepprocedure (lea_remove_redundant);
25639 %}
25640 
25641 peephole
25642 %{
25643   peepmatch (leaP32Narrow);
25644   peepprocedure (lea_remove_redundant);
25645 %}
25646 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only use
// flags that are already set by the previous instruction.
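//
// For illustration (arbitrary registers):
//   andl  eax, esi        // sets ZF and SF from its result
//   testl eax, eax        // redundant: ZF/SF already reflect eax
//   je    done            // the consumer only reads flags set by the andl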
25649 
// int variant
25651 peephole
25652 %{
25653   peepmatch (testI_reg);
25654   peepprocedure (test_may_remove);
25655 %}
25656 
// long variant
25658 peephole
25659 %{
25660   peepmatch (testL_reg);
25661   peepprocedure (test_may_remove);
25662 %}
25663 
25664 
25665 //----------SMARTSPILL RULES---------------------------------------------------
25666 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.